From 4ed628aca1dc9298e231f6d02db9e81989316eb7 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Wed, 26 Feb 2025 17:29:30 -0600
Subject: [PATCH 01/12] Initial grid table filter

---
 lib/banzai/filter/grid_table_filter.rb        | 531 ++++++++++++++++++
 .../pipeline/plain_markdown_pipeline.rb       |   1 +
 2 files changed, 532 insertions(+)
 create mode 100644 lib/banzai/filter/grid_table_filter.rb
diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
new file mode 100644
index 00000000000000..1998287fd3efa4
--- /dev/null
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -0,0 +1,531 @@
+# frozen_string_literal: true
+
+#
+# GridTableFilter.rb
+#
+# (c) 2025 by Miguel Angel Reina Ortega & Andreas Kraft
+# License: BSD 3-Clause License. See the LICENSE file for further details.
+#
+
+# TODO: This is now a legacy filter, and is only used with the Ruby parser.
+# The current markdown parser now properly handles grid table blocks.
+# issue: https://gitlab.com/gitlab-org/gitlab/-/issues/460864
+# GridTableFilter.rb
+#
+# Converts Pandoc-style grid tables to HTML tables with rowspan and colspan support
+#
+
+module Banzai
+  module Filter
+    class GridTableFilter < HTML::Pipeline::TextFilter
+      MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{
+        (?<code>
+          # Grid table blocks:
+          # +---+---+---+---+
+          # Anything, starting with | blocks which are ignored by this filter
+          # +---+---+---+---+
+
+          ^\s*\+-.*\+\s$          # First separator line
+          (?:.*\n)*?              # Any number of rows (non-greedy)
+          \s*\+-.*\+\s$           # Last separator line
+        )
+      }mx
+
+      require 'logger'
+
+      # Add these regex constants at the top of the file, after the require statement
+      GRID_TABLE_SEPARATOR = /\s*\+([-:=]+\+)+\s*$/
+      GRID_TABLE_HEADER_SEPARATOR = /.*\+([=:]+\+)+.*$/
+      GRID_TABLE_BODY_SEPARATOR = /.*\+([:-]+\+)+.*$/
+      GRID_TABLE_BODY_SEPARATOR_LINE = /[-:]+$/
+
+      class Cell
+        attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag
+
+        def initialize
+          @content = nil
+          @rowspan = 0
+          @colspan = 0
+          @colspan_adjusted = false
+          @alignment = 'align="center"'
+          @position = nil
+          @list_flag = false
+        end
+
+        def set_alignment(default_alignments, header_delimiter_positions)
+          header_delimiter_index = 0
+
+          while header_delimiter_index < default_alignments.length &&
+              @position > header_delimiter_positions[header_delimiter_index]
+            header_delimiter_index += 1
+          end
+
+          raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
+
+          if @position < header_delimiter_positions[header_delimiter_index]
+            @alignment = default_alignments[header_delimiter_index]
+          elsif @position == header_delimiter_positions[header_delimiter_index]
+            @alignment = default_alignments[header_delimiter_index]
+            header_delimiter_index + 1
+          end
+        end
+      end
+
+      class Row
+        attr_accessor :cells
+
+        def initialize(length = 1)
+          @cells = Array.new(length) { Cell.new }
+        end
+
+        def [](index)
+          @cells[index]
+        end
+
+        def []=(index, value)
+          @cells[index] = value
+        end
+      end
+
+      class RowTracker
+        attr_accessor :row_tracker
+
+        def initialize(items)
+          @row_tracker = Array.new(items, 0)
+        end
+
+        def [](index)
+          @row_tracker[index]
+        end
+
+        def []=(index, value)
+          @row_tracker[index] = value
+        end
+      end
+
+      # Helper method to detect separator lines
+      def separator?(line)
+        GRID_TABLE_SEPARATOR.match?(line)
+      end
+
+      # Helper method to handle content in cells
+      def handling_content(cell, content)
+        if cell.content.nil?
+          cell.rowspan += 1
+          cell.colspan += 1
+          if content.strip.start_with?("- ") # List
+            cell.list_flag = true
+            cell.content = "#{content.strip}\n"
+          elsif cell.list_flag && !content.strip.empty?
+            cell.content += "#{content.strip}\n"
+          elsif content.strip == ""
+            cell.list_flag = false
+            cell.content = "\n"
+          else
+            cell.content = content.strip.gsub(/\\\s*$/, "\n")
+          end
+        elsif content.strip.start_with?("- ")
+          cell.content += "\n" unless cell.list_flag
+
+          cell.list_flag = true
+          cell.content += "#{content.strip}\n"
+        elsif cell.list_flag && !content.strip.empty?
+          cell.content = cell.content.strip.chomp("\n")
+          cell.content += " #{content.strip}\n"
+        elsif content.strip.empty?
+          cell.list_flag = false
+          cell.content += cell.content.end_with?("\n") ? "" : "\n"
+        else
+          content = content.strip.gsub(/\\\s*$/, "\n")
+          cell.content += " #{content}"
+        end
+
+        cell
+      end
+
+      # Helper method to adjust colspan
+      def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions)
+        (column_index...number_of_parts).each do |j|
+          delimiter_start = nil
+          col_i = column_index
+
+          until delimiter_start
+            delimiter_start = col_i > 0 ? row[col_i - 1].position : 0
+            col_i -= 1
+          end
+
+          delimiters = ['|', '+']
+          positions = delimiters.filter_map do |delimiter|
+            pos = line[delimiter_start + 1..]&.index(delimiter)
+            pos ? pos + delimiter_start + 1 : nil
+          end
+
+          position = positions.min
+
+          if position && position > delimiter_positions[j]
+            row[column_index].colspan += 1
+
+            if position == delimiter_positions[-1]
+              colspan_allocated = row[column_index].colspan
+              row[column_index].colspan += number_of_columns - colspan_allocated - column_index
+            end
+          elsif position && position < delimiter_positions[j]
+            raise "Wrong cell formatting"
+          else
+            break
+          end
+        end
+
+        row[column_index]
+      end
+
+      # rubocop:disable Metrics/AbcSize -- PoC
+      # rubocop:disable Metrics/CyclomaticComplexity -- PoC
+      # rubocop:disable Metrics/PerceivedComplexity -- PoC
+      def parse_pandoc_table_with_spans(pandoc_table)
+        # Split the input into lines
+        lines = pandoc_table.strip.split("\n").map(&:strip)
+
+        separator_indices = lines.each_index.select { |i| separator?(lines[i]) }
+
+        raise "No valid separators found in the provided Pandoc table." if separator_indices.empty?
+
+        # Calculate max number of columns and delimiter positions
+        delimiter_positions = []
+        number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max
+        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
+
+        number_of_columns.times do |j|
+          start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
+          pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+")
+          delimiter_positions << (pos ? pos + start_pos + 1 : -1)
+        end
+
+        # Process header
+        has_header = false
+        header_delimiter_positions = []
+        default_alignments = []
+        header_rows = []
+        header_separator_index = nil
+
+        separator_indices.each do |index|
+          next unless GRID_TABLE_HEADER_SEPARATOR.match?(lines[index])
+
+          has_header = true
+          header_separator_index = index
+          parts = lines[index].strip.delete_prefix("+").split("+")
+
+          parts.each_with_index do |part, part_index|
+            default_alignments << if part.start_with?(":") && !part.end_with?(":")
+                                    'align="left"'
+                                  elsif !part.start_with?(":") && part.end_with?(":")
+                                    'align="right"'
+                                  else
+                                    'align="center"'
+                                  end
+
+            start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
+            pos = lines[index][start_pos + 1..]&.index("+")
+            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
+          end
+          break
+        end
+
+        # Process table body
+        data_rows = []
+
+        (separator_indices.length - 1).times do |row|
+          rows = []
+          rows_tracker = nil
+          in_data_row = false
+          start = separator_indices[row]
+          end_idx = separator_indices[row + 1]
+          row_lines = lines[start...end_idx]
+
+          next if row_lines.empty?
+
+          row_lines.each do |line|
+            if separator?(line) && !in_data_row
+              in_data_row = true
+              parts = line.strip.delete_prefix("+").split("+")
+              delimiter_index = 0
+              rows << Row.new(number_of_columns)
+              rows_tracker = RowTracker.new(number_of_columns)
+
+              i = 0
+              parts.each_with_index do |_, j|
+                next unless i < number_of_columns
+
+                delimiter_index += parts[j].length + 1
+                rows[-1][i].position = delimiter_index
+                rows[-1][i].set_alignment(default_alignments, header_delimiter_positions)
+
+                i += 1 while delimiter_index > delimiter_positions[i]
+                i += 1
+              end
+            elsif in_data_row
+              if GRID_TABLE_BODY_SEPARATOR.match?(line)
+                cells_content = line.strip
+                                   .delete_prefix("|")
+                                   .delete_prefix("+")
+                                   .delete_suffix("|")
+                                   .delete_suffix("+")
+                                   .split(/[\|\+]/)
+
+                rows << Row.new(number_of_columns)
+                aux_delimiter_index = 0
+                auxiliar_cell_index = 0
+
+                cells_content.each_with_index do |_, i|
+                  next unless auxiliar_cell_index < number_of_columns
+
+                  aux_delimiter_index += cells_content[i].length + 1
+                  rows[-1][auxiliar_cell_index].position = aux_delimiter_index
+                  rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions)
+
+                  auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
+
+                  auxiliar_cell_index += 1
+                end
+
+                raise "More cells than columns found" unless cells_content.length <= number_of_columns
+
+                column_index = 0
+
+                cells_content.each_with_index do |content, _i|
+                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content)
+                    rows_tracker[column_index] += 1
+                    rows[rows_tracker[column_index]][column_index].list_flag = false
+
+                    column_forward = 0
+                    (column_index...delimiter_positions.length).each do |del_index|
+                      if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index]
+                        column_forward += 1
+                        rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
+                      end
+                    end
+
+                    column_index += column_forward
+                  else
+                    rows[rows_tracker[column_index]][column_index] =
+                      handling_content(rows[rows_tracker[column_index]][column_index], content)
+                    rows[rows_tracker[column_index]][column_index].rowspan += 1
+
+                    unless rows[rows_tracker[column_index]][column_index].colspan_adjusted
+                      rows[rows_tracker[column_index]][column_index].colspan_adjusted = true
+                      rows[rows_tracker[column_index]][column_index] =
+                        adjust_colspan(rows[rows_tracker[column_index]],
+                          column_index,
+                          number_of_columns,
+                          line,
+                          number_of_columns,
+                          delimiter_positions)
+                    end
+
+                    if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]
+                      colspan = rows[rows_tracker[column_index]][column_index].colspan
+                      column_index += (colspan == 0 ? 1 : colspan) # rubocop:disable Metrics/BlockNesting -- PoC
+                    end
+                  end
+                end
+
+              else
+                cells_content = line.strip.delete_prefix("|").split(/\s*\|\s*/)
+                column_index = 0
+
+                if cells_content.length < number_of_columns
+                  cells_content.each_with_index do |content, _i|
+                    rows[rows_tracker[column_index]][column_index] =
+                      handling_content(rows[rows_tracker[column_index]][column_index], content)
+
+                    unless rows[rows_tracker[column_index]][column_index].colspan_adjusted
+                      rows[rows_tracker[column_index]][column_index].colspan_adjusted = true
+                      rows[rows_tracker[column_index]][column_index] =
+                        adjust_colspan(rows[rows_tracker[column_index]],
+                          column_index,
+                          number_of_columns,
+                          line,
+                          number_of_columns,
+                          delimiter_positions)
+                    end
+
+                    if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[column_index]
+                      column_index += rows[rows_tracker[column_index]][column_index].colspan
+                    end
+                  end
+                elsif cells_content.length == number_of_columns
+                  cells_content.each_with_index do |content, i|
+                    rows[rows_tracker[i]][i] = handling_content(rows[rows_tracker[i]][i], content)
+                  end
+                else
+                  raise "More cells than columns found"
+                end
+              end
+            else
+              raise "No separator line found for row starting"
+            end
+          end
+
+          if has_header && start >= header_separator_index
+            rows.each { |body_row| data_rows << body_row.cells }
+          elsif has_header && start < header_separator_index
+            rows.each { |header_row| header_rows << header_row.cells }
+          end
+        end
+
+        raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty?
+
+        # Format text (bold and italic)
+        [header_rows, data_rows].each do |rows|
+          rows.each do |row|
+            row.each do |cell|
+              next if cell.content.nil?
+
+              delimters = ['**', '__']
+              delimters.each do |bold_chars|
+                while cell.content.include?(bold_chars)
+                  cell.content = cell.content.sub(bold_chars, "<strong>")
+                                          .sub(bold_chars, "</strong>")
+                end
+              end
+
+              while cell.content.include?("_") && cell.content.exclude?("\\_")
+                cell.content = cell.content.rstrip.sub("_", "<i>").sub("_", "</i>")
+              end
+
+              cell.content = cell.content.rstrip.sub("\\_", "_") while cell.content.include?("\\_")
+
+              # Convert newlines to HTML breaks
+              cell.content = cell.content&.gsub("\n", "<br />")
+            end
+          end
+
+          # Validate grid correctness
+          forward_rowspan = []
+
+          rows.each_with_index do |row, row_index|
+            forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty?
+            sum = 0
+
+            row.each_with_index do |cell, cell_index|
+              sum += cell.colspan
+
+              if row_index > 0 && cell.colspan == 0
+                sum += 1 if forward_rowspan[cell_index] > 0
+
+                forward_rowspan[cell_index] -= 1
+              end
+
+              forward_rowspan[cell_index] = cell.rowspan - 1 if forward_rowspan[cell_index] == 0 && cell.rowspan > 1
+            end
+
+            raise "Grid table not converted properly" unless sum == number_of_columns
+          end
+        end
+
+        [header_rows, data_rows]
+      end
+
+      def generate_html_table_with_spans(pandoc_table)
+        grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
+      rescue StandardError => e
+        logger = Logger.new($stdout)
+        logger.error("Grid table could not be generated: #{e.message}")
+
+        "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
+      else
+        html = "<table>\n"
+        has_header = false
+
+        grid_header.each do |row|
+          row.each do |cell|
+            if cell.rowspan != 0 && cell.colspan != 0
+              has_header = true
+              break
+            end
+          end
+        end
+
+        if has_header
+          html += "    <thead>\n"
+          grid_header.each do |row|
+            html += "        <tr>\n"
+            row.each do |cell|
+              next if cell.rowspan == 0 || cell.colspan == 0
+
+              # Prepare content, in case there's a list
+              matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
+              if matches
+                list = "<ul>"
+                matches.each do |match|
+                  list += "<li>#{match[1]}</li>"
+                end
+                list += "</ul>"
+                cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
+                # Enforce left alignment if cell contains a list
+                cell.alignment = 'align="left"'
+              end
+
+              rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
+              colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
+              html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
+            end
+            html += "        </tr>\n"
+          end
+          html += "    </thead>\n"
+        end
+
+        html += "    <tbody>\n"
+        grid_body.each do |row|
+          html += "        <tr>\n"
+          row.each do |cell|
+            next if cell.rowspan == 0 || cell.colspan == 0
+
+            matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
+            if matches
+              list = "<ul>"
+              matches.each do |match|
+                list += "<li>#{match[1]}</li>"
+              end
+              list += "</ul>"
+              cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
+              # Enforce left alignment if cell contains a list
+              cell.alignment = 'align="left"'
+            end
+
+            rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
+            colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
+            html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
+          end
+          html += "        </tr>\n"
+        end
+
+        html += "    </tbody>\n"
+        html += "</table>"
+        html
+      end
+      # rubocop:enable Metrics/PerceivedComplexity
+      # rubocop:enable Metrics/CyclomaticComplexity
+      # rubocop:enable Metrics/AbcSize
+
+      def call
+        return @text if MarkdownFilter.glfm_markdown?(context)
+
+        regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true)
+        return @text unless regex.match?(@text)
+
+        regex.replace_gsub(@text) do |match|
+          # Extract the grid table content from the match
+          grid_table = match[:code]
+          if grid_table
+            # Convert grid table to HTML table
+            generate_html_table_with_spans(grid_table)
+          else
+            # Return original text if no grid table found
+            match.to_s
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/banzai/pipeline/plain_markdown_pipeline.rb b/lib/banzai/pipeline/plain_markdown_pipeline.rb
index 19aadb0cc049fe..f64437749c2ca6 100644
--- a/lib/banzai/pipeline/plain_markdown_pipeline.rb
+++ b/lib/banzai/pipeline/plain_markdown_pipeline.rb
@@ -6,6 +6,7 @@ class PlainMarkdownPipeline < BasePipeline
       def self.filters
         FilterArray[
           Filter::IncludeFilter,
+          Filter::GridTableFilter,
           Filter::MarkdownFilter,
           Filter::ConvertTextToDocFilter,
         ]
-- 
GitLab


From 3c44dbba54dd9c395cd05aba4908faa2591b5aa0 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Wed, 26 Feb 2025 17:30:10 -0600
Subject: [PATCH 02/12] Fix markdown guard

---
 lib/banzai/filter/grid_table_filter.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 1998287fd3efa4..6e9a3bd18a1cdb 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -509,7 +509,7 @@ def generate_html_table_with_spans(pandoc_table)
       # rubocop:enable Metrics/AbcSize
 
       def call
-        return @text if MarkdownFilter.glfm_markdown?(context)
+        return @text unless MarkdownFilter.glfm_markdown?(context)
 
         regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true)
         return @text unless regex.match?(@text)
-- 
GitLab


From 8e15010c10783e3a679d28de1c7b47077c92bf8f Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Wed, 26 Feb 2025 17:32:04 -0600
Subject: [PATCH 03/12] Fix regex

---
 lib/banzai/filter/grid_table_filter.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 6e9a3bd18a1cdb..28770e1b83307c 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -25,9 +25,9 @@ class GridTableFilter < HTML::Pipeline::TextFilter
           # Anything, starting with | blocks which are ignored by this filter
           # +---+---+---+---+
 
-          ^\s*\+-.*\+\s$          # First separator line
+          ^\s*\+(-+\+)+$          # First separator line
           (?:.*\n)*?              # Any number of rows (non-greedy)
-          \s*\+-.*\+\s$           # Last separator line
+          \s*\+(-+\+)+$           # Last separator line
         )
       }mx
 
-- 
GitLab


From 69e2a124e2fbaea2e04cbc3af51d91516e9e1ced Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Wed, 26 Feb 2025 17:34:05 -0600
Subject: [PATCH 04/12] =?UTF-8?q?Don=E2=80=99t=20use=20Gitlab::UntrustedRe?=
 =?UTF-8?q?gexp=20for=20now?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/banzai/filter/grid_table_filter.rb | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 28770e1b83307c..0f00b0ccc457df 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -511,10 +511,12 @@ def generate_html_table_with_spans(pandoc_table)
       def call
         return @text unless MarkdownFilter.glfm_markdown?(context)
 
-        regex = Gitlab::UntrustedRegexp.new(MARKDOWN_GRID_TABLE_BLOCK_REGEX, multiline: true)
+        regex = MARKDOWN_GRID_TABLE_BLOCK_REGEX
         return @text unless regex.match?(@text)
 
-        regex.replace_gsub(@text) do |match|
+        @text.gsub(regex) do
+          match = Regexp.last_match
+
           # Extract the grid table content from the match
           grid_table = match[:code]
           if grid_table
-- 
GitLab


From fccd3a46dbc020cab30dcc7343319a9f39cb6690 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Fri, 7 Mar 2025 10:06:21 -0600
Subject: [PATCH 05/12] Latest changes made by @reinaortega

---
 lib/banzai/filter/grid_table_filter.rb | 214 +++++++++++++------------
 1 file changed, 112 insertions(+), 102 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 0f00b0ccc457df..a926e68f9e3e2f 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -25,51 +25,50 @@ class GridTableFilter < HTML::Pipeline::TextFilter
           # Anything, starting with | blocks which are ignored by this filter
           # +---+---+---+---+
 
-          ^\s*\+(-+\+)+$          # First separator line
-          (?:.*\n)*?              # Any number of rows (non-greedy)
-          \s*\+(-+\+)+$           # Last separator line
+          ^\s*\+(-+\+)+$\n         # First separator line
+          (?:^\s*[|+][^\n]*$\n)*
+          ^\s*\+(-+\+)+$           # Last separator line
+
         )
       }mx
 
       require 'logger'
 
       # Add these regex constants at the top of the file, after the require statement
-      GRID_TABLE_SEPARATOR = /\s*\+([-:=]+\+)+\s*$/
-      GRID_TABLE_HEADER_SEPARATOR = /.*\+([=:]+\+)+.*$/
-      GRID_TABLE_BODY_SEPARATOR = /.*\+([:-]+\+)+.*$/
-      GRID_TABLE_BODY_SEPARATOR_LINE = /[-:]+$/
+      GRID_TABLE_SEPARATOR = /^\s*\+([-:=]+\+)+\s*$/
+      GRID_TABLE_HEADER_SEPARATOR = /^\s*\+([=:]+\+)+\s*$/
+      GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/
+      GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/
 
       class Cell
-        attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag
-
-        def initialize
-          @content = nil
-          @rowspan = 0
-          @colspan = 0
-          @colspan_adjusted = false
-          @alignment = 'align="center"'
-          @position = nil
-          @list_flag = false
-        end
-
-        def set_alignment(default_alignments, header_delimiter_positions)
-          header_delimiter_index = 0
-
-          while header_delimiter_index < default_alignments.length &&
-              @position > header_delimiter_positions[header_delimiter_index]
-            header_delimiter_index += 1
+          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag
+
+          def initialize
+              @content = nil
+              @rowspan = 0
+              @colspan = 0
+              @colspan_adjusted = false
+              @alignment = 'align="center"'
+              @position = nil
+              @list_flag = false
           end
 
-          raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
+          def set_alignment(default_alignments, header_delimiter_positions)
+            header_delimiter_index = 0
+            while header_delimiter_index < default_alignments.length &&
+                @position > header_delimiter_positions[header_delimiter_index]
+              header_delimiter_index += 1
+            end
 
-          if @position < header_delimiter_positions[header_delimiter_index]
-            @alignment = default_alignments[header_delimiter_index]
-          elsif @position == header_delimiter_positions[header_delimiter_index]
-            @alignment = default_alignments[header_delimiter_index]
-            header_delimiter_index + 1
+            raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
+            if @position < header_delimiter_positions[header_delimiter_index]
+                @alignment = default_alignments[header_delimiter_index]
+            elsif @position == header_delimiter_positions[header_delimiter_index]
+                @alignment = default_alignments[header_delimiter_index]
+                header_delimiter_index += 1
+            end
           end
-        end
-      end
+      end # end of class Cell
 
       class Row
         attr_accessor :cells
@@ -85,7 +84,7 @@ def [](index)
         def []=(index, value)
           @cells[index] = value
         end
-      end
+      end # end of class Row
 
       class RowTracker
         attr_accessor :row_tracker
@@ -101,13 +100,16 @@ def [](index)
         def []=(index, value)
           @row_tracker[index] = value
         end
-      end
+
+        def maxValue
+          @row_tracker.max
+        end
+      end # end of class RowTracker
 
       # Helper method to detect separator lines
       def separator?(line)
         GRID_TABLE_SEPARATOR.match?(line)
       end
-
       # Helper method to handle content in cells
       def handling_content(cell, content)
         if cell.content.nil?
@@ -183,9 +185,11 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
       # rubocop:disable Metrics/CyclomaticComplexity -- PoC
       # rubocop:disable Metrics/PerceivedComplexity -- PoC
       def parse_pandoc_table_with_spans(pandoc_table)
+
         # Split the input into lines
         lines = pandoc_table.strip.split("\n").map(&:strip)
 
+    # Retrieve separator indices
         separator_indices = lines.each_index.select { |i| separator?(lines[i]) }
 
         raise "No valid separators found in the provided Pandoc table." if separator_indices.empty?
@@ -193,8 +197,9 @@ def parse_pandoc_table_with_spans(pandoc_table)
         # Calculate max number of columns and delimiter positions
         delimiter_positions = []
         number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max
-        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
 
+    # Determine delimiter positions
+        separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
         number_of_columns.times do |j|
           start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
           pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+")
@@ -208,6 +213,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
         header_rows = []
         header_separator_index = nil
 
+        # Determine header delimiter positions
         separator_indices.each do |index|
           next unless GRID_TABLE_HEADER_SEPARATOR.match?(lines[index])
 
@@ -231,7 +237,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
           break
         end
 
-        # Process table body
+        # Process table body (including rows belonging to header as they are processed in the same way)
         data_rows = []
 
         (separator_indices.length - 1).times do |row|
@@ -245,6 +251,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
           next if row_lines.empty?
 
           row_lines.each do |line|
+            # First line (normally a separator) of each block
             if separator?(line) && !in_data_row
               in_data_row = true
               parts = line.strip.delete_prefix("+").split("+")
@@ -263,8 +270,10 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 i += 1 while delimiter_index > delimiter_positions[i]
                 i += 1
               end
+      # Lines in a block
             elsif in_data_row
-              if GRID_TABLE_BODY_SEPARATOR.match?(line)
+              # Regular data row or partial separator
+                if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
                 cells_content = line.strip
                                    .delete_prefix("|")
                                    .delete_prefix("+")
@@ -291,22 +300,22 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 raise "More cells than columns found" unless cells_content.length <= number_of_columns
 
                 column_index = 0
-
+                maxRowTracker = rows_tracker.maxValue
                 cells_content.each_with_index do |content, _i|
-                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content)
-                    rows_tracker[column_index] += 1
+                  if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
+                    rows_tracker[column_index] = maxRowTracker + 1
                     rows[rows_tracker[column_index]][column_index].list_flag = false
 
                     column_forward = 0
                     (column_index...delimiter_positions.length).each do |del_index|
                       if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index]
                         column_forward += 1
-                        rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
+                        #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
                       end
                     end
 
                     column_index += column_forward
-                  else
+                  else # Regular cell in Partial separator line
                     rows[rows_tracker[column_index]][column_index] =
                       handling_content(rows[rows_tracker[column_index]][column_index], content)
                     rows[rows_tracker[column_index]][column_index].rowspan += 1
@@ -329,8 +338,8 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   end
                 end
 
-              else
-                cells_content = line.strip.delete_prefix("|").split(/\s*\|\s*/)
+              else # Data row
+                cells_content = line.strip.delete_prefix("|").split(/\|/)
                 column_index = 0
 
                 if cells_content.length < number_of_columns
@@ -424,36 +433,64 @@ def parse_pandoc_table_with_spans(pandoc_table)
         end
 
         [header_rows, data_rows]
-      end
+      end # end of parse_pandoc_table_with_spans
 
       def generate_html_table_with_spans(pandoc_table)
-        grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
-      rescue StandardError => e
-        logger = Logger.new($stdout)
-        logger.error("Grid table could not be generated: #{e.message}")
-
-        "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
-      else
-        html = "<table>\n"
-        has_header = false
+        begin
+          grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
+        rescue StandardError => e
+          logger = Logger.new($stdout)
+          logger.error("Grid table could not be generated: #{e.message}")
+
+          "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
+        else
+          html = "<table>\n"
+          has_header = false
 
-        grid_header.each do |row|
-          row.each do |cell|
-            if cell.rowspan != 0 && cell.colspan != 0
-              has_header = true
-              break
+          grid_header.each do |row|
+            row.each do |cell|
+              if cell.rowspan != 0 && cell.colspan != 0
+                has_header = true
+                break
+              end
             end
           end
-        end
 
-        if has_header
-          html += "    <thead>\n"
-          grid_header.each do |row|
+          if has_header
+            html += "    <thead>\n"
+            grid_header.each do |row|
+              html += "        <tr>\n"
+              row.each do |cell|
+                next if cell.rowspan == 0 || cell.colspan == 0
+
+                # Prepare content, in case there's a list
+                matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
+                if matches
+                  list = "<ul>"
+                  matches.each do |match|
+                    list += "<li>#{match[1]}</li>"
+                  end
+                  list += "</ul>"
+                  cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
+                  # Enforce left alignment if cell contains a list
+                  cell.alignment = 'align="left"'
+                end
+
+                rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
+                colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
+                html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
+              end
+              html += "        </tr>\n"
+            end
+            html += "    </thead>\n"
+          end
+
+          html += "    <tbody>\n"
+          grid_body.each do |row|
             html += "        <tr>\n"
             row.each do |cell|
               next if cell.rowspan == 0 || cell.colspan == 0
 
-              # Prepare content, in case there's a list
               matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
               if matches
                 list = "<ul>"
@@ -468,42 +505,16 @@ def generate_html_table_with_spans(pandoc_table)
 
               rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
               colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-              html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
+              html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
             end
             html += "        </tr>\n"
           end
-          html += "    </thead>\n"
-        end
-
-        html += "    <tbody>\n"
-        grid_body.each do |row|
-          html += "        <tr>\n"
-          row.each do |cell|
-            next if cell.rowspan == 0 || cell.colspan == 0
-
-            matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
-            if matches
-              list = "<ul>"
-              matches.each do |match|
-                list += "<li>#{match[1]}</li>"
-              end
-              list += "</ul>"
-              cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
-              # Enforce left alignment if cell contains a list
-              cell.alignment = 'align="left"'
-            end
 
-            rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
-            colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-            html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
-          end
-          html += "        </tr>\n"
+          html += "    </tbody>\n"
+          html += "</table>"
+          html
         end
-
-        html += "    </tbody>\n"
-        html += "</table>"
-        html
-      end
+      end # end of def generate_html_table_with_spans
       # rubocop:enable Metrics/PerceivedComplexity
       # rubocop:enable Metrics/CyclomaticComplexity
       # rubocop:enable Metrics/AbcSize
@@ -516,7 +527,6 @@ def call
 
         @text.gsub(regex) do
           match = Regexp.last_match
-
           # Extract the grid table content from the match
           grid_table = match[:code]
           if grid_table
@@ -527,7 +537,7 @@ def call
             match.to_s
           end
         end
-      end
-    end
-  end
-end
+      end # end of def call
+    end # end of class GridTableFilter
+  end # end of module Filter
+end # end of module Banzai
-- 
GitLab


From 75d089cc9641e9720a0f2c39359bdb1650203a41 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Thu, 20 Mar 2025 10:50:12 -0500
Subject: [PATCH 06/12] Update from customer

---
 lib/banzai/filter/grid_table_filter.rb | 197 ++++++++++++++++---------
 1 file changed, 127 insertions(+), 70 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index a926e68f9e3e2f..f1fe8c4fd4868b 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -40,33 +40,36 @@ class GridTableFilter < HTML::Pipeline::TextFilter
       GRID_TABLE_BODY_SEPARATOR = /[^\n]*\+([:-]+\+)+[^\n]*$/
       GRID_TABLE_BODY_SEPARATOR_LINE = /^[-:]+$/
 
+      NEXT_ELEMENT_LIST_MARK = "∆"
+
       class Cell
-          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position, :list_flag
+          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag
 
           def initialize
-              @content = nil
-              @rowspan = 0
-              @colspan = 0
-              @colspan_adjusted = false
-              @alignment = 'align="center"'
-              @position = nil
-              @list_flag = false
+            @content = nil
+            @rowspan = 0
+            @colspan = 0
+            @colspan_adjusted = false
+            @alignment = 'align="center"'
+            @position_start = nil
+            @position = nil
+            @list_flag = false
           end
 
-          def set_alignment(default_alignments, header_delimiter_positions)
+          def calculateAndSetAlignment(header_delimiter_positions, default_alignments )
+
+            raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil?
+
             header_delimiter_index = 0
             while header_delimiter_index < default_alignments.length &&
-                @position > header_delimiter_positions[header_delimiter_index]
+                @position_start > header_delimiter_positions[header_delimiter_index]
               header_delimiter_index += 1
             end
 
             raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
-            if @position < header_delimiter_positions[header_delimiter_index]
-                @alignment = default_alignments[header_delimiter_index]
-            elsif @position == header_delimiter_positions[header_delimiter_index]
-                @alignment = default_alignments[header_delimiter_index]
-                header_delimiter_index += 1
-            end
+
+            @alignment = default_alignments[header_delimiter_index]
+
           end
       end # end of class Cell
 
@@ -112,34 +115,40 @@ def separator?(line)
       end
       # Helper method to handle content in cells
       def handling_content(cell, content)
+        _c = content.strip
         if cell.content.nil?
           cell.rowspan += 1
           cell.colspan += 1
-          if content.strip.start_with?("- ") # List
+          if _c.start_with?("- ") # List
             cell.list_flag = true
-            cell.content = "#{content.strip}\n"
+            _c = _c.gsub(/\\\s*$/, '\n')
+				    cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}"  # Add list element end mark to know when the list element ends
           elsif cell.list_flag && !content.strip.empty?
-            cell.content += "#{content.strip}\n"
-          elsif content.strip == ""
-            cell.list_flag = false
+            _c = _c.gsub(/\\\s*$/, '\n')
+            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" #add the list element end mark
+          elsif _c.empty?
             cell.content = "\n"
           else
-            cell.content = content.strip.gsub(/\\\s*$/, "\n")
+            cell.content = _c.gsub(/\\\s*$/, "\n")
           end
-        elsif content.strip.start_with?("- ")
+        elsif _c.start_with?("- ")
           cell.content += "\n" unless cell.list_flag
-
           cell.list_flag = true
-          cell.content += "#{content.strip}\n"
-        elsif cell.list_flag && !content.strip.empty?
-          cell.content = cell.content.strip.chomp("\n")
-          cell.content += " #{content.strip}\n"
-        elsif content.strip.empty?
-          cell.list_flag = false
+          _c = _c.gsub(/\\\s*$/, '\n')
+          cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}"
+        elsif cell.list_flag && !_c.empty?
+          cell.content = cell.content.strip.chomp("#{NEXT_ELEMENT_LIST_MARK}")
+          _c = _c.gsub(/\\\s*$/, '\n')
+          cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}"
+        elsif _c.empty?
+          if cell.list_flag
+            cell.list_flag = false
+            cell.content += "\n\n"
+          end
           cell.content += cell.content.end_with?("\n") ? "" : "\n"
         else
-          content = content.strip.gsub(/\\\s*$/, "\n")
-          cell.content += " #{content}"
+          _c = _c.gsub(/\\\s*$/, "\n")
+          cell.content += " #{_c}"
         end
 
         cell
@@ -158,9 +167,9 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
 
           delimiters = ['|', '+']
           positions = delimiters.filter_map do |delimiter|
-            pos = line[delimiter_start + 1..]&.index(delimiter)
+            pos = line[delimiter_start + 1..-1]&.index(delimiter)
             pos ? pos + delimiter_start + 1 : nil
-          end
+          end.compact
 
           position = positions.min
 
@@ -181,15 +190,41 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
         row[column_index]
       end
 
+      def checkDelimiterAlignment(line, delimiterPositions)
+        return false if line.empty? || delimiterPositions.empty?
+
+        #puts "\nChecking line: #{line}"
+        #puts "Expected delimiter positions: #{delimiterPositions}"
+
+        # For any row (only +, only |, mix of + and |)
+        currentPositions = []
+        start_pos = 1
+
+        while start_pos < line.length
+          pos = line.index(/[|+]/, start_pos)  # Find the next occurrence of | or + starting from start_pos
+          break if pos.nil?  # Exit if no more delimiters are found
+
+          currentPositions << pos
+          start_pos = pos + 1  # Move to the next character after the found delimiter
+        end
+
+        #puts "Current positions: #{currentPositions}"
+
+        # Check if the last expected delimiter position is found in currentPositions
+        return currentPositions.include?(delimiterPositions[-1]) &&
+               line.match?(/\A[|+]/) &&  # Check if the line starts with | or +
+               currentPositions.all? { |pos| delimiterPositions.include?(pos) }  # Ensure all current positions are in delimiterPositions
+      end
+
       # rubocop:disable Metrics/AbcSize -- PoC
       # rubocop:disable Metrics/CyclomaticComplexity -- PoC
       # rubocop:disable Metrics/PerceivedComplexity -- PoC
       def parse_pandoc_table_with_spans(pandoc_table)
 
         # Split the input into lines
-        lines = pandoc_table.strip.split("\n").map(&:strip)
+        lines = pandoc_table.rstrip.split("\n").map(&:rstrip)
 
-    # Retrieve separator indices
+        # Retrieve separator indices
         separator_indices = lines.each_index.select { |i| separator?(lines[i]) }
 
         raise "No valid separators found in the provided Pandoc table." if separator_indices.empty?
@@ -198,11 +233,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
         delimiter_positions = []
         number_of_columns = separator_indices.map { |i| lines[i].count("+") - 1 }.max
 
-    # Determine delimiter positions
+        # Determine delimiter positions
         separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
         number_of_columns.times do |j|
           start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
-          pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+")
+          pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+")
           delimiter_positions << (pos ? pos + start_pos + 1 : -1)
         end
 
@@ -231,12 +266,36 @@ def parse_pandoc_table_with_spans(pandoc_table)
                                   end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
-            pos = lines[index][start_pos + 1..]&.index("+")
+            pos = lines[index][start_pos + 1..-1]&.index("+")
             header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
           end
           break
         end
 
+        unless has_header
+          # Set default alignments from the first separator which takes the role of header
+          header_separator_index = 0
+          parts = lines[0].strip.delete_prefix("+").split("+")
+
+          parts.each_with_index do |part, part_index|
+            default_alignments << if part.start_with?(":") && !part.end_with?(":")
+                                      'align="left"'
+                                    elsif !part.start_with?(":") && part.end_with?(":")
+                                      'align="right"'
+                                    else
+                                      'align="center"'
+                                    end
+
+            start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
+            pos = lines[0][start_pos + 1..-1]&.index("+")
+            header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
+          end
+        end
+
+        #Check end table delimiter alignment (not checked during the lines processing)
+	      raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1], delimiter_positions)
+
+
         # Process table body (including rows belonging to header as they are processed in the same way)
         data_rows = []
 
@@ -251,9 +310,13 @@ def parse_pandoc_table_with_spans(pandoc_table)
           next if row_lines.empty?
 
           row_lines.each do |line|
+            line = line.rstrip
             # First line (normally a separator) of each block
             if separator?(line) && !in_data_row
               in_data_row = true
+              #Check end table delimiter alignment (not checked during the lines processing)
+	          raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+
               parts = line.strip.delete_prefix("+").split("+")
               delimiter_index = 0
               rows << Row.new(number_of_columns)
@@ -264,22 +327,21 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 next unless i < number_of_columns
 
                 delimiter_index += parts[j].length + 1
+                rows[-1][i].position_start = delimiter_index - parts[j].length
                 rows[-1][i].position = delimiter_index
-                rows[-1][i].set_alignment(default_alignments, header_delimiter_positions)
+                rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
 
                 i += 1 while delimiter_index > delimiter_positions[i]
                 i += 1
               end
-      # Lines in a block
+            # Lines in a block
             elsif in_data_row
               # Regular data row or partial separator
-                if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
-                cells_content = line.strip
-                                   .delete_prefix("|")
-                                   .delete_prefix("+")
-                                   .delete_suffix("|")
-                                   .delete_suffix("+")
-                                   .split(/[\|\+]/)
+              if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
+                #Check end table delimiter alignment (not checked during the lines processing)
+                raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+
+                cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)
 
                 rows << Row.new(number_of_columns)
                 aux_delimiter_index = 0
@@ -289,8 +351,9 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   next unless auxiliar_cell_index < number_of_columns
 
                   aux_delimiter_index += cells_content[i].length + 1
+                  rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length
                   rows[-1][auxiliar_cell_index].position = aux_delimiter_index
-                  rows[-1][auxiliar_cell_index].set_alignment(default_alignments, header_delimiter_positions)
+                  rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
 
                   auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
 
@@ -301,6 +364,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
 
                 column_index = 0
                 maxRowTracker = rows_tracker.maxValue
+
                 cells_content.each_with_index do |content, _i|
                   if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
                     rows_tracker[column_index] = maxRowTracker + 1
@@ -310,7 +374,6 @@ def parse_pandoc_table_with_spans(pandoc_table)
                     (column_index...delimiter_positions.length).each do |del_index|
                       if rows[rows_tracker[column_index]][column_index].position >= delimiter_positions[del_index]
                         column_forward += 1
-                        #rows_tracker[column_index + column_forward - 1] += 1 if column_forward > 1
                       end
                     end
 
@@ -337,9 +400,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
                     end
                   end
                 end
-
               else # Data row
                 cells_content = line.strip.delete_prefix("|").split(/\|/)
+                #Check end table delimiter alignment (not checked during the lines processing)
+	              raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+
                 column_index = 0
 
                 if cells_content.length < number_of_columns
@@ -379,10 +444,12 @@ def parse_pandoc_table_with_spans(pandoc_table)
             rows.each { |body_row| data_rows << body_row.cells }
           elsif has_header && start < header_separator_index
             rows.each { |header_row| header_rows << header_row.cells }
+          else
+            rows.each { |body_row| data_rows << body_row.cells }
           end
-        end
 
-        raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty?
+          raise "No valid rows found in the provided Pandoc table." if data_rows.empty? && header_rows.empty?
+        end
 
         # Format text (bold and italic)
         [header_rows, data_rows].each do |rows|
@@ -390,20 +457,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
             row.each do |cell|
               next if cell.content.nil?
 
-              delimters = ['**', '__']
-              delimters.each do |bold_chars|
-                while cell.content.include?(bold_chars)
-                  cell.content = cell.content.sub(bold_chars, "<strong>")
-                                          .sub(bold_chars, "</strong>")
-                end
-              end
-
-              while cell.content.include?("_") && cell.content.exclude?("\\_")
-                cell.content = cell.content.rstrip.sub("_", "<i>").sub("_", "</i>")
-              end
+              cell.content = cell.content.gsub(/</, "&lt;")
 
-              cell.content = cell.content.rstrip.sub("\\_", "_") while cell.content.include?("\\_")
+              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/, "\\k<espace><strong>\\k<text></strong>")
 
+              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/, "\\k<espace><i>\\k<text></i>")
               # Convert newlines to HTML breaks
               cell.content = cell.content&.gsub("\n", "<br />")
             end
@@ -464,14 +522,14 @@ def generate_html_table_with_spans(pandoc_table)
                 next if cell.rowspan == 0 || cell.colspan == 0
 
                 # Prepare content, in case there's a list
-                matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
+                matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
                 if matches
                   list = "<ul>"
                   matches.each do |match|
                     list += "<li>#{match[1]}</li>"
                   end
                   list += "</ul>"
-                  cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
+                  cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
                   # Enforce left alignment if cell contains a list
                   cell.alignment = 'align="left"'
                 end
@@ -491,14 +549,13 @@ def generate_html_table_with_spans(pandoc_table)
             row.each do |cell|
               next if cell.rowspan == 0 || cell.colspan == 0
 
-              matches = cell.content&.scan(%r{\s*([-*+]|\d+\.)\s+([^<]+?)(?=<br />|$)})
+              matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
               if matches
                 list = "<ul>"
                 matches.each do |match|
                   list += "<li>#{match[1]}</li>"
                 end
-                list += "</ul>"
-                cell.content = cell.content.gsub(%r{(\s*([-*+]|\d+\.)\s+[^<]+?<br />)+}, list)
+                cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
                 # Enforce left alignment if cell contains a list
                 cell.alignment = 'align="left"'
               end
-- 
GitLab


From 07ba1cffe66e384e8e5ef95925e165a1e4f9c88c Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Tue, 25 Mar 2025 12:23:24 -0500
Subject: [PATCH 07/12] Rubocop auto corrections

---
 lib/banzai/filter/grid_table_filter.rb | 254 +++++++++++++------------
 1 file changed, 131 insertions(+), 123 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index f1fe8c4fd4868b..952d6cb13387b6 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -43,35 +43,34 @@ class GridTableFilter < HTML::Pipeline::TextFilter
       NEXT_ELEMENT_LIST_MARK = "∆"
 
       class Cell
-          attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position, :list_flag
-
-          def initialize
-            @content = nil
-            @rowspan = 0
-            @colspan = 0
-            @colspan_adjusted = false
-            @alignment = 'align="center"'
-            @position_start = nil
-            @position = nil
-            @list_flag = false
-          end
-
-          def calculateAndSetAlignment(header_delimiter_positions, default_alignments )
-
-            raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil?
+        attr_accessor :content, :rowspan, :colspan, :colspan_adjusted, :alignment, :position_start, :position,
+          :list_flag
+
+        def initialize
+          @content = nil
+          @rowspan = 0
+          @colspan = 0
+          @colspan_adjusted = false
+          @alignment = 'align="center"'
+          @position_start = nil
+          @position = nil
+          @list_flag = false
+        end
 
-            header_delimiter_index = 0
-            while header_delimiter_index < default_alignments.length &&
-                @position_start > header_delimiter_positions[header_delimiter_index]
-              header_delimiter_index += 1
-            end
+        def calculateAndSetAlignment(header_delimiter_positions, default_alignments)
+          raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil?
 
-            raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
+          header_delimiter_index = 0
+          while header_delimiter_index < default_alignments.length &&
+              @position_start > header_delimiter_positions[header_delimiter_index]
+            header_delimiter_index += 1
+          end
 
-            @alignment = default_alignments[header_delimiter_index]
+          raise "Invalid table formatting" unless header_delimiter_index < default_alignments.length
 
-          end
-      end # end of class Cell
+          @alignment = default_alignments[header_delimiter_index]
+        end
+      end
 
       class Row
         attr_accessor :cells
@@ -87,7 +86,7 @@ def [](index)
         def []=(index, value)
           @cells[index] = value
         end
-      end # end of class Row
+      end
 
       class RowTracker
         attr_accessor :row_tracker
@@ -107,12 +106,13 @@ def []=(index, value)
         def maxValue
           @row_tracker.max
         end
-      end # end of class RowTracker
+      end
 
       # Helper method to detect separator lines
       def separator?(line)
         GRID_TABLE_SEPARATOR.match?(line)
       end
+
       # Helper method to handle content in cells
       def handling_content(cell, content)
         _c = content.strip
@@ -122,10 +122,10 @@ def handling_content(cell, content)
           if _c.start_with?("- ") # List
             cell.list_flag = true
             _c = _c.gsub(/\\\s*$/, '\n')
-				    cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}"  # Add list element end mark to know when the list element ends
+            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # Add list element end mark to know when the list element ends
           elsif cell.list_flag && !content.strip.empty?
             _c = _c.gsub(/\\\s*$/, '\n')
-            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" #add the list element end mark
+            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark
           elsif _c.empty?
             cell.content = "\n"
           else
@@ -137,7 +137,7 @@ def handling_content(cell, content)
           _c = _c.gsub(/\\\s*$/, '\n')
           cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}"
         elsif cell.list_flag && !_c.empty?
-          cell.content = cell.content.strip.chomp("#{NEXT_ELEMENT_LIST_MARK}")
+          cell.content = cell.content.strip.chomp(NEXT_ELEMENT_LIST_MARK.to_s)
           _c = _c.gsub(/\\\s*$/, '\n')
           cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}"
         elsif _c.empty?
@@ -145,6 +145,7 @@ def handling_content(cell, content)
             cell.list_flag = false
             cell.content += "\n\n"
           end
+
           cell.content += cell.content.end_with?("\n") ? "" : "\n"
         else
           _c = _c.gsub(/\\\s*$/, "\n")
@@ -167,7 +168,7 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
 
           delimiters = ['|', '+']
           positions = delimiters.filter_map do |delimiter|
-            pos = line[delimiter_start + 1..-1]&.index(delimiter)
+            pos = line[delimiter_start + 1..]&.index(delimiter)
             pos ? pos + delimiter_start + 1 : nil
           end.compact
 
@@ -193,34 +194,36 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
       def checkDelimiterAlignment(line, delimiterPositions)
         return false if line.empty? || delimiterPositions.empty?
 
-        #puts "\nChecking line: #{line}"
-        #puts "Expected delimiter positions: #{delimiterPositions}"
+        # puts "\nChecking line: #{line}"
+        # puts "Expected delimiter positions: #{delimiterPositions}"
 
         # For any row (only +, only |, mix of + and |)
         currentPositions = []
         start_pos = 1
 
         while start_pos < line.length
-          pos = line.index(/[|+]/, start_pos)  # Find the next occurrence of | or + starting from start_pos
-          break if pos.nil?  # Exit if no more delimiters are found
+          pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos
+          break if pos.nil? # Exit if no more delimiters are found
 
           currentPositions << pos
-          start_pos = pos + 1  # Move to the next character after the found delimiter
+          start_pos = pos + 1 # Move to the next character after the found delimiter
         end
 
-        #puts "Current positions: #{currentPositions}"
+        # puts "Current positions: #{currentPositions}"
 
         # Check if the last expected delimiter position is found in currentPositions
-        return currentPositions.include?(delimiterPositions[-1]) &&
-               line.match?(/\A[|+]/) &&  # Check if the line starts with | or +
-               currentPositions.all? { |pos| delimiterPositions.include?(pos) }  # Ensure all current positions are in delimiterPositions
+        currentPositions.include?(delimiterPositions[-1]) &&
+          line.match?(/\A[|+]/) && # Check if the line starts with | or +
+          # Ensure all current positions are in delimiterPositions
+          currentPositions.all? do |pos|
+            delimiterPositions.include?(pos)
+          end
       end
 
       # rubocop:disable Metrics/AbcSize -- PoC
       # rubocop:disable Metrics/CyclomaticComplexity -- PoC
       # rubocop:disable Metrics/PerceivedComplexity -- PoC
       def parse_pandoc_table_with_spans(pandoc_table)
-
         # Split the input into lines
         lines = pandoc_table.rstrip.split("\n").map(&:rstrip)
 
@@ -237,7 +240,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
         separator_index_max_columns = separator_indices.find { |i| lines[i].count("+") - 1 == number_of_columns }
         number_of_columns.times do |j|
           start_pos = j == 0 ? 0 : delimiter_positions[j - 1]
-          pos = lines[separator_index_max_columns][start_pos + 1..-1]&.index("+")
+          pos = lines[separator_index_max_columns][start_pos + 1..]&.index("+")
           delimiter_positions << (pos ? pos + start_pos + 1 : -1)
         end
 
@@ -266,7 +269,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
                                   end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
-            pos = lines[index][start_pos + 1..-1]&.index("+")
+            pos = lines[index][start_pos + 1..]&.index("+")
             header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
           end
           break
@@ -279,22 +282,22 @@ def parse_pandoc_table_with_spans(pandoc_table)
 
           parts.each_with_index do |part, part_index|
             default_alignments << if part.start_with?(":") && !part.end_with?(":")
-                                      'align="left"'
-                                    elsif !part.start_with?(":") && part.end_with?(":")
-                                      'align="right"'
-                                    else
-                                      'align="center"'
-                                    end
+                                    'align="left"'
+                                  elsif !part.start_with?(":") && part.end_with?(":")
+                                    'align="right"'
+                                  else
+                                    'align="center"'
+                                  end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
-            pos = lines[0][start_pos + 1..-1]&.index("+")
+            pos = lines[0][start_pos + 1..]&.index("+")
             header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
           end
         end
 
-        #Check end table delimiter alignment (not checked during the lines processing)
-	      raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1], delimiter_positions)
-
+        # Check end table delimiter alignment (not checked during the lines processing)
+        raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1],
+          delimiter_positions)
 
         # Process table body (including rows belonging to header as they are processed in the same way)
         data_rows = []
@@ -314,8 +317,9 @@ def parse_pandoc_table_with_spans(pandoc_table)
             # First line (normally a separator) of each block
             if separator?(line) && !in_data_row
               in_data_row = true
-              #Check end table delimiter alignment (not checked during the lines processing)
-	          raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+              # Check end table delimiter alignment (not checked during the lines processing)
+              raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line,
+                delimiter_positions)
 
               parts = line.strip.delete_prefix("+").split("+")
               delimiter_index = 0
@@ -329,7 +333,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 delimiter_index += parts[j].length + 1
                 rows[-1][i].position_start = delimiter_index - parts[j].length
                 rows[-1][i].position = delimiter_index
-                rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
+                rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments)
 
                 i += 1 while delimiter_index > delimiter_positions[i]
                 i += 1
@@ -338,8 +342,9 @@ def parse_pandoc_table_with_spans(pandoc_table)
             elsif in_data_row
               # Regular data row or partial separator
               if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
-                #Check end table delimiter alignment (not checked during the lines processing)
-                raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+                # Check end table delimiter alignment (not checked during the lines processing)
+                raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line,
+                  delimiter_positions)
 
                 cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)
 
@@ -353,7 +358,8 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   aux_delimiter_index += cells_content[i].length + 1
                   rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length
                   rows[-1][auxiliar_cell_index].position = aux_delimiter_index
-                  rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions, default_alignments )
+                  rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions,
+                    default_alignments)
 
                   auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
 
@@ -401,9 +407,9 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   end
                 end
               else # Data row
-                cells_content = line.strip.delete_prefix("|").split(/\|/)
-                #Check end table delimiter alignment (not checked during the lines processing)
-	              raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+                cells_content = line.strip.delete_prefix("|").split("|")
+                # Check end table delimiter alignment (not checked during the lines processing)
+                raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
 
                 column_index = 0
 
@@ -459,9 +465,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
 
               cell.content = cell.content.gsub(/</, "&lt;")
 
-              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/, "\\k<espace><strong>\\k<text></strong>")
+              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
+                "\\k<espace><strong>\\k<text></strong>")
 
-              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/, "\\k<espace><i>\\k<text></i>")
+              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
+                "\\k<espace><i>\\k<text></i>")
               # Convert newlines to HTML breaks
               cell.content = cell.content&.gsub("\n", "<br />")
             end
@@ -491,87 +499,87 @@ def parse_pandoc_table_with_spans(pandoc_table)
         end
 
         [header_rows, data_rows]
-      end # end of parse_pandoc_table_with_spans
+      end
 
       def generate_html_table_with_spans(pandoc_table)
-        begin
-          grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
-        rescue StandardError => e
-          logger = Logger.new($stdout)
-          logger.error("Grid table could not be generated: #{e.message}")
-
-          "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
-        else
-          html = "<table>\n"
-          has_header = false
+        grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
+      rescue StandardError => e
+        logger = Logger.new($stdout)
+        logger.error("Grid table could not be generated: #{e.message}")
+
+        "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
+      else
+        html = "<table>\n"
+        has_header = false
 
-          grid_header.each do |row|
-            row.each do |cell|
-              if cell.rowspan != 0 && cell.colspan != 0
-                has_header = true
-                break
-              end
+        grid_header.each do |row|
+          row.each do |cell|
+            if cell.rowspan != 0 && cell.colspan != 0
+              has_header = true
+              break
             end
           end
+        end
 
-          if has_header
-            html += "    <thead>\n"
-            grid_header.each do |row|
-              html += "        <tr>\n"
-              row.each do |cell|
-                next if cell.rowspan == 0 || cell.colspan == 0
-
-                # Prepare content, in case there's a list
-                matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
-                if matches
-                  list = "<ul>"
-                  matches.each do |match|
-                    list += "<li>#{match[1]}</li>"
-                  end
-                  list += "</ul>"
-                  cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
-                  # Enforce left alignment if cell contains a list
-                  cell.alignment = 'align="left"'
-                end
-
-                rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
-                colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-                html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
-              end
-              html += "        </tr>\n"
-            end
-            html += "    </thead>\n"
-          end
-
-          html += "    <tbody>\n"
-          grid_body.each do |row|
+        if has_header
+          html += "    <thead>\n"
+          grid_header.each do |row|
             html += "        <tr>\n"
             row.each do |cell|
               next if cell.rowspan == 0 || cell.colspan == 0
 
-              matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/)
+              # Prepare content, in case there's a list
+              matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
               if matches
                 list = "<ul>"
                 matches.each do |match|
                   list += "<li>#{match[1]}</li>"
                 end
-                cell.content = cell.content.gsub(/(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/, list)
+                list += "</ul>"
+                cell.content = cell.content.gsub(
+                  /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
                 # Enforce left alignment if cell contains a list
                 cell.alignment = 'align="left"'
               end
 
               rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
               colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-              html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
+              html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
             end
             html += "        </tr>\n"
           end
+          html += "    </thead>\n"
+        end
 
-          html += "    </tbody>\n"
-          html += "</table>"
-          html
+        html += "    <tbody>\n"
+        grid_body.each do |row|
+          html += "        <tr>\n"
+          row.each do |cell|
+            next if cell.rowspan == 0 || cell.colspan == 0
+
+            matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+            if matches
+              list = "<ul>"
+              matches.each do |match|
+                list += "<li>#{match[1]}</li>"
+              end
+              cell.content = cell.content.gsub(
+                /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
+              # Enforce left alignment if cell contains a list
+              cell.alignment = 'align="left"'
+            end
+
+            rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
+            colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
+            html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
+          end
+          html += "        </tr>\n"
         end
-      end # end of def generate_html_table_with_spans
+
+        html += "    </tbody>\n"
+        html += "</table>"
+        html
+      end
       # rubocop:enable Metrics/PerceivedComplexity
       # rubocop:enable Metrics/CyclomaticComplexity
       # rubocop:enable Metrics/AbcSize
@@ -594,7 +602,7 @@ def call
             match.to_s
           end
         end
-      end # end of def call
-    end # end of class GridTableFilter
-  end # end of module Filter
-end # end of module Banzai
+      end
+    end
+  end
+end
-- 
GitLab


From 08b0d44ef87931c2ac67221a209bb8bda02c46c6 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Tue, 25 Mar 2025 12:35:47 -0500
Subject: [PATCH 08/12] Additional rubocop fixes

---
 lib/banzai/filter/grid_table_filter.rb | 95 ++++++++++++++------------
 1 file changed, 52 insertions(+), 43 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 952d6cb13387b6..980a5b47b9d9f5 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -18,6 +18,7 @@
 module Banzai
   module Filter
     class GridTableFilter < HTML::Pipeline::TextFilter
+      # rubocop:disable Lint/MixedRegexpCaptureTypes -- PoC
       MARKDOWN_GRID_TABLE_BLOCK_REGEX = %r{
         (?<code>
           # Grid table blocks:
@@ -31,6 +32,7 @@ class GridTableFilter < HTML::Pipeline::TextFilter
 
         )
       }mx
+      # rubocop:enable Lint/MixedRegexpCaptureTypes
 
       require 'logger'
 
@@ -57,7 +59,7 @@ def initialize
           @list_flag = false
         end
 
-        def calculateAndSetAlignment(header_delimiter_positions, default_alignments)
+        def calculate_and_set_alignment(header_delimiter_positions, default_alignments)
           raise "Cell position must be set before calculating alignment" if @position.nil? || @position_start.nil?
 
           header_delimiter_index = 0
@@ -103,7 +105,7 @@ def []=(index, value)
           @row_tracker[index] = value
         end
 
-        def maxValue
+        def max_value
           @row_tracker.max
         end
       end
@@ -114,33 +116,36 @@ def separator?(line)
       end
 
       # Helper method to handle content in cells
+      # rubocop:disable Metrics/PerceivedComplexity -- PoC
       def handling_content(cell, content)
-        _c = content.strip
+        modified_content = content.strip
         if cell.content.nil?
           cell.rowspan += 1
           cell.colspan += 1
-          if _c.start_with?("- ") # List
+          if modified_content.start_with?("- ") # List
             cell.list_flag = true
-            _c = _c.gsub(/\\\s*$/, '\n')
-            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # Add list element end mark to know when the list element ends
+            modified_content = modified_content.gsub(/\\\s*$/, '\n')
+
+            # Add list element end mark to know when the list element ends
+            cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
           elsif cell.list_flag && !content.strip.empty?
-            _c = _c.gsub(/\\\s*$/, '\n')
-            cell.content = "#{_c}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark
-          elsif _c.empty?
+            modified_content = modified_content.gsub(/\\\s*$/, '\n')
+            cell.content = "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}" # add the list element end mark
+          elsif modified_content.empty?
             cell.content = "\n"
           else
-            cell.content = _c.gsub(/\\\s*$/, "\n")
+            cell.content = modified_content.gsub(/\\\s*$/, "\n")
           end
-        elsif _c.start_with?("- ")
+        elsif modified_content.start_with?("- ")
           cell.content += "\n" unless cell.list_flag
           cell.list_flag = true
-          _c = _c.gsub(/\\\s*$/, '\n')
-          cell.content += "#{_c}#{NEXT_ELEMENT_LIST_MARK}"
-        elsif cell.list_flag && !_c.empty?
+          modified_content = modified_content.gsub(/\\\s*$/, '\n')
+          cell.content += "#{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
+        elsif cell.list_flag && !modified_content.empty?
           cell.content = cell.content.strip.chomp(NEXT_ELEMENT_LIST_MARK.to_s)
-          _c = _c.gsub(/\\\s*$/, '\n')
-          cell.content += " #{_c}#{NEXT_ELEMENT_LIST_MARK}"
-        elsif _c.empty?
+          modified_content = modified_content.gsub(/\\\s*$/, '\n')
+          cell.content += " #{modified_content}#{NEXT_ELEMENT_LIST_MARK}"
+        elsif modified_content.empty?
           if cell.list_flag
             cell.list_flag = false
             cell.content += "\n\n"
@@ -148,12 +153,13 @@ def handling_content(cell, content)
 
           cell.content += cell.content.end_with?("\n") ? "" : "\n"
         else
-          _c = _c.gsub(/\\\s*$/, "\n")
-          cell.content += " #{_c}"
+          modified_content = modified_content.gsub(/\\\s*$/, "\n")
+          cell.content += " #{modified_content}"
         end
 
         cell
       end
+      # rubocop:enable Metrics/PerceivedComplexity
 
       # Helper method to adjust colspan
       def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns, delimiter_positions)
@@ -191,32 +197,32 @@ def adjust_colspan(row, column_index, number_of_parts, line, number_of_columns,
         row[column_index]
       end
 
-      def checkDelimiterAlignment(line, delimiterPositions)
-        return false if line.empty? || delimiterPositions.empty?
+      def check_delimiter_alignment(line, delimiter_positions)
+        return false if line.empty? || delimiter_positions.empty?
 
         # puts "\nChecking line: #{line}"
-        # puts "Expected delimiter positions: #{delimiterPositions}"
+        # puts "Expected delimiter positions: #{delimiter_positions}"
 
         # For any row (only +, only |, mix of + and |)
-        currentPositions = []
+        current_positions = []
         start_pos = 1
 
         while start_pos < line.length
           pos = line.index(/[|+]/, start_pos) # Find the next occurrence of | or + starting from start_pos
           break if pos.nil? # Exit if no more delimiters are found
 
-          currentPositions << pos
+          current_positions << pos
           start_pos = pos + 1 # Move to the next character after the found delimiter
         end
 
-        # puts "Current positions: #{currentPositions}"
+        # puts "Current positions: #{current_positions}"
 
-        # Check if the last expected delimiter position is found in currentPositions
-        currentPositions.include?(delimiterPositions[-1]) &&
+        # Check if the last expected delimiter position is found in current_positions
+        current_positions.include?(delimiter_positions[-1]) &&
           line.match?(/\A[|+]/) && # Check if the line starts with | or +
-          # Ensure all current positions are in delimiterPositions
-          currentPositions.all? do |pos|
-            delimiterPositions.include?(pos)
+          # Ensure all current positions are in delimiter_positions
+          current_positions.all? do |pos|
+            delimiter_positions.include?(pos)
           end
       end
 
@@ -296,7 +302,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
         end
 
         # Check end table delimiter alignment (not checked during the lines processing)
-        raise "Misaligned delimiters in table separators: #{lines[-1]}" unless checkDelimiterAlignment(lines[-1],
+        raise "Misaligned delimiters in table separators: #{lines[-1]}" unless check_delimiter_alignment(lines[-1],
           delimiter_positions)
 
         # Process table body (including rows belonging to header as they are processed in the same way)
@@ -318,7 +324,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
             if separator?(line) && !in_data_row
               in_data_row = true
               # Check end table delimiter alignment (not checked during the lines processing)
-              raise "Misaligned delimiters in separator row: #{line}" unless checkDelimiterAlignment(line,
+              raise "Misaligned delimiters in separator row: #{line}" unless check_delimiter_alignment(line,
                 delimiter_positions)
 
               parts = line.strip.delete_prefix("+").split("+")
@@ -333,7 +339,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 delimiter_index += parts[j].length + 1
                 rows[-1][i].position_start = delimiter_index - parts[j].length
                 rows[-1][i].position = delimiter_index
-                rows[-1][i].calculateAndSetAlignment(header_delimiter_positions, default_alignments)
+                rows[-1][i].calculate_and_set_alignment(header_delimiter_positions, default_alignments)
 
                 i += 1 while delimiter_index > delimiter_positions[i]
                 i += 1
@@ -343,7 +349,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
               # Regular data row or partial separator
               if GRID_TABLE_BODY_SEPARATOR.match?(line) # Partial separator
                 # Check end table delimiter alignment (not checked during the lines processing)
-                raise "Misaligned delimiters in partial separator: #{line}" unless checkDelimiterAlignment(line,
+                raise "Misaligned delimiters in partial separator: #{line}" unless check_delimiter_alignment(line,
                   delimiter_positions)
 
                 cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)
@@ -358,7 +364,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   aux_delimiter_index += cells_content[i].length + 1
                   rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length
                   rows[-1][auxiliar_cell_index].position = aux_delimiter_index
-                  rows[-1][auxiliar_cell_index].calculateAndSetAlignment(header_delimiter_positions,
+                  rows[-1][auxiliar_cell_index].calculate_and_set_alignment(header_delimiter_positions,
                     default_alignments)
 
                   auxiliar_cell_index += 1 while aux_delimiter_index > delimiter_positions[auxiliar_cell_index]
@@ -369,11 +375,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 raise "More cells than columns found" unless cells_content.length <= number_of_columns
 
                 column_index = 0
-                maxRowTracker = rows_tracker.maxValue
+                max_row_tracker = rows_tracker.max_value
 
                 cells_content.each_with_index do |content, _i|
                   if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
-                    rows_tracker[column_index] = maxRowTracker + 1
+                    rows_tracker[column_index] = max_row_tracker + 1
                     rows[rows_tracker[column_index]][column_index].list_flag = false
 
                     column_forward = 0
@@ -409,7 +415,8 @@ def parse_pandoc_table_with_spans(pandoc_table)
               else # Data row
                 cells_content = line.strip.delete_prefix("|").split("|")
                 # Check end table delimiter alignment (not checked during the lines processing)
-                raise "Misaligned delimiters in row: #{line}" unless checkDelimiterAlignment(line, delimiter_positions)
+                raise "Misaligned delimiters in row: #{line}" unless check_delimiter_alignment(
+                  line, delimiter_positions)
 
                 column_index = 0
 
@@ -446,9 +453,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
             end
           end
 
-          if has_header && start >= header_separator_index
-            rows.each { |body_row| data_rows << body_row.cells }
-          elsif has_header && start < header_separator_index
+          if has_header && start < header_separator_index
             rows.each { |header_row| header_rows << header_row.cells }
           else
             rows.each { |body_row| data_rows << body_row.cells }
@@ -529,7 +534,9 @@ def generate_html_table_with_spans(pandoc_table)
               next if cell.rowspan == 0 || cell.colspan == 0
 
               # Prepare content, in case there's a list
-              matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+              matches = cell.content&.scan(
+                /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+
               if matches
                 list = "<ul>"
                 matches.each do |match|
@@ -557,7 +564,9 @@ def generate_html_table_with_spans(pandoc_table)
           row.each do |cell|
             next if cell.rowspan == 0 || cell.colspan == 0
 
-            matches = cell.content&.scan(/\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+            matches = cell.content&.scan(
+              /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+
             if matches
               list = "<ul>"
               matches.each do |match|
-- 
GitLab


From d7cf2c9f6f89cf18e2816ba2c2fb53cebf57c357 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Fri, 29 Aug 2025 11:48:23 -0500
Subject: [PATCH 09/12] Update from customer

---
 lib/banzai/filter/grid_table_filter.rb | 182 +++++++++++++------------
 1 file changed, 98 insertions(+), 84 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index 980a5b47b9d9f5..d36d05c09b66e1 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -267,11 +267,11 @@ def parse_pandoc_table_with_spans(pandoc_table)
 
           parts.each_with_index do |part, part_index|
             default_alignments << if part.start_with?(":") && !part.end_with?(":")
-                                    'align="left"'
+                                    'left'
                                   elsif !part.start_with?(":") && part.end_with?(":")
-                                    'align="right"'
+                                    'right'
                                   else
-                                    'align="center"'
+                                    'center'
                                   end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
@@ -284,19 +284,20 @@ def parse_pandoc_table_with_spans(pandoc_table)
         unless has_header
           # Set default alignments from the first separator which takes the role of header
           header_separator_index = 0
-          parts = lines[0].strip.delete_prefix("+").split("+")
+          line  = lines.find { |l| !l.strip.empty? }  # first non-blank line
+          parts = line.strip.delete_prefix("+").split("+")
 
           parts.each_with_index do |part, part_index|
             default_alignments << if part.start_with?(":") && !part.end_with?(":")
-                                    'align="left"'
-                                  elsif !part.start_with?(":") && part.end_with?(":")
-                                    'align="right"'
-                                  else
-                                    'align="center"'
-                                  end
+                                      'left'
+                                    elsif !part.start_with?(":") && part.end_with?(":")
+                                      'right'
+                                    else
+                                      'center'
+                                    end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
-            pos = lines[0][start_pos + 1..]&.index("+")
+            pos = line[start_pos + 1..]&.index("+")
             header_delimiter_positions << (pos ? pos + start_pos + 1 : -1)
           end
         end
@@ -352,17 +353,17 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 raise "Misaligned delimiters in partial separator: #{line}" unless check_delimiter_alignment(line,
                   delimiter_positions)
 
-                cells_content = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)
+                parts = line.strip.gsub(/^(\+|\|)/, '').split(/[\|\+]/)
 
                 rows << Row.new(number_of_columns)
                 aux_delimiter_index = 0
                 auxiliar_cell_index = 0
 
-                cells_content.each_with_index do |_, i|
+                parts.each_with_index do |_, i|
                   next unless auxiliar_cell_index < number_of_columns
 
-                  aux_delimiter_index += cells_content[i].length + 1
-                  rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - cells_content[i].length
+                  aux_delimiter_index += parts[i].length + 1
+                  rows[-1][auxiliar_cell_index].position_start = aux_delimiter_index - parts[i].length
                   rows[-1][auxiliar_cell_index].position = aux_delimiter_index
                   rows[-1][auxiliar_cell_index].calculate_and_set_alignment(header_delimiter_positions,
                     default_alignments)
@@ -372,12 +373,12 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   auxiliar_cell_index += 1
                 end
 
-                raise "More cells than columns found" unless cells_content.length <= number_of_columns
+                raise "More cells than columns found" unless parts.length <= number_of_columns
 
                 column_index = 0
                 max_row_tracker = rows_tracker.max_value
 
-                cells_content.each_with_index do |content, _i|
+                parts.each_with_index do |content, _i|
                   if GRID_TABLE_BODY_SEPARATOR_LINE.match?(content) # Separator - split row
                     rows_tracker[column_index] = max_row_tracker + 1
                     rows[rows_tracker[column_index]][column_index].list_flag = false
@@ -418,6 +419,10 @@ def parse_pandoc_table_with_spans(pandoc_table)
                 raise "Misaligned delimiters in row: #{line}" unless check_delimiter_alignment(
                   line, delimiter_positions)
 
+                raise "Missing delimiters in previous separator line" if parts.length < cells_content.length
+
+                #raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length}, expected delimiters = #{parts.length}" if parts.length > cells_content.length
+
                 column_index = 0
 
                 if cells_content.length < number_of_columns
@@ -468,13 +473,13 @@ def parse_pandoc_table_with_spans(pandoc_table)
             row.each do |cell|
               next if cell.content.nil?
 
-              cell.content = cell.content.gsub(/</, "&lt;")
+              #cell.content = cell.content.gsub(/</, "&lt;")
 
-              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
-                "\\k<espace><strong>\\k<text></strong>")
+              #cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
+              #  "\\k<espace><strong>\\k<text></strong>")
 
-              cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
-                "\\k<espace><i>\\k<text></i>")
+              #cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
+              #  "\\k<espace><i>\\k<text></i>")
               # Convert newlines to HTML breaks
               cell.content = cell.content&.gsub("\n", "<br />")
             end
@@ -486,17 +491,24 @@ def parse_pandoc_table_with_spans(pandoc_table)
           rows.each_with_index do |row, row_index|
             forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty?
             sum = 0
-
+            row_forward_rowspan = forward_rowspan.dup
             row.each_with_index do |cell, cell_index|
               sum += cell.colspan
 
-              if row_index > 0 && cell.colspan == 0
-                sum += 1 if forward_rowspan[cell_index] > 0
-
-                forward_rowspan[cell_index] -= 1
+              if cell.colspan == 0
+                if row_forward_rowspan[cell_index] > 0
+                  sum += 1
+                  forward_rowspan[cell_index] -= 1
+                end
+              end
+              if row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1
+                forward_rowspan[cell_index] = cell.rowspan - 1
+                colspan = 1
+                while cell.colspan > colspan
+                  forward_rowspan[cell_index + colspan] = cell.rowspan - 1
+                  colspan += 1
+                end
               end
-
-              forward_rowspan[cell_index] = cell.rowspan - 1 if forward_rowspan[cell_index] == 0 && cell.rowspan > 1
             end
 
             raise "Grid table not converted properly" unless sum == number_of_columns
@@ -507,33 +519,64 @@ def parse_pandoc_table_with_spans(pandoc_table)
       end
 
       def generate_html_table_with_spans(pandoc_table)
-        grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
-      rescue StandardError => e
-        logger = Logger.new($stdout)
-        logger.error("Grid table could not be generated: #{e.message}")
-
-        "HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOGS"
-      else
-        html = "<table>\n"
-        has_header = false
+        begin
+          grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
+        rescue StandardError => e
+          logger = Logger.new($stdout)
+          logger.error("Grid table could not be generated: #{e.message}")
+
+          "\n\nHTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOG FILE\n\n#{e.message}\n\nCommit ID: ce3607dbcafafe03531c1c50b3f749cc2318656c\n\n"
+        else
+          html = '<table markdown="1">'
+          has_header = false
+
+          grid_header.each do |row|
+            row.each do |cell|
+              if cell.rowspan != 0 && cell.colspan != 0
+                has_header = true
+                break
+              end
+            end
+          end
 
-        grid_header.each do |row|
-          row.each do |cell|
-            if cell.rowspan != 0 && cell.colspan != 0
-              has_header = true
-              break
+          if has_header
+            html += '<thead markdown="1">'
+            grid_header.each do |row|
+              html += '<tr markdown="1">'
+              row.each do |cell|
+                next if cell.rowspan == 0 || cell.colspan == 0
+
+                # Prepare content, in case there's a list
+                matches = cell.content&.scan(
+                  /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
+
+                if matches
+                  list = "<ul>"
+                  matches.each do |match|
+                    list += "<li>#{match[1]}</li>"
+                  end
+                  list += "</ul>"
+                  cell.content = cell.content.gsub(
+                    /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
+                  # Enforce left alignment if cell contains a list
+                  cell.alignment = 'left'
+                end
+
+                rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
+                colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
+                html += %(<th#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</th>)
+              end
+              html += '</tr>'
             end
+            html += '</thead>'
           end
-        end
 
-        if has_header
-          html += "    <thead>\n"
-          grid_header.each do |row|
-            html += "        <tr>\n"
+          html += '<tbody markdown="1">'
+          grid_body.each do |row|
+            html += '<tr markdown="1">'
             row.each do |cell|
               next if cell.rowspan == 0 || cell.colspan == 0
 
-              # Prepare content, in case there's a list
               matches = cell.content&.scan(
                 /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
 
@@ -542,52 +585,23 @@ def generate_html_table_with_spans(pandoc_table)
                 matches.each do |match|
                   list += "<li>#{match[1]}</li>"
                 end
-                list += "</ul>"
                 cell.content = cell.content.gsub(
                   /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
                 # Enforce left alignment if cell contains a list
-                cell.alignment = 'align="left"'
+                cell.alignment = 'left'
               end
 
               rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
               colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-              html += %(            <th#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</th>\n)
+              html += %(<td#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</td>)
             end
-            html += "        </tr>\n"
+            html += '</tr>'
           end
-          html += "    </thead>\n"
-        end
-
-        html += "    <tbody>\n"
-        grid_body.each do |row|
-          html += "        <tr>\n"
-          row.each do |cell|
-            next if cell.rowspan == 0 || cell.colspan == 0
-
-            matches = cell.content&.scan(
-              /\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+?)#{NEXT_ELEMENT_LIST_MARK}\n?/o)
-
-            if matches
-              list = "<ul>"
-              matches.each do |match|
-                list += "<li>#{match[1]}</li>"
-              end
-              cell.content = cell.content.gsub(
-                /(\s*([-*+]|\d+\.)\s+([^#{NEXT_ELEMENT_LIST_MARK}]+#{NEXT_ELEMENT_LIST_MARK}\n?))+/o, list)
-              # Enforce left alignment if cell contains a list
-              cell.alignment = 'align="left"'
-            end
 
-            rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
-            colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-            html += %(            <td#{rowspan}#{colspan} #{cell.alignment}>#{cell.content}</td>\n)
-          end
-          html += "        </tr>\n"
+          html += '</tbody>'
+          html += '</table>'
+          html
         end
-
-        html += "    </tbody>\n"
-        html += "</table>"
-        html
       end
       # rubocop:enable Metrics/PerceivedComplexity
       # rubocop:enable Metrics/CyclomaticComplexity
-- 
GitLab


From cecca3efadc401c38c0ff23a468e474500ca27d6 Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Fri, 29 Aug 2025 11:56:11 -0500
Subject: [PATCH 10/12] Rubocop fixes

---
 lib/banzai/filter/grid_table_filter.rb | 57 ++++++++++++++++++--------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index d36d05c09b66e1..ff5869f8c8f226 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -284,17 +284,17 @@ def parse_pandoc_table_with_spans(pandoc_table)
         unless has_header
           # Set default alignments from the first separator which takes the role of header
           header_separator_index = 0
-          line  = lines.find { |l| !l.strip.empty? }  # first non-blank line
+          line  = lines.find { |l| !l.strip.empty? } # first non-blank line
           parts = line.strip.delete_prefix("+").split("+")
 
           parts.each_with_index do |part, part_index|
             default_alignments << if part.start_with?(":") && !part.end_with?(":")
-                                      'left'
-                                    elsif !part.start_with?(":") && part.end_with?(":")
-                                      'right'
-                                    else
-                                      'center'
-                                    end
+                                    'left'
+                                  elsif !part.start_with?(":") && part.end_with?(":")
+                                    'right'
+                                  else
+                                    'center'
+                                  end
 
             start_pos = part_index == 0 ? 0 : header_delimiter_positions[part_index - 1]
             pos = line[start_pos + 1..]&.index("+")
@@ -421,7 +421,8 @@ def parse_pandoc_table_with_spans(pandoc_table)
 
                 raise "Missing delimiters in previous separator line" if parts.length < cells_content.length
 
-                #raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length}, expected delimiters = #{parts.length}" if parts.length > cells_content.length
+                # raise "Missing delimiters in row: #{line}: delimiters = #{cells_content.length},
+                # expected delimiters = #{parts.length}" if parts.length > cells_content.length
 
                 column_index = 0
 
@@ -473,13 +474,13 @@ def parse_pandoc_table_with_spans(pandoc_table)
             row.each do |cell|
               next if cell.content.nil?
 
-              #cell.content = cell.content.gsub(/</, "&lt;")
+              # cell.content = cell.content.gsub(/</, "&lt;")
 
-              #cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
-              #  "\\k<espace><strong>\\k<text></strong>")
+              # cell.content = cell.content.gsub(/(?<espace>^|\s)(?<bold>\*\*|__)(?<text>.+?)\g<bold>(?!\w)/,
+              #   "\\k<espace><strong>\\k<text></strong>")
 
-              #cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
-              #  "\\k<espace><i>\\k<text></i>")
+              # cell.content = cell.content.gsub(/(?<espace>^|\s)(?<italic>\*|_)(?<text>.+?)\g<italic>(?!\w)/,
+              #   "\\k<espace><i>\\k<text></i>")
               # Convert newlines to HTML breaks
               cell.content = cell.content&.gsub("\n", "<br />")
             end
@@ -488,7 +489,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
           # Validate grid correctness
           forward_rowspan = []
 
-          rows.each_with_index do |row, row_index|
+          rows.each_with_index do |row, _row_index|
             forward_rowspan = Array.new(row.length, 0) if forward_rowspan.empty?
             sum = 0
             row_forward_rowspan = forward_rowspan.dup
@@ -501,6 +502,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
                   forward_rowspan[cell_index] -= 1
                 end
               end
+
               if row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1
                 forward_rowspan[cell_index] = cell.rowspan - 1
                 colspan = 1
@@ -525,7 +527,16 @@ def generate_html_table_with_spans(pandoc_table)
           logger = Logger.new($stdout)
           logger.error("Grid table could not be generated: #{e.message}")
 
-          "\n\nHTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOG FILE\n\n#{e.message}\n\nCommit ID: ce3607dbcafafe03531c1c50b3f749cc2318656c\n\n"
+          <<~MESSAGE
+
+
+            HTML TABLE COULD NOT BE GENERATED FROM MARKDOWN GRID TABLE. CHECK LOG FILE
+
+            #{e.message}
+
+            Commit ID: ce3607dbcafafe03531c1c50b3f749cc2318656c
+
+          MESSAGE
         else
           html = '<table markdown="1">'
           has_header = false
@@ -564,7 +575,13 @@ def generate_html_table_with_spans(pandoc_table)
 
                 rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
                 colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-                html += %(<th#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</th>)
+                html += <<~TABLE_HEADER
+                  <th#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">
+
+                  #{cell.content}
+
+                  </th>
+                TABLE_HEADER
               end
               html += '</tr>'
             end
@@ -593,7 +610,13 @@ def generate_html_table_with_spans(pandoc_table)
 
               rowspan = cell.rowspan > 1 ? %( rowspan="#{cell.rowspan}") : ""
               colspan = cell.colspan > 1 ? %( colspan="#{cell.colspan}") : ""
-              html += %(<td#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">\n\n#{cell.content}\n\n</td>)
+              html += <<~TABLE_DATA
+                <td#{rowspan}#{colspan} markdown="1" style="text-align: #{cell.alignment}">
+
+                #{cell.content}
+
+                </td>
+              TABLE_DATA
             end
             html += '</tr>'
           end
-- 
GitLab


From 536fe4adc199b729237450794221376c7594020f Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Fri, 29 Aug 2025 13:16:02 -0500
Subject: [PATCH 11/12] Adding an initial spec file

---
 .../banzai/filter/grid_table_filter_spec.rb   | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 spec/lib/banzai/filter/grid_table_filter_spec.rb

diff --git a/spec/lib/banzai/filter/grid_table_filter_spec.rb b/spec/lib/banzai/filter/grid_table_filter_spec.rb
new file mode 100644
index 00000000000000..745f9e2dcfa834
--- /dev/null
+++ b/spec/lib/banzai/filter/grid_table_filter_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Banzai::Filter::GridTableFilter, feature_category: :markdown do
+  include FilterSpecHelper
+
+  # Calls stub_commonmark_sourcepos_disabled, then renders `text` via Banzai.render_and_post_process.
+  def run_pipeline(text, context = { project: nil })
+    stub_commonmark_sourcepos_disabled
+
+    Banzai.render_and_post_process(text, context)
+  end
+
+  context 'when testing just with the filter' do
+    it 'parses a basic single row table' do
+      text = <<~TEXT
+        +-----+-----+-----+
+        |  A  |  B  |  C  |
+        +-----+-----+-----+
+      TEXT
+
+      html = <<~HTML
+        <table markdown="1"><tbody markdown="1"><tr markdown="1"><td markdown="1" style="text-align: left">
+
+        A
+
+        </td>
+        <td markdown="1" style="text-align: left">
+
+        B
+
+        </td>
+        <td markdown="1" style="text-align: left">
+
+        C
+
+        </td>
+        </tr></tbody></table>
+      HTML
+
+      expect(filter(text)).to eq html
+    end
+  end
+
+  context 'when testing with the full pipeline' do
+    it 'parses a basic single row table' do
+      text = <<~TEXT
+        +-----+-----+-----+
+        |  A  |  B  |  C  |
+        +-----+-----+-----+
+      TEXT
+
+      html = <<~HTML
+        <table dir="auto"><tbody><tr>
+        <td style="text-align: left">
+        <p>A</p>
+        </td>
+        <td style="text-align: left">
+        <p>B</p>
+        </td>
+        <td style="text-align: left">
+        <p>C</p>
+        </td>
+        </tr></tbody></table>
+      HTML
+
+      expect(run_pipeline(text)).to eq html.strip
+    end
+  end
+end
-- 
GitLab


From 7935fc23b22fc34bde1d29bfdd34be2e9bd5627a Mon Sep 17 00:00:00 2001
From: Brett Walker <bwalker@gitlab.com>
Date: Fri, 29 Aug 2025 13:40:48 -0500
Subject: [PATCH 12/12] Minor rubocop fixes

---
 lib/banzai/filter/grid_table_filter.rb | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/banzai/filter/grid_table_filter.rb b/lib/banzai/filter/grid_table_filter.rb
index ff5869f8c8f226..7515c0c8636583 100644
--- a/lib/banzai/filter/grid_table_filter.rb
+++ b/lib/banzai/filter/grid_table_filter.rb
@@ -496,20 +496,18 @@ def parse_pandoc_table_with_spans(pandoc_table)
             row.each_with_index do |cell, cell_index|
               sum += cell.colspan
 
-              if cell.colspan == 0
-                if row_forward_rowspan[cell_index] > 0
-                  sum += 1
-                  forward_rowspan[cell_index] -= 1
-                end
+              if cell.colspan == 0 && row_forward_rowspan[cell_index] > 0
+                sum += 1
+                forward_rowspan[cell_index] -= 1
               end
 
-              if row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1
-                forward_rowspan[cell_index] = cell.rowspan - 1
-                colspan = 1
-                while cell.colspan > colspan
-                  forward_rowspan[cell_index + colspan] = cell.rowspan - 1
-                  colspan += 1
-                end
+              next unless row_forward_rowspan[cell_index] == 0 && cell.rowspan > 1
+
+              forward_rowspan[cell_index] = cell.rowspan - 1
+              colspan = 1
+              while cell.colspan > colspan
+                forward_rowspan[cell_index + colspan] = cell.rowspan - 1
+                colspan += 1
               end
             end
 
@@ -520,6 +518,7 @@ def parse_pandoc_table_with_spans(pandoc_table)
         [header_rows, data_rows]
       end
 
+      # rubocop:disable Style/RedundantBegin -- PoC
       def generate_html_table_with_spans(pandoc_table)
         begin
           grid_header, grid_body = parse_pandoc_table_with_spans(pandoc_table)
@@ -626,6 +625,7 @@ def generate_html_table_with_spans(pandoc_table)
           html
         end
       end
+      # rubocop:enable Style/RedundantBegin
       # rubocop:enable Metrics/PerceivedComplexity
       # rubocop:enable Metrics/CyclomaticComplexity
       # rubocop:enable Metrics/AbcSize
-- 
GitLab