diff --git a/Gemfile b/Gemfile index d62959899cecce307f2577160cff3b8bd3fc5805..73dee87044d04a25851641fd363f532480c441db 100644 --- a/Gemfile +++ b/Gemfile @@ -257,7 +257,6 @@ gem 'gitlab-active-context', path: 'gems/gitlab-active-context', require: 'activ gem 'html-pipeline', '~> 2.14.3', feature_category: :markdown gem 'deckar01-task_list', '2.3.4', feature_category: :markdown gem 'gitlab-markup', '~> 2.0.0', require: 'github/markup', feature_category: :markdown -gem 'commonmarker', '~> 0.23.10', feature_category: :markdown gem 'kramdown', '~> 2.5.0', feature_category: :markdown gem 'RedCloth', '~> 4.3.3', feature_category: :markdown gem 'org-ruby', '~> 0.9.12', feature_category: :markdown diff --git a/Gemfile.lock b/Gemfile.lock index 44ef79d1478d09cf142f3f773b29baac733020b6..02fffee2872d0a4903c46c1e62828db5928d47d7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2099,7 +2099,6 @@ DEPENDENCIES circuitbox (= 2.0.0) click_house-client! cloud_profiler_agent (~> 0.0.0)! - commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) countries (~> 4.0.0) diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 9f3018e8a28072da8cf55b8fb8f1d60bbf057fdf..e85f13ee9cf521db18288834ff352bead54350c2 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -2094,7 +2094,6 @@ DEPENDENCIES circuitbox (= 2.0.0) click_house-client! cloud_profiler_agent (~> 0.0.0)! - commonmarker (~> 0.23.10) concurrent-ruby (~> 1.1) connection_pool (~> 2.5.3) countries (~> 4.0.0) diff --git a/doc/development/gitlab_flavored_markdown/banzai_pipeline_and_parsing.md b/doc/development/gitlab_flavored_markdown/banzai_pipeline_and_parsing.md index 5cef90f0cd1aaaae7bc45f2cbda3411e3f547cbd..797bd964f234c09438d85ae4b2c02043cd993253 100644 --- a/doc/development/gitlab_flavored_markdown/banzai_pipeline_and_parsing.md +++ b/doc/development/gitlab_flavored_markdown/banzai_pipeline_and_parsing.md @@ -41,9 +41,7 @@ This pipeline contains the filters for transforming raw Markdown into HTML, hand #### `Filter::MarkdownFilter` -This filter interfaces with the actual Markdown parser. The primary parser uses our [`gitlab-glfm-markdown`](https://gitlab.com/gitlab-org/ruby/gems/gitlab-glfm-markdown) Ruby gem that uses the [`comrak`](https://github.com/kivikakk/comrak) Rust crate. - -A secondary deprecated parser engine uses the [`commonmarker`](https://github.com/gjtorikian/commonmarker/releases/tag/v0.23.11) Ruby gem to interact with the [`cmark-gfm`](https://github.com/github/cmark-gfm) library. +This filter interfaces with the actual Markdown parser. The parser uses our [`gitlab-glfm-markdown`](https://gitlab.com/gitlab-org/ruby/gems/gitlab-glfm-markdown) Ruby gem that uses the [`comrak`](https://github.com/kivikakk/comrak) Rust crate. Text is passed into this filter, and by calling the specified parser engine, generates the corresponding basic HTML. diff --git a/lib/banzai/filter/autolink_filter.rb b/lib/banzai/filter/autolink_filter.rb index 8b2399ab3d2d24d075ab81151f22d85786d32245..53dfea2f70498c755ec92cc776626d5893ca1d21 100644 --- a/lib/banzai/filter/autolink_filter.rb +++ b/lib/banzai/filter/autolink_filter.rb @@ -124,7 +124,7 @@ def autolink_match(match) end # Since this came from a Text node, make sure the new href is encoded. - # `commonmarker` percent encodes the domains of links it handles, so + # Markdown renderer percent encodes the domains of links it handles, so # do the same (instead of using `normalized_encode`). begin href_safe = Addressable::URI.encode(match).html_safe diff --git a/lib/banzai/filter/code_language_filter.rb b/lib/banzai/filter/code_language_filter.rb index 37994d0e7072236fec0321a44506d24268559e3f..d30fd6c43b9c8330aea014dba4ba7f77c20bf8a7 100644 --- a/lib/banzai/filter/code_language_filter.rb +++ b/lib/banzai/filter/code_language_filter.rb @@ -41,14 +41,14 @@ def transform_node(code_node) pre_node.set_attribute(LANG_ATTR, escape_once(lang)) if lang.present? pre_node.set_attribute(LANG_PARAMS_ATTR, escape_once(lang_params)) if lang_params.present? - # cmark-gfm added this, it's now in data-lang-params + # markdown rendered added this, it's now in data-lang-params pre_node.remove_attribute('data-meta') code_node.remove_attribute('data-meta') end private - # cmark-gfm's FULL_INFO_STRING render option works with the space delimiter. + # the `full_info_string` render option works with the space delimiter. # Which means the language specified on a code block is parsed with spaces. Anything # after the first space is placed in the `data-meta` attribute. # However GitLab recognizes `:` as an additional delimiter on the lang attribute. @@ -67,7 +67,7 @@ def parse_lang_params(code_node) language, language_params = language.split(LANG_PARAMS_DELIMITER, 2) - # cmark-gfm places extra lang parameters into data-meta + # markdown renderer places extra lang parameters into data-meta language_params = [pre_node.attr('data-meta'), code_node.attr('data-meta'), language_params].compact.join(' ') [language, language_params] diff --git a/lib/banzai/filter/gollum_tags_filter.rb b/lib/banzai/filter/gollum_tags_filter.rb index 6aad2613c8602448d3f71b501b8717ab9dbe472f..0899bacffdf2358f94bb07fab5ac385f5e0ff67a 100644 --- a/lib/banzai/filter/gollum_tags_filter.rb +++ b/lib/banzai/filter/gollum_tags_filter.rb @@ -3,7 +3,7 @@ module Banzai module Filter # HTML Filter for parsing Gollum's tags in HTML. - # Only used for the ascii_doc pipeline or the older cmark parser. + # Only used for the ascii_doc pipeline. # # It's only parses the following tags: # @@ -35,7 +35,7 @@ class GollumTagsFilter < HTML::Pipeline::Filter IGNORED_ANCESTOR_TAGS = %w[pre code tt].to_set def call - return doc if MarkdownFilter.glfm_markdown?(context) && context[:pipeline] != :ascii_doc + return doc if context[:pipeline] != :ascii_doc doc.xpath('descendant-or-self::text()').each do |node| next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) diff --git a/lib/banzai/filter/markdown_engines/cmark.rb b/lib/banzai/filter/markdown_engines/cmark.rb deleted file mode 100644 index abcfd9149d6a1bef79d8ca74028739ba0e9d340f..0000000000000000000000000000000000000000 --- a/lib/banzai/filter/markdown_engines/cmark.rb +++ /dev/null @@ -1,51 +0,0 @@ -# frozen_string_literal: true - -# `CommonMark` markdown engine for GitLab's Banzai markdown filter. -# This module is used in Banzai::Filter::MarkdownFilter. -# Used gem is `commonmarker` which is a ruby wrapper for cmark-gfm (CommonMark parser) -# including GitHub's GFM extensions. -# We now utilize the renderer built in `C`, rather than the ruby based renderer. -# Homepage: https://github.com/gjtorikian/commonmarker -# -# Although this engine is currently not actively used, let's keep it here -# for performance testing and as a backup. -module Banzai - module Filter - module MarkdownEngines - class Cmark < Base - EXTENSIONS = [ - :autolink, # provides support for automatically converting URLs to anchor tags. - :strikethrough, # provides support for strikethroughs. - :table # provides support for tables. - ].freeze - - PARSE_OPTIONS = [ - :FOOTNOTES, # parse footnotes. - :STRIKETHROUGH_DOUBLE_TILDE, # parse strikethroughs by double tildes (as redcarpet does). - :VALIDATE_UTF8 # replace illegal sequences with the replacement character U+FFFD. - ].freeze - - RENDER_OPTIONS = [ - :GITHUB_PRE_LANG, # use GitHub-style
 for fenced code blocks.
-          :FOOTNOTES,        # render footnotes.
-          :FULL_INFO_STRING, # include full info strings of code blocks in separate attribute.
-          :UNSAFE            # allow raw/custom HTML and unsafe links.
-        ].freeze
-
-        RENDER_OPTIONS_SOURCEPOS = RENDER_OPTIONS + [:SOURCEPOS].freeze
-
-        def render(text)
-          CommonMarker.render_html(text, render_options, EXTENSIONS)
-        end
-
-        private
-
-        def render_options
-          sourcepos_disabled? ? RENDER_OPTIONS : RENDER_OPTIONS_SOURCEPOS
-        end
-      end
-    end
-  end
-end
-
-Banzai::Filter::MarkdownEngines::Cmark.prepend_mod
diff --git a/lib/banzai/filter/markdown_filter.rb b/lib/banzai/filter/markdown_filter.rb
index 7b4a8f31836b8f30131c1361d47ba5d15cf28c92..e6827d5d1c234309ac5399268038531399aee21c 100644
--- a/lib/banzai/filter/markdown_filter.rb
+++ b/lib/banzai/filter/markdown_filter.rb
@@ -4,7 +4,6 @@ module Banzai
   module Filter
     class MarkdownFilter < HTML::Pipeline::TextFilter
       GLFM_ENGINE    = :glfm_markdown # glfm_markdown/comrak
-      CMARK_ENGINE   = :cmark         # original commonmarker/cmark-gfm
       DEFAULT_ENGINE = GLFM_ENGINE
 
       def initialize(text, context = nil, result = nil)
diff --git a/spec/benchmarks/banzai_benchmark.rb b/spec/benchmarks/banzai_benchmark.rb
index 79948f725a74e67f1e82caefc2a7a1766b042f21..f46c9ac7e8f3e34a9ae97c8e46f1c43a3c8a7d99 100644
--- a/spec/benchmarks/banzai_benchmark.rb
+++ b/spec/benchmarks/banzai_benchmark.rb
@@ -34,7 +34,6 @@
   let_it_be(:wiki_page)     { feature.wiki_page }
   let_it_be(:markdown_text) { feature.raw_markdown }
   let_it_be(:glfm_engine)   { Banzai::Filter::MarkdownFilter::GLFM_ENGINE }
-  let_it_be(:cmark_engine)  { Banzai::Filter::MarkdownFilter::CMARK_ENGINE }
   let_it_be(:grafana_integration) { create(:grafana_integration, project: project) }
   let_it_be(:default_context) do
     {
@@ -85,33 +84,31 @@
 
   context 'markdown engines' do
     it 'benchmarks full pipeline using different markdown engines' do
-      puts "\n--> Benchmarking FullPipeline with Cmark and Glfm engines\n"
+      puts "\n--> Benchmarking FullPipeline with Glfm engine\n"
 
       Benchmark.ips do |x|
         x.config(time: 10, warmup: 2)
 
         x.report('glfm') { Banzai::Pipeline::FullPipeline.call(markdown_text, context.merge(markdown_engine: glfm_engine)) }
-        x.report('cmark') { Banzai::Pipeline::FullPipeline.call(markdown_text, context.merge(markdown_engine: cmark_engine)) }
 
         x.compare!
       end
     end
 
     it 'benchmarks plain markdown pipeline using different markdown engines' do
-      puts "\n--> Benchmarking PlainMarkdownPipeline with Cmark and Glfm engines\n"
+      puts "\n--> Benchmarking PlainMarkdownPipeline with Glfm engine\n"
 
       Benchmark.ips do |x|
         x.config(time: 10, warmup: 2)
 
         x.report('glfm') { Banzai::Pipeline::PlainMarkdownPipeline.call(markdown_text, context.merge(markdown_engine: glfm_engine)) }
-        x.report('cmark') { Banzai::Pipeline::PlainMarkdownPipeline.call(markdown_text, context.merge(markdown_engine: cmark_engine)) }
 
         x.compare!
       end
     end
 
     it 'benchmarks MarkdownFilter using different markdown engines' do
-      puts "\n--> Benchmarking MarkdownFilter with Cmark and Glfm engines\n"
+      puts "\n--> Benchmarking MarkdownFilter with Glfm engine\n"
 
       pipeline      = Banzai::Pipeline[:plain_markdown]
       filter_klass  = Banzai::Filter::MarkdownFilter
@@ -127,13 +124,6 @@
           filter.call
         end
 
-        x.report('Markdown-cmark') do
-          filter = filter_klass.new(filter_source[filter_klass][:input_text],
-            context.merge(markdown_engine: cmark_engine),
-            filter_source[filter_klass][:input_result])
-          filter.call
-        end
-
         x.compare!
       end
     end
diff --git a/spec/lib/banzai/filter/autolink_filter_spec.rb b/spec/lib/banzai/filter/autolink_filter_spec.rb
index 5e7ceb21a9fbc69419b0b1127f6b61932d1b91fb..642ba99f5dd184ee2fc499d6304e97bb7c1bd732 100644
--- a/spec/lib/banzai/filter/autolink_filter_spec.rb
+++ b/spec/lib/banzai/filter/autolink_filter_spec.rb
@@ -5,7 +5,7 @@
 RSpec.describe Banzai::Filter::AutolinkFilter, feature_category: :markdown do
   include FilterSpecHelper
 
-  let_it_be(:context) { { markdown_engine: Banzai::Filter::MarkdownFilter::CMARK_ENGINE } }
+  let_it_be(:context) { { pipeline: :single_line } }
 
   let(:link) { 'http://about.gitlab.com/' }
   let(:quotes) { ['"', "'"] }
diff --git a/spec/lib/banzai/filter/gollum_tags_filter_spec.rb b/spec/lib/banzai/filter/gollum_tags_filter_spec.rb
index 1f581f9b2d462c54639a7cc4b726e3186e4f4aea..10f6fbe89b82939617de4b27cb1f06d0e9e4ecfa 100644
--- a/spec/lib/banzai/filter/gollum_tags_filter_spec.rb
+++ b/spec/lib/banzai/filter/gollum_tags_filter_spec.rb
@@ -83,10 +83,6 @@
     end
   end
 
-  it_behaves_like 'gollum tag parsing' do
-    let_it_be(:context) { { markdown_engine: Banzai::Filter::MarkdownFilter::CMARK_ENGINE } }
-  end
-
   it_behaves_like 'gollum tag parsing' do
     let_it_be(:context) { { pipeline: :ascii_doc } }
   end
diff --git a/spec/lib/banzai/filter/markdown_engines/cmark_spec.rb b/spec/lib/banzai/filter/markdown_engines/cmark_spec.rb
deleted file mode 100644
index ad562e0ebb1f8c4bacf5b099d945db817b0c6e1a..0000000000000000000000000000000000000000
--- a/spec/lib/banzai/filter/markdown_engines/cmark_spec.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Banzai::Filter::MarkdownEngines::Cmark, feature_category: :markdown do
-  it 'defaults to generating sourcepos' do
-    engine = described_class.new({})
-
-    expect(engine.render('# hi')).to eq %(

hi

\n) - end - - it 'turns off sourcepos' do - engine = described_class.new({ no_sourcepos: true }) - - expect(engine.render('# hi')).to eq %(

hi

\n) - end -end diff --git a/spec/lib/banzai/filter/markdown_filter_spec.rb b/spec/lib/banzai/filter/markdown_filter_spec.rb index 69ca6452623f7d100cf5086b33cca514a8ab9ad5..99e22c77661acb590292e297d112f8744e21f1c6 100644 --- a/spec/lib/banzai/filter/markdown_filter_spec.rb +++ b/spec/lib/banzai/filter/markdown_filter_spec.rb @@ -8,8 +8,8 @@ describe 'markdown engine from context' do it 'finds the correct engine' do - expect(described_class.new('foo', { markdown_engine: :cmark }).render_engine) - .to eq Banzai::Filter::MarkdownEngines::Cmark + expect(described_class.new('foo', { markdown_engine: :glfm_markdown }).render_engine) + .to eq Banzai::Filter::MarkdownEngines::GlfmMarkdown end it 'defaults to the GLFM_ENGINE' do diff --git a/spec/lib/gitlab/asciidoc_spec.rb b/spec/lib/gitlab/asciidoc_spec.rb index 292d327a88b466d8f6ef0055a699a89c6db43245..49916f3685b592fec1be2d80607f864853817627 100644 --- a/spec/lib/gitlab/asciidoc_spec.rb +++ b/spec/lib/gitlab/asciidoc_spec.rb @@ -72,43 +72,49 @@ module Gitlab items = { 'link with extra attribute' => { input: 'link:mylink"onmouseover="alert(1)[Click Here]', - output: "
\n

Click Here

\n
" + output: <<~HTML +
+

Click Here

+
+ HTML }, 'link with unsafe scheme' => { input: 'link:data://danger[Click Here]', - output: "
\n

Click Here

\n
" + output: <<~HTML +
+

Click Here

+
+ HTML }, 'image with onerror' => { input: 'image:https://localhost.com/image.png[Alt text" onerror="alert(7)]', - output: "
\n

Alt text\" onerror=\"alert(7)

\n
" + output: <<~HTML +
+

Alt text" onerror="alert(7)

+
+ HTML + }, + 'fenced code with inline script' => { + input: '```mypre">', + output: <<~HTML +
+
+
+
+ +
+
+
+ HTML } } items.each do |name, data| it "does not convert dangerous #{name} into HTML" do - expect(render(data[:input], context)).to include(data[:output]) + expect(render(data[:input], context)).to include(data[:output].strip) end end - # `stub_feature_flags method` runs AFTER declaration of `items` above. - # So the spec in its current implementation won't pass. - # Move this test back to the items hash when removing `use_cmark_renderer` feature flag. - it "does not convert dangerous fenced code with inline script into HTML" do - input = '```mypre">' - output = <<~HTML -
-
-
-
- -
-
-
- HTML - - expect(render(input, context)).to include(output.strip) - end - it 'does not allow locked attributes to be overridden' do input = <<~ADOC {counter:max-include-depth:1234}