From 69705ec39454ab8f03975f0755a10524f3c86a41 Mon Sep 17 00:00:00 2001 From: Long Date: Thu, 17 Jul 2025 12:37:42 -0700 Subject: [PATCH 1/7] Fix link over-zealousl escape --- lib/banzai/filter/external_link_filter.rb | 3 +++ .../services/serializer/link_spec.js | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 8561066ee552ea..4e47e9e64fddc0 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'uri' + module Banzai module Filter # HTML Filter to modify the attributes of external links. @@ -20,6 +22,7 @@ def call uri = uri_strict(node_src(node)) if uri node.set_attribute(node_src_attribute(node), uri.to_s) + node.set_attribute('data-canonical-src', URI.decode_www_form_component(uri.to_s)) addressable_uri = addressable_uri(node_src(node)) else addressable_uri = nil diff --git a/spec/frontend/content_editor/services/serializer/link_spec.js b/spec/frontend/content_editor/services/serializer/link_spec.js index a8bdf21df19b35..90b7ccf8edb8b2 100644 --- a/spec/frontend/content_editor/services/serializer/link_spec.js +++ b/spec/frontend/content_editor/services/serializer/link_spec.js @@ -151,3 +151,23 @@ it('correctly serializes links as an HTML tag', () => { ), ).toBe('new content'); }); + +describe('decoding URL fragments', () => { + it('correctly decodes non-ASCII characters in URL fragments', () => { + const encodedUrl = + '#%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80-%D0%B7%D0%B0%D0%B3%D0%BE%D0%BB%D0%BE%D0%B2%D0%BA%D0%B0'; + const decodedUrl = '#пример-заголовка'; + + expect(serialize(paragraph(link({ href: encodedUrl }, 'link to this header')))).toBe( + `[link to this header](${decodedUrl})`, + ); + }); + + it('returns original link when fragment cannot be decoded', () => { + const malformedUrl = '#%D0%B'; + + expect(serialize(paragraph(link({ href: malformedUrl }, 'link to header')))).toBe( + `[link to header](${malformedUrl})`, + ); + }); +}); -- GitLab From 13fe6cbe6148c5b9648441c0cdaa56a3111b359d Mon Sep 17 00:00:00 2001 From: long nguyen huy Date: Thu, 17 Jul 2025 19:44:08 +0000 Subject: [PATCH 2/7] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: GitLab Duo --- lib/banzai/filter/external_link_filter.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 4e47e9e64fddc0..2850416eec3703 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -22,7 +22,11 @@ def call uri = uri_strict(node_src(node)) if uri node.set_attribute(node_src_attribute(node), uri.to_s) - node.set_attribute('data-canonical-src', URI.decode_www_form_component(uri.to_s)) + node.set_attribute('data-canonical-src', begin + URI.decode_www_form_component(uri.to_s) + rescue ArgumentError + uri.to_s + end) addressable_uri = addressable_uri(node_src(node)) else addressable_uri = nil -- GitLab From 3667167a949b77294f88ac6afac625c87d8ee3ac Mon Sep 17 00:00:00 2001 From: Long Date: Fri, 18 Jul 2025 21:11:43 -0700 Subject: [PATCH 3/7] Change add data-canonical-src attribute position --- lib/banzai/filter/external_link_filter.rb | 2 +- .../services/serializer/link_spec.js | 20 ------------------- .../filter/external_link_filter_spec.rb | 10 +++++----- 3 files changed, 6 insertions(+), 26 deletions(-) diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 2850416eec3703..c3ae5e2bf6dffc 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -22,12 +22,12 @@ def call uri = uri_strict(node_src(node)) if uri node.set_attribute(node_src_attribute(node), uri.to_s) + addressable_uri = addressable_uri(node_src(node)) node.set_attribute('data-canonical-src', begin URI.decode_www_form_component(uri.to_s) rescue ArgumentError uri.to_s end) - addressable_uri = addressable_uri(node_src(node)) else addressable_uri = nil end diff --git a/spec/frontend/content_editor/services/serializer/link_spec.js b/spec/frontend/content_editor/services/serializer/link_spec.js index 90b7ccf8edb8b2..a8bdf21df19b35 100644 --- a/spec/frontend/content_editor/services/serializer/link_spec.js +++ b/spec/frontend/content_editor/services/serializer/link_spec.js @@ -151,23 +151,3 @@ it('correctly serializes links as an HTML tag', () => { ), ).toBe('new content'); }); - -describe('decoding URL fragments', () => { - it('correctly decodes non-ASCII characters in URL fragments', () => { - const encodedUrl = - '#%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80-%D0%B7%D0%B0%D0%B3%D0%BE%D0%BB%D0%BE%D0%B2%D0%BA%D0%B0'; - const decodedUrl = '#пример-заголовка'; - - expect(serialize(paragraph(link({ href: encodedUrl }, 'link to this header')))).toBe( - `[link to this header](${decodedUrl})`, - ); - }); - - it('returns original link when fragment cannot be decoded', () => { - const malformedUrl = '#%D0%B'; - - expect(serialize(paragraph(link({ href: malformedUrl }, 'link to header')))).toBe( - `[link to header](${malformedUrl})`, - ); - }); -}); diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index c1375ffc45ebcc..17d1feeb0f6eee 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -28,13 +28,13 @@ end it 'ignores non-HTTP(S) links' do - exp = act = %q(IRC) + exp = act = %q(IRC) expect(filter(act).to_html).to eq exp end it 'skips internal links' do internal = Gitlab.config.gitlab.url - exp = act = %(Login) + exp = act = %(Login) expect(filter(act).to_html).to eq exp end @@ -74,7 +74,7 @@ it 'adds rel and target attributes to improperly formatted protocols' do doc = filter %q(

Reverse Tabnabbing

) - expected = %q(

Reverse Tabnabbing

) + expected = %q(

Reverse Tabnabbing

) expect(doc.to_html).to eq(expected) end @@ -113,12 +113,12 @@ internal_link = Gitlab.config.gitlab.url + "/sign_in" url = internal_link.gsub(/\Ahttp/, 'HtTp') act = %(Login) - exp = %(Login) + exp = %(Login) expect(filter(act).to_html).to eq(exp) end it 'skips relative links' do - exp = act = %q(Relative URL) + exp = act = %q(Relative URL) expect(filter(act).to_html).to eq(exp) end end -- GitLab From f25437030323e83016f5cb2e9cef99681a052425 Mon Sep 17 00:00:00 2001 From: Long Date: Sat, 19 Jul 2025 02:51:11 -0700 Subject: [PATCH 4/7] Add data-canonical-src and RTLO sanitization --- lib/banzai/filter/external_link_filter.rb | 5 +++-- spec/lib/banzai/filter/external_link_filter_spec.rb | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index c3ae5e2bf6dffc..c4ad2fc4c25efa 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -24,9 +24,9 @@ def call node.set_attribute(node_src_attribute(node), uri.to_s) addressable_uri = addressable_uri(node_src(node)) node.set_attribute('data-canonical-src', begin - URI.decode_www_form_component(uri.to_s) + URI.decode_www_form_component(addressable_uri.to_s) rescue ArgumentError - uri.to_s + addressable_uri.to_s end) else addressable_uri = nil @@ -113,6 +113,7 @@ def punycode_autolink_node!(uri, node) # escape any right-to-left (RTLO) characters in link text def sanitize_link_text!(node) node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO) + node['data-canonical-src'] = node['data-canonical-src'].gsub(RTLO, ENCODED_RTLO) if node['data-canonical-src'] end # If the domain is an international domain name (IDN), diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index 17d1feeb0f6eee..c095779088954f 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -186,6 +186,10 @@ expect(doc.to_html).to include('class="has-tooltip"') expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"') end + + it 'adds a data-canonical-src with the original link' do + expect(doc.to_html).to include('data-canonical-src="http://exa😄mple.com"') + end end context 'with RTLO character' do @@ -196,6 +200,10 @@ expect(doc.to_html).to include('class="has-tooltip"') expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"') end + + it 'adds a data-canonical-src with encoded RTLO' do + expect(doc.to_html).to include('data-canonical-src="http://example.com/evil%E2%80%AE3pm.exe"') + end end end -- GitLab From c78b40b8aec8b83ebc23fd82aef9f42498d76544 Mon Sep 17 00:00:00 2001 From: Long Date: Sat, 19 Jul 2025 03:11:16 -0700 Subject: [PATCH 5/7] encode canonical source in tooltip --- .../content_editor/components/bubble_menus/link_bubble_menu.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue b/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue index c4b696a235fc53..beccfbeff96c99 100644 --- a/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue +++ b/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue @@ -152,7 +152,7 @@ export default { this.linkText = text; this.linkHref = href; - this.linkCanonicalSrc = canonicalSrc || href; + this.linkCanonicalSrc = encodeURI(canonicalSrc || href); }, onTransaction({ transaction }) { -- GitLab From b676252655c5987f3bb4233fb78f5026428bee7e Mon Sep 17 00:00:00 2001 From: Long Date: Sat, 19 Jul 2025 03:59:58 -0700 Subject: [PATCH 6/7] Add more spec test for cryllic character --- .../filter/external_link_filter_spec.rb | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index c095779088954f..48a73bcc5de993 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -218,6 +218,39 @@ end end + context 'for Cyrillic URLs' do + context 'with fragment link' do + it 'treats Cyrillic fragment links as external due to URI parsing limitations' do + doc = filter %q(Fragment Link) + + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'nofollow noreferrer noopener' + expect(doc.at_css('a')).to have_attribute('target') + expect(doc.at_css('a')['target']).to eq '_blank' + expect(doc.at_css('a')['href']).to eq '#пример-заголовка' + end + end + + context 'with IDN domain' do + let(:doc) { filter %q(http://пример-заголовка) } + + it_behaves_like 'an external link with rel attribute' + + it 'processes IDN domains but tooltip functionality is limited by URI parsing' do + expect(doc.to_html).to include('http://пример-заголовка') + + expect(doc.at_css('a')['href']).to eq 'http://пример-заголовка' + + expect(doc.at_css('a')['class']).to be_nil + expect(doc.at_css('a')['title']).to be_nil + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'nofollow noreferrer noopener' + expect(doc.at_css('a')).to have_attribute('target') + expect(doc.at_css('a')['target']).to eq '_blank' + end + end + end + it_behaves_like 'does not use pipeline timing check' it_behaves_like 'a filter timeout' do -- GitLab From 9d7a3797d7c188f826e8c2fdb2df1d64b213dd7d Mon Sep 17 00:00:00 2001 From: long nguyen huy Date: Tue, 22 Jul 2025 16:14:47 +0000 Subject: [PATCH 7/7] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Himanshu Kapoor --- .../content_editor/components/bubble_menus/link_bubble_menu.vue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue b/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue index beccfbeff96c99..c4b696a235fc53 100644 --- a/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue +++ b/app/assets/javascripts/content_editor/components/bubble_menus/link_bubble_menu.vue @@ -152,7 +152,7 @@ export default { this.linkText = text; this.linkHref = href; - this.linkCanonicalSrc = encodeURI(canonicalSrc || href); + this.linkCanonicalSrc = canonicalSrc || href; }, onTransaction({ transaction }) { -- GitLab