diff --git a/lib/banzai/filter/external_link_filter.rb b/lib/banzai/filter/external_link_filter.rb index 8561066ee552ea4a0ed612c043c266637e01ffb8..c4ad2fc4c25efaf73fc67de14e7e10a483e055ce 100644 --- a/lib/banzai/filter/external_link_filter.rb +++ b/lib/banzai/filter/external_link_filter.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'uri' + module Banzai module Filter # HTML Filter to modify the attributes of external links. @@ -21,6 +23,11 @@ def call if uri node.set_attribute(node_src_attribute(node), uri.to_s) addressable_uri = addressable_uri(node_src(node)) + node.set_attribute('data-canonical-src', begin + URI.decode_www_form_component(addressable_uri.to_s) + rescue ArgumentError + addressable_uri.to_s + end) else addressable_uri = nil end @@ -106,6 +113,7 @@ def punycode_autolink_node!(uri, node) # escape any right-to-left (RTLO) characters in link text def sanitize_link_text!(node) node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO) + node['data-canonical-src'] = node['data-canonical-src'].gsub(RTLO, ENCODED_RTLO) if node['data-canonical-src'] end # If the domain is an international domain name (IDN), diff --git a/spec/lib/banzai/filter/external_link_filter_spec.rb b/spec/lib/banzai/filter/external_link_filter_spec.rb index c1375ffc45ebcc0f24b05319af9ccb333dd5a53d..48a73bcc5de9939ceee23d162a4ac1638fbe5ec8 100644 --- a/spec/lib/banzai/filter/external_link_filter_spec.rb +++ b/spec/lib/banzai/filter/external_link_filter_spec.rb @@ -28,13 +28,13 @@ end it 'ignores non-HTTP(S) links' do - exp = act = %q(IRC) + exp = act = %q(IRC) expect(filter(act).to_html).to eq exp end it 'skips internal links' do internal = Gitlab.config.gitlab.url - exp = act = %(Login) + exp = act = %(Login) expect(filter(act).to_html).to eq exp end @@ -74,7 +74,7 @@ it 'adds rel and target attributes to improperly formatted protocols' do doc = filter %q(
) - expected = %q() + expected = %q() expect(doc.to_html).to eq(expected) end @@ -113,12 +113,12 @@ internal_link = Gitlab.config.gitlab.url + "/sign_in" url = internal_link.gsub(/\Ahttp/, 'HtTp') act = %(Login) - exp = %(Login) + exp = %(Login) expect(filter(act).to_html).to eq(exp) end it 'skips relative links' do - exp = act = %q(Relative URL) + exp = act = %q(Relative URL) expect(filter(act).to_html).to eq(exp) end end @@ -186,6 +186,10 @@ expect(doc.to_html).to include('class="has-tooltip"') expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"') end + + it 'adds a data-canonical-src with the original link' do + expect(doc.to_html).to include('data-canonical-src="http://exa😄mple.com"') + end end context 'with RTLO character' do @@ -196,6 +200,10 @@ expect(doc.to_html).to include('class="has-tooltip"') expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"') end + + it 'adds a data-canonical-src with encoded RTLO' do + expect(doc.to_html).to include('data-canonical-src="http://example.com/evil%E2%80%AE3pm.exe"') + end end end @@ -210,6 +218,39 @@ end end + context 'for Cyrillic URLs' do + context 'with fragment link' do + it 'treats Cyrillic fragment links as external due to URI parsing limitations' do + doc = filter %q(Fragment Link) + + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'nofollow noreferrer noopener' + expect(doc.at_css('a')).to have_attribute('target') + expect(doc.at_css('a')['target']).to eq '_blank' + expect(doc.at_css('a')['href']).to eq '#пример-заголовка' + end + end + + context 'with IDN domain' do + let(:doc) { filter %q(http://пример-заголовка) } + + it_behaves_like 'an external link with rel attribute' + + it 'processes IDN domains but tooltip functionality is limited by URI parsing' do + expect(doc.to_html).to include('http://пример-заголовка') + + expect(doc.at_css('a')['href']).to eq 'http://пример-заголовка' + + expect(doc.at_css('a')['class']).to be_nil + expect(doc.at_css('a')['title']).to be_nil + expect(doc.at_css('a')).to have_attribute('rel') + expect(doc.at_css('a')['rel']).to include 'nofollow noreferrer noopener' + expect(doc.at_css('a')).to have_attribute('target') + expect(doc.at_css('a')['target']).to eq '_blank' + end + end + end + it_behaves_like 'does not use pipeline timing check' it_behaves_like 'a filter timeout' do