From 24445d670bc1d8d52378a27243038aa1d1322a14 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 4 Jul 2025 10:20:54 +0200 Subject: [PATCH 01/43] Make delete method private --- lib/gitlab/github_import/attachments_downloader.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 3f17f3fbe94201..0b9ab4a351dcdc 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -44,12 +44,12 @@ def perform file end + private + def delete FileUtils.rm_rf File.dirname(filepath) end - private - def raise_error(message) raise DownloadError, message end -- GitLab From 6de1b361057b290f7bc41288c52147931fca683a Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 4 Jul 2025 10:37:44 +0200 Subject: [PATCH 02/43] Make delete public again --- lib/gitlab/github_import/attachments_downloader.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 0b9ab4a351dcdc..3f17f3fbe94201 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -44,12 +44,12 @@ def perform file end - private - def delete FileUtils.rm_rf File.dirname(filepath) end + private + def raise_error(message) raise DownloadError, message end -- GitLab From 1719cb9a5163fcdf11bbb51c308809552fe2dc07 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 8 Jul 2025 12:58:09 +0200 Subject: [PATCH 03/43] Add support for embedded media links This update GH importer to handle import of notes with embedded video media. As GH links to media using a different format, and without a filetype extension, we now update the filename and filepath for upload, and the note text with correct markdown format. Changelog: fixed --- .../github_import/attachments_downloader.rb | 9 +++++---- .../importer/note_attachments_importer.rb | 18 +++++++++++++----- .../github_import/markdown/attachment.rb | 16 +++++++--------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 3f17f3fbe94201..3426dde23b295d 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -30,12 +30,12 @@ def perform download_url = get_assets_download_redirection_url - parsed_file_name = File.basename(URI.parse(download_url).path) + parsed_file_name = URI.parse(download_url).path.split('/').last # if the file is a media type, we update both the @filename and @filepath with the filetype extension if parsed_file_name.end_with?(*SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) @filename = ensure_filename_size(parsed_file_name) - add_extension_to_file_path(filename) + update_filepath(filename) end file = download_from(download_url) @@ -101,8 +101,9 @@ def filepath end end - def add_extension_to_file_path(filename) - @filepath = "#{filepath}#{File.extname(filename)}" + # adds the filetype extension to the filepath + def update_filepath(filename) + @filepath = "#{filepath}.#{filename.split('.').last}" end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 8f829634ed74d1..c6f1df88fddcae 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -21,14 +21,22 @@ def execute return unless note_text.has_attachments? new_text = note_text.attachments.reduce(note_text.text) do |text, attachment| + # GH media links appear as plain html and are passed in as an array + # we need to convert it to a Markdown::Attachment object + if attachment.is_a?(Array) + url = attachment.first + attachment = Gitlab::GithubImport::Markdown::Attachment.new("media_attachment", url) + end + new_url = gitlab_attachment_link(attachment) - # we need to update video media file links with the correct markdown format - if new_url.end_with?(*supported_video_media_types) - text.gsub(attachment.url, "![media_attachment](#{new_url})") - else - text.gsub(attachment.url, new_url) + # we need to update text so that video media file links have correct markdown format + if new_url.end_with?( + *::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) + text = "![media](#{url})" end + + text.gsub(attachment.url, new_url) end update_note_record(new_text) diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 3b1af797f38000..ebcd3f4960eb5d 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -29,19 +29,17 @@ def from_markdown(markdown_node) # this checks for any attachment links that appear as plain text without # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" - # each markdown_node will only ever have a single url as embedded media on - # GitHub is always on its own line def from_markdown_text(markdown_node) text = markdown_node.to_plaintext.strip + urls = URI.extract(text, %w[http https]) + return if urls.empty? - url = URI.extract(text, %w[http https]).first - return if url.nil? + urls.each do |url| + next unless github_url?(url, media: true) + next unless whitelisted_type?(url, media: true) - return unless github_url?(url, media: true) - return unless whitelisted_type?(url, media: true) - - # we don't have the :alt or :name so we use a default name - new("media_attachment", url) + new("media_attachment", url) + end end def from_markdown_image(markdown_node) -- GitLab From 5734fd0e1b2ebc1b0232a5a040b8807f43f67c58 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 8 Jul 2025 16:42:16 +0200 Subject: [PATCH 04/43] Add attachments_downloader spec for when attachment is media file --- spec/lib/gitlab/github_import/attachments_downloader_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index ce792f673a6f42..42107c0db774ab 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -141,7 +141,7 @@ file = downloader.perform - expect(file.path).to include(File.basename(URI.parse(redirect_url).path)) + expect(file.path).to include(URI.parse(redirect_url).path.split('/').last) end end -- GitLab From 170cb5a963a370c43513e715a8f71bdd173f3738 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 8 Jul 2025 17:18:52 +0200 Subject: [PATCH 05/43] Add spec for media attachment --- spec/lib/gitlab/github_import/markdown/attachment_spec.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb index 4869db6a35400d..bd755dc0d83037 100644 --- a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb +++ b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb @@ -121,14 +121,14 @@ context "when it's a video media attachment" do let(:media_attachment_url) { "https://github.com/user-attachments/assets/73433gh" } let(:markdown_node) do - instance_double(CommonMarker::Node, to_plaintext: media_attachment_url, type: :text) + instance_double('CommonMarker::Node', to_plaintext: media_attachment_url, type: :text) end - it 'returns an attachment object with the download url and default name' do + it 'returns an array with the download url' do attachment = described_class.from_markdown(markdown_node) - expect(attachment.name).to be("media_attachment") - expect(attachment.url).to eq media_attachment_url + expect(attachment.class).to be(Array) + expect(attachment.first).to eq media_attachment_url end end end -- GitLab From db370d4638cd5dd737f8b72ec0271d7fb444ad7c Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 9 Jul 2025 12:21:28 +0200 Subject: [PATCH 06/43] Fix text substitution logic --- .../importer/note_attachments_importer.rb | 15 ++++----------- lib/gitlab/github_import/markdown/attachment.rb | 16 +++++++++------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index c6f1df88fddcae..55d351652c73f4 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -21,22 +21,15 @@ def execute return unless note_text.has_attachments? new_text = note_text.attachments.reduce(note_text.text) do |text, attachment| - # GH media links appear as plain html and are passed in as an array - # we need to convert it to a Markdown::Attachment object - if attachment.is_a?(Array) - url = attachment.first - attachment = Gitlab::GithubImport::Markdown::Attachment.new("media_attachment", url) - end - new_url = gitlab_attachment_link(attachment) - # we need to update text so that video media file links have correct markdown format + # we need to update video media file links with the correct markdown format if new_url.end_with?( *::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) - text = "![media](#{url})" + text.gsub(attachment.url, "![media_attachment](#{new_url})") + else + text.gsub(attachment.url, new_url) end - - text.gsub(attachment.url, new_url) end update_note_record(new_text) diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index ebcd3f4960eb5d..3b1af797f38000 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -29,17 +29,19 @@ def from_markdown(markdown_node) # this checks for any attachment links that appear as plain text without # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" + # each markdown_node will only ever have a single url as embedded media on + # GitHub is always on its own line def from_markdown_text(markdown_node) text = markdown_node.to_plaintext.strip - urls = URI.extract(text, %w[http https]) - return if urls.empty? - urls.each do |url| - next unless github_url?(url, media: true) - next unless whitelisted_type?(url, media: true) + url = URI.extract(text, %w[http https]).first + return if url.nil? - new("media_attachment", url) - end + return unless github_url?(url, media: true) + return unless whitelisted_type?(url, media: true) + + # we don't have the :alt or :name so we use a default name + new("media_attachment", url) end def from_markdown_image(markdown_node) -- GitLab From 2aa4b98d70351de52b2cd8e084a8fb2a453314ae Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 9 Jul 2025 12:26:18 +0200 Subject: [PATCH 07/43] Update attachment spec --- spec/lib/gitlab/github_import/markdown/attachment_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb index bd755dc0d83037..c075d7cc38e641 100644 --- a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb +++ b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb @@ -124,11 +124,11 @@ instance_double('CommonMarker::Node', to_plaintext: media_attachment_url, type: :text) end - it 'returns an array with the download url' do + it 'returns an attachment object with the download url and default name' do attachment = described_class.from_markdown(markdown_node) - expect(attachment.class).to be(Array) - expect(attachment.first).to eq media_attachment_url + expect(attachment.name).to be("media_attachment") + expect(attachment.url).to eq media_attachment_url end end end -- GitLab From d8985fdbc09b284a6be9ceae40489960778a4b8e Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 9 Jul 2025 15:45:02 +0200 Subject: [PATCH 08/43] Update note attachment importer spec --- .../importer/note_attachments_importer_spec.rb | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index ef3f35b26ff099..90cfa4d6105bcf 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -144,12 +144,14 @@ context 'when the attachment is video media' do let(:media_attachment_url) { 'https://github.com/user-attachments/assets/media-attachment' } + let(:uploader_service_stub) { class_double(UploadService) } + let(:uploader_stub) { instance_double(FileUploader) } let(:text) { media_attachment_url } let(:record) { create(:note, project: project, note: text) } let(:tmp_stub_media_attachment) { Tempfile.create('media-attachment.mp4') } let(:uploader_hash) do { alt: nil, - url: '/uploads/test-secret/video.mp4', + url: '/uploads/1234/media-attachment.mp4', markdown: nil } end @@ -158,20 +160,20 @@ options: options) .and_return(downloader_stub) allow(downloader_stub).to receive(:perform).and_return(tmp_stub_media_attachment) - - allow_next_instance_of(FileUploader) do |uploader| - allow(uploader).to receive(:to_h).and_return(uploader_hash) - end - + allow(uploader_service_stub).to receive_message_chain(:new, :execute) + .with(project, tmp_stub_media_attachment, FileUploader) + .and_return(uploader_stub) + allow(uploader_stub).to receive(:to_h).and_return(uploader_hash) allow(downloader_stub).to receive(:delete).once allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) end - it 'changes standalone attachment link to correct markdown' do + it 'changes stand alone attachment link to correct markdown' do importer.execute record.reload - expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") + expect(record.note).to include("![media_attachment](/uploads/)") + expect(record.note).to include(".mp4") end end end -- GitLab From b81adf1c5676e7c699eacae5e84190f3ac536b0d Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 9 Jul 2025 16:37:11 +0200 Subject: [PATCH 09/43] Fix media spec --- .../importer/note_attachments_importer_spec.rb | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 90cfa4d6105bcf..41487c6fa314b1 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -144,14 +144,12 @@ context 'when the attachment is video media' do let(:media_attachment_url) { 'https://github.com/user-attachments/assets/media-attachment' } - let(:uploader_service_stub) { class_double(UploadService) } - let(:uploader_stub) { instance_double(FileUploader) } let(:text) { media_attachment_url } let(:record) { create(:note, project: project, note: text) } let(:tmp_stub_media_attachment) { Tempfile.create('media-attachment.mp4') } let(:uploader_hash) do { alt: nil, - url: '/uploads/1234/media-attachment.mp4', + url: '/uploads/test-secret/video.mp4', markdown: nil } end @@ -160,10 +158,11 @@ options: options) .and_return(downloader_stub) allow(downloader_stub).to receive(:perform).and_return(tmp_stub_media_attachment) - allow(uploader_service_stub).to receive_message_chain(:new, :execute) - .with(project, tmp_stub_media_attachment, FileUploader) - .and_return(uploader_stub) - allow(uploader_stub).to receive(:to_h).and_return(uploader_hash) + + allow_next_instance_of(FileUploader) do |uploader| + allow(uploader).to receive(:to_h).and_return(uploader_hash) + end + allow(downloader_stub).to receive(:delete).once allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) end @@ -172,8 +171,7 @@ importer.execute record.reload - expect(record.note).to include("![media_attachment](/uploads/)") - expect(record.note).to include(".mp4") + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") end end end -- GitLab From 3a3ba0aceb15de488f1ab2b1416ac38a86954e82 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 10 Jul 2025 09:26:59 +0200 Subject: [PATCH 10/43] Apply MR suggestions --- lib/gitlab/github_import/attachments_downloader.rb | 4 ++-- .../github_import/importer/note_attachments_importer.rb | 7 +++---- .../importer/note_attachments_importer_spec.rb | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 3426dde23b295d..5e8bc68a423730 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -30,7 +30,7 @@ def perform download_url = get_assets_download_redirection_url - parsed_file_name = URI.parse(download_url).path.split('/').last + parsed_file_name = File.basename(URI.parse(download_url).path) # if the file is a media type, we update both the @filename and @filepath with the filetype extension if parsed_file_name.end_with?(*SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) @@ -103,7 +103,7 @@ def filepath # adds the filetype extension to the filepath def update_filepath(filename) - @filepath = "#{filepath}.#{filename.split('.').last}" + @filepath = "#{filepath}.#{File.extname(filename)}" end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 55d351652c73f4..7b788614ef3fca 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -25,7 +25,7 @@ def execute # we need to update video media file links with the correct markdown format if new_url.end_with?( - *::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) + *supported_media_types.map { |ext| ".#{ext}" }) text.gsub(attachment.url, "![media_attachment](#{new_url})") else text.gsub(attachment.url, new_url) @@ -50,9 +50,8 @@ def gitlab_attachment_link(attachment) end end - def supported_video_media_types - @supported_video_media_types ||= - ::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" } + def supported_media_types + @supported_media_types ||= ::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES end # From: https://github.com/login/test-import-attachments-source/blob/main/example.md diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 41487c6fa314b1..ef3f35b26ff099 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -167,7 +167,7 @@ allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) end - it 'changes stand alone attachment link to correct markdown' do + it 'changes standalone attachment link to correct markdown' do importer.execute record.reload -- GitLab From 67753298178e23f26cc483762c21a7288164a2fb Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 10 Jul 2025 09:56:32 +0200 Subject: [PATCH 11/43] Fix failing spec --- lib/gitlab/github_import/attachments_downloader.rb | 2 +- spec/lib/gitlab/github_import/attachments_downloader_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 5e8bc68a423730..9cadef4762f54c 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -103,7 +103,7 @@ def filepath # adds the filetype extension to the filepath def update_filepath(filename) - @filepath = "#{filepath}.#{File.extname(filename)}" + @filepath = "#{filepath}#{File.extname(filename)}" end end end diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index 42107c0db774ab..ce792f673a6f42 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -141,7 +141,7 @@ file = downloader.perform - expect(file.path).to include(URI.parse(redirect_url).path.split('/').last) + expect(file.path).to include(File.basename(URI.parse(redirect_url).path)) end end -- GitLab From f5234c6ab981d1248970e0dc62b4a0da2da7749a Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 10 Jul 2025 14:04:26 +0200 Subject: [PATCH 12/43] Apply suggestions an fix rubocop errors --- lib/gitlab/github_import/attachments_downloader.rb | 5 ++--- .../github_import/importer/note_attachments_importer.rb | 8 ++++---- spec/lib/gitlab/github_import/markdown/attachment_spec.rb | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 9cadef4762f54c..3f17f3fbe94201 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -35,7 +35,7 @@ def perform # if the file is a media type, we update both the @filename and @filepath with the filetype extension if parsed_file_name.end_with?(*SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) @filename = ensure_filename_size(parsed_file_name) - update_filepath(filename) + add_extension_to_file_path(filename) end file = download_from(download_url) @@ -101,8 +101,7 @@ def filepath end end - # adds the filetype extension to the filepath - def update_filepath(filename) + def add_extension_to_file_path(filename) @filepath = "#{filepath}#{File.extname(filename)}" end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 7b788614ef3fca..8f829634ed74d1 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -24,8 +24,7 @@ def execute new_url = gitlab_attachment_link(attachment) # we need to update video media file links with the correct markdown format - if new_url.end_with?( - *supported_media_types.map { |ext| ".#{ext}" }) + if new_url.end_with?(*supported_video_media_types) text.gsub(attachment.url, "![media_attachment](#{new_url})") else text.gsub(attachment.url, new_url) @@ -50,8 +49,9 @@ def gitlab_attachment_link(attachment) end end - def supported_media_types - @supported_media_types ||= ::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES + def supported_video_media_types + @supported_video_media_types ||= + ::Gitlab::GithubImport::AttachmentsDownloader::SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" } end # From: https://github.com/login/test-import-attachments-source/blob/main/example.md diff --git a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb index c075d7cc38e641..4869db6a35400d 100644 --- a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb +++ b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb @@ -121,7 +121,7 @@ context "when it's a video media attachment" do let(:media_attachment_url) { "https://github.com/user-attachments/assets/73433gh" } let(:markdown_node) do - instance_double('CommonMarker::Node', to_plaintext: media_attachment_url, type: :text) + instance_double(CommonMarker::Node, to_plaintext: media_attachment_url, type: :text) end it 'returns an attachment object with the download url and default name' do -- GitLab From b95a764163967159fc32bf7e63c406320a16d8b6 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Mon, 14 Jul 2025 18:57:36 +0200 Subject: [PATCH 13/43] Accommodate ghe hosts This accommodates ghe domains in github importer. Changelog: fixed --- lib/gitlab/github_import/client.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 2bfc9ba49caebe..814b2bb61a9598 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -247,7 +247,7 @@ def api_endpoint end def web_endpoint - formatted_host || custom_api_endpoint || ::Octokit::Default.web_endpoint + enterprise_server_host || custom_api_endpoint || ::Octokit::Default.web_endpoint end def custom_api_endpoint @@ -292,6 +292,16 @@ def formatted_host end end + def enterprise_server_host + strong_memoize(:enterprise_server_host) do + next if @host.nil? + + uri = URI.parse(@host) + uri.path = '' if uri.path == '/api/v3' + uri.to_s + end + end + def api_endpoint_host strong_memoize(:api_endpoint_host) do URI.parse(api_endpoint).host -- GitLab From e190678af5013d5f1f1c0d7a092a7df5d02bc532 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 15 Jul 2025 16:10:37 +0200 Subject: [PATCH 14/43] Pass project down the line --- .../github_import/attachments_downloader.rb | 7 +- lib/gitlab/github_import/client.rb | 2 +- .../importer/note_attachments_importer.rb | 2 +- .../github_import/markdown/attachment.rb | 70 ++++++++++--------- lib/gitlab/github_import/markdown_text.rb | 25 +++++-- 5 files changed, 61 insertions(+), 45 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 3f17f3fbe94201..6f4116c2b8494a 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -14,12 +14,13 @@ class AttachmentsDownloader TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze SUPPORTED_VIDEO_MEDIA_TYPES = %w[mov mp4 webm].freeze - attr_reader :file_url, :filename, :file_size_limit, :options + attr_reader :file_url, :filename, :file_size_limit, :options, :project - def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT) + def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, project: nil) @file_url = file_url @options = options @file_size_limit = file_size_limit + @project = project filename = URI(file_url).path.split('/').last @filename = ensure_filename_size(filename) @@ -72,7 +73,7 @@ def get_assets_download_redirection_url end def github_assets_url_regex - %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url)}/.*/(assets|files)/} + %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(project))}/.*/(assets|files)/} end def download_from(url) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index 814b2bb61a9598..b014065dd24108 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -297,7 +297,7 @@ def enterprise_server_host next if @host.nil? uri = URI.parse(@host) - uri.path = '' if uri.path == '/api/v3' + uri.path = '' uri.to_s end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 8f829634ed74d1..b2d8be72b0708b 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -57,7 +57,7 @@ def supported_video_media_types # From: https://github.com/login/test-import-attachments-source/blob/main/example.md # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md def convert_project_content_link(attachment_url, import_source) - path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url, '') + path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(project), '') path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') path_with_blob_prefix = "/-#{path_without_import_source}" diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 3b1af797f38000..d5a054deaf1692 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -12,16 +12,16 @@ class Attachment class << self # markdown_node - CommonMarker::Node - def from_markdown(markdown_node) + def from_markdown(markdown_node, project) case markdown_node.type when :html, :inline_html - from_inline_html(markdown_node) + from_inline_html(markdown_node, project) when :image - from_markdown_image(markdown_node) + from_markdown_image(markdown_node, project) when :link - from_markdown_link(markdown_node) + from_markdown_link(markdown_node, project) when :text, :paragraph - from_markdown_text(markdown_node) + from_markdown_text(markdown_node, project) end end @@ -31,62 +31,63 @@ def from_markdown(markdown_node) # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" # each markdown_node will only ever have a single url as embedded media on # GitHub is always on its own line - def from_markdown_text(markdown_node) + def from_markdown_text(markdown_node, project) text = markdown_node.to_plaintext.strip url = URI.extract(text, %w[http https]).first return if url.nil? - return unless github_url?(url, media: true) - return unless whitelisted_type?(url, media: true) + return unless github_url?(url, media: true, project: project) + return unless whitelisted_type?(url, media: true, project: project) # we don't have the :alt or :name so we use a default name - new("media_attachment", url) + new("media_attachment", url, project) end - def from_markdown_image(markdown_node) + def from_markdown_image(markdown_node, project) url = markdown_node.url return unless url - return unless github_url?(url, media: true) - return unless whitelisted_type?(url, media: true) + return unless github_url?(url, media: true, project: project) + return unless whitelisted_type?(url, media: true, project: project) - new(markdown_node.to_plaintext.strip, url) + new(markdown_node.to_plaintext.strip, url, project) end - def from_markdown_link(markdown_node) + def from_markdown_link(markdown_node, project) url = markdown_node.url return unless url - return unless github_url?(url, docs: true) - return unless whitelisted_type?(url, docs: true) + return unless github_url?(url, docs: true, project: project) + return unless whitelisted_type?(url, docs: true, project: project) - new(markdown_node.to_plaintext.strip, url) + new(markdown_node.to_plaintext.strip, url, project) end - def from_inline_html(markdown_node) + def from_inline_html(markdown_node, project) img = Nokogiri::HTML.parse(markdown_node.string_content).xpath('//img')[0] return if img.nil? || img[:src].blank? - return unless github_url?(img[:src], media: true) - return unless whitelisted_type?(img[:src], media: true) + return unless github_url?(img[:src], media: true, project: project) + return unless whitelisted_type?(img[:src], media: true, project: project) - new(img[:alt], img[:src]) + new(img[:alt], img[:src], project) end - def github_url?(url, docs: false, media: false) + def github_url?(url, docs: false, media: false, project: nil) if media - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url, - ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN) + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project), + ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN + ) elsif docs - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url) + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) end end - def whitelisted_type?(url, docs: false, media: false) + def whitelisted_type?(url, docs: false, media: false, project: nil) if media # We do not know the file extension type from the /assets markdown - return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url) + return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs @@ -95,38 +96,39 @@ def whitelisted_type?(url, docs: false, media: false) end end - attr_reader :name, :url + attr_reader :name, :url, :project - def initialize(name, url) + def initialize(name, url, project) @name = name @url = url + @project = project end def part_of_project_blob?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/blob" + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/blob" ) end def doc_belongs_to_project?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/files" + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/files" ) end def media?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/assets", + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/assets", ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) end def user_attachment? - url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url}/user-attachments/") + url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/user-attachments/") end def inspect - "<#{self.class.name}: { name: #{name}, url: #{url} }>" + "<#{self.class.name}: { name: #{name}, url: #{url}, project: #{project} }>" end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 3d22552269c663..6b576ad3c9894e 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -27,24 +27,37 @@ def fetch_attachments(text) doc = CommonMarker.render_doc(text) doc.walk do |node| - attachment = extract_attachment(node) + attachment = extract_attachment(node, @project) attachments << attachment if attachment end attachments end # Returns github domain without slash in the end - def github_url + def github_url(project = nil) + project = get_project(project) oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} - url = oauth_config['url'].presence || 'https://github.com' + url = oauth_config['url'].presence || enterprise_domain(project) || 'https://github.com' url = url.chop if url.end_with?('/') url end private - def extract_attachment(node) - ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node) + def get_project(project) + @project ||= project + end + + def extract_attachment(node, project) + ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, project) + end + + def enterprise_domain(project) + return if project.nil? + + uri = URI.parse(project.import_url) + uri.path = '' + uri.to_s end end @@ -81,7 +94,7 @@ def formatted_text # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) - matcher_options = { github_url: self.class.github_url, import_source: project.import_source } + matcher_options = { github_url: self.class.github_url(project), import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From 290d651f6eb7118efc3819b03894e0e8a32b8fdd Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 16 Jul 2025 18:30:21 +0200 Subject: [PATCH 15/43] Update client and use web_endpoint --- .../github_import/attachments_downloader.rb | 8 +-- lib/gitlab/github_import/client.rb | 16 +++-- .../importer/note_attachments_importer.rb | 5 +- .../github_import/markdown/attachment.rb | 66 +++++++++---------- lib/gitlab/github_import/markdown_text.rb | 41 +++++------- 5 files changed, 67 insertions(+), 69 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 6f4116c2b8494a..6e4814b7cdf83d 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -14,13 +14,13 @@ class AttachmentsDownloader TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze SUPPORTED_VIDEO_MEDIA_TYPES = %w[mov mp4 webm].freeze - attr_reader :file_url, :filename, :file_size_limit, :options, :project + attr_reader :file_url, :filename, :file_size_limit, :options, :web_endpoint - def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, project: nil) + def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, web_endpoint: nil) @file_url = file_url @options = options @file_size_limit = file_size_limit - @project = project + @web_endpoint = web_endpoint filename = URI(file_url).path.split('/').last @filename = ensure_filename_size(filename) @@ -73,7 +73,7 @@ def get_assets_download_redirection_url end def github_assets_url_regex - %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(project))}/.*/(assets|files)/} + %r{#{Regexp.escape(web_endpoint)}/.*/(assets|files)/} end def download_from(url) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index b014065dd24108..e82add29f7c4ad 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -243,17 +243,21 @@ def rate_limiting_enabled? end def api_endpoint - formatted_host || custom_api_endpoint || default_api_endpoint + formatted_api_endpoint || custom_api_endpoint || default_api_endpoint end def web_endpoint - enterprise_server_host || custom_api_endpoint || ::Octokit::Default.web_endpoint + formatted_web_endpoint || custom_web_endpoint || ::Octokit::Default.web_endpoint end def custom_api_endpoint github_omniauth_provider.dig('args', 'client_options', 'site') end + def custom_web_endpoint + custom_api_endpoint['url']&.chomp('/') + end + def default_api_endpoint OmniAuth::Strategies::GitHub.default_options[:client_options][:site] || ::Octokit::Default.api_endpoint end @@ -282,8 +286,8 @@ def request_count_counter private - def formatted_host - strong_memoize(:formatted_host) do + def formatted_api_endpoint + strong_memoize(:formatted_api_endpoint) do next if @host.nil? uri = URI.parse(@host) @@ -292,8 +296,8 @@ def formatted_host end end - def enterprise_server_host - strong_memoize(:enterprise_server_host) do + def formatted_web_endpoint + strong_memoize(:formatted_web_endpoint) do next if @host.nil? uri = URI.parse(@host) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index b2d8be72b0708b..4cb7c2f08f3072 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -57,7 +57,7 @@ def supported_video_media_types # From: https://github.com/login/test-import-attachments-source/blob/main/example.md # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md def convert_project_content_link(attachment_url, import_source) - path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(project), '') + path_without_domain = attachment_url.gsub(client.web_endpoint, '') path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') path_with_blob_prefix = "/-#{path_without_import_source}" @@ -67,7 +67,8 @@ def convert_project_content_link(attachment_url, import_source) # in: an instance of Gitlab::GithubImport::Markdown::Attachment # out: gitlab attachment markdown url def download_attachment(attachment) - downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options) + downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options, + web_endpoint: client.web_endpoint) file = downloader.perform uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index d5a054deaf1692..4935bf41e2499e 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -12,16 +12,16 @@ class Attachment class << self # markdown_node - CommonMarker::Node - def from_markdown(markdown_node, project) + def from_markdown(markdown_node, web_endpoint) case markdown_node.type when :html, :inline_html - from_inline_html(markdown_node, project) + from_inline_html(markdown_node, web_endpoint) when :image - from_markdown_image(markdown_node, project) + from_markdown_image(markdown_node, web_endpoint) when :link - from_markdown_link(markdown_node, project) + from_markdown_link(markdown_node, web_endpoint) when :text, :paragraph - from_markdown_text(markdown_node, project) + from_markdown_text(markdown_node, web_endpoint) end end @@ -31,63 +31,63 @@ def from_markdown(markdown_node, project) # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" # each markdown_node will only ever have a single url as embedded media on # GitHub is always on its own line - def from_markdown_text(markdown_node, project) + def from_markdown_text(markdown_node, web_endpoint) text = markdown_node.to_plaintext.strip url = URI.extract(text, %w[http https]).first return if url.nil? - return unless github_url?(url, media: true, project: project) - return unless whitelisted_type?(url, media: true, project: project) + return unless github_url?(url, media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) # we don't have the :alt or :name so we use a default name new("media_attachment", url, project) end - def from_markdown_image(markdown_node, project) + def from_markdown_image(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, media: true, project: project) - return unless whitelisted_type?(url, media: true, project: project) + return unless github_url?(url, media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) - new(markdown_node.to_plaintext.strip, url, project) + new(markdown_node.to_plaintext.strip, url, web_endpoint) end - def from_markdown_link(markdown_node, project) + def from_markdown_link(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, docs: true, project: project) - return unless whitelisted_type?(url, docs: true, project: project) + return unless github_url?(url, docs: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, docs: true, web_endpoint: web_endpoint) - new(markdown_node.to_plaintext.strip, url, project) + new(markdown_node.to_plaintext.strip, url, web_endpoint) end - def from_inline_html(markdown_node, project) + def from_inline_html(markdown_node, web_endpoint) img = Nokogiri::HTML.parse(markdown_node.string_content).xpath('//img')[0] return if img.nil? || img[:src].blank? - return unless github_url?(img[:src], media: true, project: project) - return unless whitelisted_type?(img[:src], media: true, project: project) + return unless github_url?(img[:src], media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(img[:src], media: true, web_endpoint: web_endpoint) - new(img[:alt], img[:src], project) + new(img[:alt], img[:src], web_endpoint) end - def github_url?(url, docs: false, media: false, project: nil) + def github_url?(url, docs: false, media: false, web_endpoint: nil) if media - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project), + url.start_with?(web_endpoint, ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) elsif docs - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) + url.start_with?(web_endpoint) end end - def whitelisted_type?(url, docs: false, media: false, project: nil) + def whitelisted_type?(url, docs: false, media: false, web_endpoint: nil) if media # We do not know the file extension type from the /assets markdown - return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) + return true if url.start_with?(web_endpoint) MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs @@ -96,39 +96,39 @@ def whitelisted_type?(url, docs: false, media: false, project: nil) end end - attr_reader :name, :url, :project + attr_reader :name, :url, :web_endpoint - def initialize(name, url, project) + def initialize(name, url, web_endpoint) @name = name @url = url - @project = project + @web_endpont = web_endpoint end def part_of_project_blob?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/blob" + "#{web_endpoint}/#{import_source}/blob" ) end def doc_belongs_to_project?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/files" + "#{web_endpoint}/#{import_source}/files" ) end def media?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/assets", + "#{web_endpoint}/#{import_source}/assets", ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) end def user_attachment? - url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/user-attachments/") + url.start_with?("#{web_endpoint}/user-attachments/") end def inspect - "<#{self.class.name}: { name: #{name}, url: #{url}, project: #{project} }>" + "<#{self.class.name}: { name: #{name}, url: #{url}, web_endpoint: #{web_endpoint} }>" end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 6b576ad3c9894e..132856a39e3185 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -27,37 +27,29 @@ def fetch_attachments(text) doc = CommonMarker.render_doc(text) doc.walk do |node| - attachment = extract_attachment(node, @project) + attachment = extract_attachment(node, @web_endpoint) attachments << attachment if attachment end attachments end # Returns github domain without slash in the end - def github_url(project = nil) - project = get_project(project) - oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} - url = oauth_config['url'].presence || enterprise_domain(project) || 'https://github.com' - url = url.chop if url.end_with?('/') - url - end + # def github_url(web_endpoint = nil) + # web_endpoint = get_web_endpoint(web_endpoint) + # oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} + # url = oauth_config['url'].presence || web_endpoint || 'https://github.com' + # url = url.chop if url.end_with?('/') + # url + # end private - def get_project(project) - @project ||= project - end - - def extract_attachment(node, project) - ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, project) - end - - def enterprise_domain(project) - return if project.nil? + # def get_web_endpoint(web_endpoint) + # @web_endpoint ||= web_endpoint + # end - uri = URI.parse(project.import_url) - uri.path = '' - uri.to_s + def extract_attachment(node, web_endpoint) + ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, web_endpoint) end end @@ -65,11 +57,12 @@ def enterprise_domain(project) # author - An instance of `Gitlab::GithubImport::Representation::User` # exists - Boolean that indicates the user exists in the GitLab database. # project - An instance of `Project`. - def initialize(text, author = nil, exists = false, project: nil) + def initialize(text, author = nil, exists = false, project: nil, web_endpoint: nil) @text = text @author = author @exists = exists @project = project + @web_endpoint = web_endpoint end def perform @@ -83,7 +76,7 @@ def perform private - attr_reader :text, :author, :exists, :project + attr_reader :text, :author, :exists, :project, :web_endpoint def formatted_text login = author.respond_to?(:fetch) ? author.fetch(:login, nil) : author.try(:login) @@ -94,7 +87,7 @@ def formatted_text # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) - matcher_options = { github_url: self.class.github_url(project), import_source: project.import_source } + matcher_options = { github_url: web_endpoint, import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From 420423722892acaa75a0b00ddde93d8c6710abd1 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 24 Jul 2025 12:13:18 +0200 Subject: [PATCH 16/43] Revert "Update client and use web_endpoint" This reverts commit 0b64650d425d581a158a38bc24c2e76d222be04f. --- .../github_import/attachments_downloader.rb | 8 +-- lib/gitlab/github_import/client.rb | 16 ++--- .../importer/note_attachments_importer.rb | 5 +- .../github_import/markdown/attachment.rb | 66 +++++++++---------- lib/gitlab/github_import/markdown_text.rb | 41 +++++++----- 5 files changed, 69 insertions(+), 67 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 6e4814b7cdf83d..6f4116c2b8494a 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -14,13 +14,13 @@ class AttachmentsDownloader TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze SUPPORTED_VIDEO_MEDIA_TYPES = %w[mov mp4 webm].freeze - attr_reader :file_url, :filename, :file_size_limit, :options, :web_endpoint + attr_reader :file_url, :filename, :file_size_limit, :options, :project - def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, web_endpoint: nil) + def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, project: nil) @file_url = file_url @options = options @file_size_limit = file_size_limit - @web_endpoint = web_endpoint + @project = project filename = URI(file_url).path.split('/').last @filename = ensure_filename_size(filename) @@ -73,7 +73,7 @@ def get_assets_download_redirection_url end def github_assets_url_regex - %r{#{Regexp.escape(web_endpoint)}/.*/(assets|files)/} + %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(project))}/.*/(assets|files)/} end def download_from(url) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index e82add29f7c4ad..b014065dd24108 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -243,21 +243,17 @@ def rate_limiting_enabled? end def api_endpoint - formatted_api_endpoint || custom_api_endpoint || default_api_endpoint + formatted_host || custom_api_endpoint || default_api_endpoint end def web_endpoint - formatted_web_endpoint || custom_web_endpoint || ::Octokit::Default.web_endpoint + enterprise_server_host || custom_api_endpoint || ::Octokit::Default.web_endpoint end def custom_api_endpoint github_omniauth_provider.dig('args', 'client_options', 'site') end - def custom_web_endpoint - custom_api_endpoint['url']&.chomp('/') - end - def default_api_endpoint OmniAuth::Strategies::GitHub.default_options[:client_options][:site] || ::Octokit::Default.api_endpoint end @@ -286,8 +282,8 @@ def request_count_counter private - def formatted_api_endpoint - strong_memoize(:formatted_api_endpoint) do + def formatted_host + strong_memoize(:formatted_host) do next if @host.nil? uri = URI.parse(@host) @@ -296,8 +292,8 @@ def formatted_api_endpoint end end - def formatted_web_endpoint - strong_memoize(:formatted_web_endpoint) do + def enterprise_server_host + strong_memoize(:enterprise_server_host) do next if @host.nil? uri = URI.parse(@host) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 4cb7c2f08f3072..b2d8be72b0708b 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -57,7 +57,7 @@ def supported_video_media_types # From: https://github.com/login/test-import-attachments-source/blob/main/example.md # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md def convert_project_content_link(attachment_url, import_source) - path_without_domain = attachment_url.gsub(client.web_endpoint, '') + path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(project), '') path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') path_with_blob_prefix = "/-#{path_without_import_source}" @@ -67,8 +67,7 @@ def convert_project_content_link(attachment_url, import_source) # in: an instance of Gitlab::GithubImport::Markdown::Attachment # out: gitlab attachment markdown url def download_attachment(attachment) - downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options, - web_endpoint: client.web_endpoint) + downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options) file = downloader.perform uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 4935bf41e2499e..d5a054deaf1692 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -12,16 +12,16 @@ class Attachment class << self # markdown_node - CommonMarker::Node - def from_markdown(markdown_node, web_endpoint) + def from_markdown(markdown_node, project) case markdown_node.type when :html, :inline_html - from_inline_html(markdown_node, web_endpoint) + from_inline_html(markdown_node, project) when :image - from_markdown_image(markdown_node, web_endpoint) + from_markdown_image(markdown_node, project) when :link - from_markdown_link(markdown_node, web_endpoint) + from_markdown_link(markdown_node, project) when :text, :paragraph - from_markdown_text(markdown_node, web_endpoint) + from_markdown_text(markdown_node, project) end end @@ -31,63 +31,63 @@ def from_markdown(markdown_node, web_endpoint) # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" # each markdown_node will only ever have a single url as embedded media on # GitHub is always on its own line - def from_markdown_text(markdown_node, web_endpoint) + def from_markdown_text(markdown_node, project) text = markdown_node.to_plaintext.strip url = URI.extract(text, %w[http https]).first return if url.nil? - return unless github_url?(url, media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) + return unless github_url?(url, media: true, project: project) + return unless whitelisted_type?(url, media: true, project: project) # we don't have the :alt or :name so we use a default name new("media_attachment", url, project) end - def from_markdown_image(markdown_node, web_endpoint) + def from_markdown_image(markdown_node, project) url = markdown_node.url return unless url - return unless github_url?(url, media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) + return unless github_url?(url, media: true, project: project) + return unless whitelisted_type?(url, media: true, project: project) - new(markdown_node.to_plaintext.strip, url, web_endpoint) + new(markdown_node.to_plaintext.strip, url, project) end - def from_markdown_link(markdown_node, web_endpoint) + def from_markdown_link(markdown_node, project) url = markdown_node.url return unless url - return unless github_url?(url, docs: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, docs: true, web_endpoint: web_endpoint) + return unless github_url?(url, docs: true, project: project) + return unless whitelisted_type?(url, docs: true, project: project) - new(markdown_node.to_plaintext.strip, url, web_endpoint) + new(markdown_node.to_plaintext.strip, url, project) end - def from_inline_html(markdown_node, web_endpoint) + def from_inline_html(markdown_node, project) img = Nokogiri::HTML.parse(markdown_node.string_content).xpath('//img')[0] return if img.nil? || img[:src].blank? - return unless github_url?(img[:src], media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(img[:src], media: true, web_endpoint: web_endpoint) + return unless github_url?(img[:src], media: true, project: project) + return unless whitelisted_type?(img[:src], media: true, project: project) - new(img[:alt], img[:src], web_endpoint) + new(img[:alt], img[:src], project) end - def github_url?(url, docs: false, media: false, web_endpoint: nil) + def github_url?(url, docs: false, media: false, project: nil) if media - url.start_with?(web_endpoint, + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project), ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) elsif docs - url.start_with?(web_endpoint) + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) end end - def whitelisted_type?(url, docs: false, media: false, web_endpoint: nil) + def whitelisted_type?(url, docs: false, media: false, project: nil) if media # We do not know the file extension type from the /assets markdown - return true if url.start_with?(web_endpoint) + return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs @@ -96,39 +96,39 @@ def whitelisted_type?(url, docs: false, media: false, web_endpoint: nil) end end - attr_reader :name, :url, :web_endpoint + attr_reader :name, :url, :project - def initialize(name, url, web_endpoint) + def initialize(name, url, project) @name = name @url = url - @web_endpont = web_endpoint + @project = project end def part_of_project_blob?(import_source) url.start_with?( - "#{web_endpoint}/#{import_source}/blob" + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/blob" ) end def doc_belongs_to_project?(import_source) url.start_with?( - "#{web_endpoint}/#{import_source}/files" + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/files" ) end def media?(import_source) url.start_with?( - "#{web_endpoint}/#{import_source}/assets", + "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/assets", ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) end def user_attachment? - url.start_with?("#{web_endpoint}/user-attachments/") + url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/user-attachments/") end def inspect - "<#{self.class.name}: { name: #{name}, url: #{url}, web_endpoint: #{web_endpoint} }>" + "<#{self.class.name}: { name: #{name}, url: #{url}, project: #{project} }>" end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 132856a39e3185..6b576ad3c9894e 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -27,29 +27,37 @@ def fetch_attachments(text) doc = CommonMarker.render_doc(text) doc.walk do |node| - attachment = extract_attachment(node, @web_endpoint) + attachment = extract_attachment(node, @project) attachments << attachment if attachment end attachments end # Returns github domain without slash in the end - # def github_url(web_endpoint = nil) - # web_endpoint = get_web_endpoint(web_endpoint) - # oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} - # url = oauth_config['url'].presence || web_endpoint || 'https://github.com' - # url = url.chop if url.end_with?('/') - # url - # end + def github_url(project = nil) + project = get_project(project) + oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} + url = oauth_config['url'].presence || enterprise_domain(project) || 'https://github.com' + url = url.chop if url.end_with?('/') + url + end private - # def get_web_endpoint(web_endpoint) - # @web_endpoint ||= web_endpoint - # end + def get_project(project) + @project ||= project + end + + def extract_attachment(node, project) + ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, project) + end + + def enterprise_domain(project) + return if project.nil? - def extract_attachment(node, web_endpoint) - ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, web_endpoint) + uri = URI.parse(project.import_url) + uri.path = '' + uri.to_s end end @@ -57,12 +65,11 @@ def extract_attachment(node, web_endpoint) # author - An instance of `Gitlab::GithubImport::Representation::User` # exists - Boolean that indicates the user exists in the GitLab database. # project - An instance of `Project`. - def initialize(text, author = nil, exists = false, project: nil, web_endpoint: nil) + def initialize(text, author = nil, exists = false, project: nil) @text = text @author = author @exists = exists @project = project - @web_endpoint = web_endpoint end def perform @@ -76,7 +83,7 @@ def perform private - attr_reader :text, :author, :exists, :project, :web_endpoint + attr_reader :text, :author, :exists, :project def formatted_text login = author.respond_to?(:fetch) ? author.fetch(:login, nil) : author.try(:login) @@ -87,7 +94,7 @@ def formatted_text # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) - matcher_options = { github_url: web_endpoint, import_source: project.import_source } + matcher_options = { github_url: self.class.github_url(project), import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From b355576510481cd0cc8ebfd75b837bd3c849b727 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 25 Jul 2025 14:38:26 +0200 Subject: [PATCH 17/43] Use web endpoint --- .../github_import/attachments_downloader.rb | 8 +-- lib/gitlab/github_import/client.rb | 16 +++-- .../importer/note_attachments_importer.rb | 8 ++- .../github_import/markdown/attachment.rb | 68 +++++++++---------- lib/gitlab/github_import/markdown_text.rb | 35 ++++------ 5 files changed, 68 insertions(+), 67 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 6f4116c2b8494a..f76c638e8c51a4 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -14,13 +14,13 @@ class AttachmentsDownloader TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze SUPPORTED_VIDEO_MEDIA_TYPES = %w[mov mp4 webm].freeze - attr_reader :file_url, :filename, :file_size_limit, :options, :project + attr_reader :file_url, :filename, :file_size_limit, :options, :web_endpoint - def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, project: nil) + def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, web_endpoint: nil) @file_url = file_url @options = options @file_size_limit = file_size_limit - @project = project + @web_endpoint = web_endpoint filename = URI(file_url).path.split('/').last @filename = ensure_filename_size(filename) @@ -73,7 +73,7 @@ def get_assets_download_redirection_url end def github_assets_url_regex - %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(project))}/.*/(assets|files)/} + %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint))}/.*/(assets|files)/} end def download_from(url) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index b014065dd24108..e82add29f7c4ad 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -243,17 +243,21 @@ def rate_limiting_enabled? end def api_endpoint - formatted_host || custom_api_endpoint || default_api_endpoint + formatted_api_endpoint || custom_api_endpoint || default_api_endpoint end def web_endpoint - enterprise_server_host || custom_api_endpoint || ::Octokit::Default.web_endpoint + formatted_web_endpoint || custom_web_endpoint || ::Octokit::Default.web_endpoint end def custom_api_endpoint github_omniauth_provider.dig('args', 'client_options', 'site') end + def custom_web_endpoint + custom_api_endpoint['url']&.chomp('/') + end + def default_api_endpoint OmniAuth::Strategies::GitHub.default_options[:client_options][:site] || ::Octokit::Default.api_endpoint end @@ -282,8 +286,8 @@ def request_count_counter private - def formatted_host - strong_memoize(:formatted_host) do + def formatted_api_endpoint + strong_memoize(:formatted_api_endpoint) do next if @host.nil? uri = URI.parse(@host) @@ -292,8 +296,8 @@ def formatted_host end end - def enterprise_server_host - strong_memoize(:enterprise_server_host) do + def formatted_web_endpoint + strong_memoize(:formatted_web_endpoint) do next if @host.nil? uri = URI.parse(@host) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index b2d8be72b0708b..800b91f1ab405e 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -4,7 +4,7 @@ module Gitlab module GithubImport module Importer class NoteAttachmentsImporter - attr_reader :note_text, :project, :client + attr_reader :note_text, :project, :client, :web_endpoint SUPPORTED_RECORD_TYPES = [::Release.name, ::Issue.name, ::MergeRequest.name, ::Note.name].freeze @@ -15,6 +15,7 @@ def initialize(note_text, project, client) @note_text = note_text @project = project @client = client + @web_endpoint = client.web_endpoint end def execute @@ -57,7 +58,7 @@ def supported_video_media_types # From: https://github.com/login/test-import-attachments-source/blob/main/example.md # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md def convert_project_content_link(attachment_url, import_source) - path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(project), '') + path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint), '') path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') path_with_blob_prefix = "/-#{path_without_import_source}" @@ -67,7 +68,8 @@ def convert_project_content_link(attachment_url, import_source) # in: an instance of Gitlab::GithubImport::Markdown::Attachment # out: gitlab attachment markdown url def download_attachment(attachment) - downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options) + downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options, + web_endpoint: web_endpoint) file = downloader.perform uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index d5a054deaf1692..9da6b080193f21 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -12,16 +12,16 @@ class Attachment class << self # markdown_node - CommonMarker::Node - def from_markdown(markdown_node, project) + def from_markdown(markdown_node, web_endpoint) case markdown_node.type when :html, :inline_html - from_inline_html(markdown_node, project) + from_inline_html(markdown_node, web_endpoint) when :image - from_markdown_image(markdown_node, project) + from_markdown_image(markdown_node, web_endpoint) when :link - from_markdown_link(markdown_node, project) + from_markdown_link(markdown_node, web_endpoint) when :text, :paragraph - from_markdown_text(markdown_node, project) + from_markdown_text(markdown_node, web_endpoint) end end @@ -31,63 +31,63 @@ def from_markdown(markdown_node, project) # a filetype suffix e.g. "https://github.com/user-attachments/assets/75334fd4" # each markdown_node will only ever have a single url as embedded media on # GitHub is always on its own line - def from_markdown_text(markdown_node, project) + def from_markdown_text(markdown_node, web_endpoint) text = markdown_node.to_plaintext.strip url = URI.extract(text, %w[http https]).first return if url.nil? - return unless github_url?(url, media: true, project: project) - return unless whitelisted_type?(url, media: true, project: project) + return unless github_url?(url, media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) # we don't have the :alt or :name so we use a default name - new("media_attachment", url, project) + new("media_attachment", url, web_endpoint) end - def from_markdown_image(markdown_node, project) + def from_markdown_image(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, media: true, project: project) - return unless whitelisted_type?(url, media: true, project: project) + return unless github_url?(url, media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) - new(markdown_node.to_plaintext.strip, url, project) + new(markdown_node.to_plaintext.strip, url, web_endpoint) end - def from_markdown_link(markdown_node, project) + def from_markdown_link(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, docs: true, project: project) - return unless whitelisted_type?(url, docs: true, project: project) + return unless github_url?(url, docs: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(url, docs: true, web_endpoint: web_endpoint) - new(markdown_node.to_plaintext.strip, url, project) + new(markdown_node.to_plaintext.strip, url, web_endpoint) end - def from_inline_html(markdown_node, project) + def from_inline_html(markdown_node, web_endpoint) img = Nokogiri::HTML.parse(markdown_node.string_content).xpath('//img')[0] return if img.nil? || img[:src].blank? - return unless github_url?(img[:src], media: true, project: project) - return unless whitelisted_type?(img[:src], media: true, project: project) + return unless github_url?(img[:src], media: true, web_endpoint: web_endpoint) + return unless whitelisted_type?(img[:src], media: true, web_endpoint: web_endpoint) - new(img[:alt], img[:src], project) + new(img[:alt], img[:src], web_endpoint) end - def github_url?(url, docs: false, media: false, project: nil) + def github_url?(url, docs: false, media: false, web_endpoint: nil) if media - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project), + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint), ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) elsif docs - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) end end - def whitelisted_type?(url, docs: false, media: false, project: nil) + def whitelisted_type?(url, docs: false, media: false, web_endpoint: nil) if media # We do not know the file extension type from the /assets markdown - return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(project)) + return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs @@ -96,39 +96,39 @@ def whitelisted_type?(url, docs: false, media: false, project: nil) end end - attr_reader :name, :url, :project + attr_reader :name, :url, :web_endpoint - def initialize(name, url, project) + def initialize(name, url, web_endpoint) @name = name @url = url - @project = project + @web_endpoint = web_endpoint end def part_of_project_blob?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/blob" + "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/blob" ) end def doc_belongs_to_project?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/files" + "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/files" ) end def media?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/#{import_source}/assets", + "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/assets", ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) end def user_attachment? - url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(project)}/user-attachments/") + url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/user-attachments/") end def inspect - "<#{self.class.name}: { name: #{name}, url: #{url}, project: #{project} }>" + "<#{self.class.name}: { name: #{name}, url: #{url}, web_endpoint: #{web_endpoint} }>" end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 6b576ad3c9894e..ee8f414caedf89 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -27,37 +27,25 @@ def fetch_attachments(text) doc = CommonMarker.render_doc(text) doc.walk do |node| - attachment = extract_attachment(node, @project) + # the @web_endpoint is nil here. Need to work out how to obtain it + attachment = extract_attachment(node, @web_endpoint) attachments << attachment if attachment end attachments end # Returns github domain without slash in the end - def github_url(project = nil) - project = get_project(project) + def github_url(web_endpoint = nil) oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} - url = oauth_config['url'].presence || enterprise_domain(project) || 'https://github.com' + url = oauth_config['url'].presence || web_endpoint || 'https://github.com' url = url.chop if url.end_with?('/') url end private - def get_project(project) - @project ||= project - end - - def extract_attachment(node, project) - ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, project) - end - - def enterprise_domain(project) - return if project.nil? - - uri = URI.parse(project.import_url) - uri.path = '' - uri.to_s + def extract_attachment(node, web_endpoint) + ::Gitlab::GithubImport::Markdown::Attachment.from_markdown(node, web_endpoint) end end @@ -70,6 +58,7 @@ def initialize(text, author = nil, exists = false, project: nil) @author = author @exists = exists @project = project + @web_endpoint = format_web_endpoint(project.import_url) end def perform @@ -83,7 +72,7 @@ def perform private - attr_reader :text, :author, :exists, :project + attr_reader :text, :author, :exists, :project, :web_endpoint def formatted_text login = author.respond_to?(:fetch) ? author.fetch(:login, nil) : author.try(:login) @@ -92,9 +81,15 @@ def formatted_text text end + def format_web_endpoint(import_url) + uri = URI.parse(import_url) + uri.path = '' + uri.to_s + end + # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) - matcher_options = { github_url: self.class.github_url(project), import_source: project.import_source } + matcher_options = { github_url: self.class.github_url(web_endpoint), import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From 04f0c9bfc7d9c5065456caeeccf1302d1c754709 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 30 Jul 2025 18:18:51 +0200 Subject: [PATCH 18/43] Pass web endpoint --- .../github_import/importer/attachments/base_importer.rb | 2 +- lib/gitlab/github_import/markdown_text.rb | 4 ++-- lib/gitlab/github_import/representation/note_text.rb | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/gitlab/github_import/importer/attachments/base_importer.rb b/lib/gitlab/github_import/importer/attachments/base_importer.rb index 9dfa6bb2ff6fa5..55021e69d0344d 100644 --- a/lib/gitlab/github_import/importer/attachments/base_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/base_importer.rb @@ -52,7 +52,7 @@ def object_representation(object) end def has_attachments?(object) - object_representation(object).has_attachments? + object_representation(object).has_attachments?(client.web_endpoint) end end end diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index ee8f414caedf89..89b6ef98674fb7 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -20,7 +20,7 @@ def format(...) new(...).perform end - def fetch_attachments(text) + def fetch_attachments(text, web_endpoint = nil) attachments = [] return attachments if text.nil? @@ -28,7 +28,7 @@ def fetch_attachments(text) doc.walk do |node| # the @web_endpoint is nil here. Need to work out how to obtain it - attachment = extract_attachment(node, @web_endpoint) + attachment = extract_attachment(node, web_endpoint) attachments << attachment if attachment end attachments diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb index 79bef4ec363470..5e18ef785c2d98 100644 --- a/lib/gitlab/github_import/representation/note_text.rb +++ b/lib/gitlab/github_import/representation/note_text.rb @@ -55,12 +55,12 @@ def github_identifiers }.merge(record_type_specific_attribute) end - def has_attachments? - attachments.present? + def has_attachments?(web_endpoint = nil) + attachments(web_endpoint).present? end - def attachments - @attachments ||= MarkdownText.fetch_attachments(text) + def attachments(web_endpoint = nil) + @attachments ||= MarkdownText.fetch_attachments(text, web_endpoint) end private -- GitLab From 49e24f5c39c1060cf833adc866456e467b92194e Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 31 Jul 2025 14:34:48 +0200 Subject: [PATCH 19/43] Add has_attachments? to accessible classes --- lib/gitlab/github_import/client.rb | 6 +++++- .../github_import/importer/attachments/base_importer.rb | 5 ++++- .../github_import/importer/note_attachments_importer.rb | 4 ++-- lib/gitlab/github_import/markdown_text.rb | 1 - lib/gitlab/github_import/representation/note_text.rb | 4 ---- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb index e82add29f7c4ad..2928d0f30f7385 100644 --- a/lib/gitlab/github_import/client.rb +++ b/lib/gitlab/github_import/client.rb @@ -255,7 +255,11 @@ def custom_api_endpoint end def custom_web_endpoint - custom_api_endpoint['url']&.chomp('/') + return unless custom_api_endpoint + + uri = URI.parse(custom_api_endpoint) + uri.path = '' + uri.to_s.chomp('/') end def default_api_endpoint diff --git a/lib/gitlab/github_import/importer/attachments/base_importer.rb b/lib/gitlab/github_import/importer/attachments/base_importer.rb index 55021e69d0344d..7fa6b6c8e1de78 100644 --- a/lib/gitlab/github_import/importer/attachments/base_importer.rb +++ b/lib/gitlab/github_import/importer/attachments/base_importer.rb @@ -52,7 +52,10 @@ def object_representation(object) end def has_attachments?(object) - object_representation(object).has_attachments?(client.web_endpoint) + note_text = object_representation(object) + return false if note_text.text.blank? + + Gitlab::GithubImport::MarkdownText.fetch_attachments(note_text.text, client.web_endpoint).present? end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 800b91f1ab405e..2093a2c9d4918c 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -19,9 +19,9 @@ def initialize(note_text, project, client) end def execute - return unless note_text.has_attachments? + return unless Gitlab::GithubImport::MarkdownText.fetch_attachments(note_text.text, web_endpoint).present? - new_text = note_text.attachments.reduce(note_text.text) do |text, attachment| + new_text = note_text.attachments(web_endpoint).reduce(note_text.text) do |text, attachment| new_url = gitlab_attachment_link(attachment) # we need to update video media file links with the correct markdown format diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 89b6ef98674fb7..17e8df6ae6b5fc 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -27,7 +27,6 @@ def fetch_attachments(text, web_endpoint = nil) doc = CommonMarker.render_doc(text) doc.walk do |node| - # the @web_endpoint is nil here. Need to work out how to obtain it attachment = extract_attachment(node, web_endpoint) attachments << attachment if attachment end diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb index 5e18ef785c2d98..25981299f2edb8 100644 --- a/lib/gitlab/github_import/representation/note_text.rb +++ b/lib/gitlab/github_import/representation/note_text.rb @@ -55,10 +55,6 @@ def github_identifiers }.merge(record_type_specific_attribute) end - def has_attachments?(web_endpoint = nil) - attachments(web_endpoint).present? - end - def attachments(web_endpoint = nil) @attachments ||= MarkdownText.fetch_attachments(text, web_endpoint) end -- GitLab From 3b2f1c268bf93d3f57730b8edab6942a1e744a4f Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 31 Jul 2025 18:41:03 +0200 Subject: [PATCH 20/43] Add some tests --- .../github_import/markdown/attachment.rb | 11 +-- lib/gitlab/github_import/markdown_text.rb | 4 +- .../attachments_downloader_spec.rb | 28 ++++++++ spec/lib/gitlab/github_import/client_spec.rb | 26 ++++++- .../note_attachments_importer_spec.rb | 39 +++++++++++ .../github_import/markdown/attachment_spec.rb | 67 ++++++++++++++----- .../github_import/markdown_text_spec.rb | 48 ++++++++++++- .../representation/note_text_spec.rb | 30 ++++----- 8 files changed, 211 insertions(+), 42 deletions(-) diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 9da6b080193f21..65e6924835106b 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -74,20 +74,21 @@ def from_inline_html(markdown_node, web_endpoint) new(img[:alt], img[:src], web_endpoint) end - def github_url?(url, docs: false, media: false, web_endpoint: nil) + def github_url?(url, docs: false, media: false, web_endpoint: 'nil') if media - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint), + url.start_with?(web_endpoint, ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) elsif docs - url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) + # url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) + url.start_with?(web_endpoint) end end - def whitelisted_type?(url, docs: false, media: false, web_endpoint: nil) + def whitelisted_type?(url, docs: false, media: false, web_endpoint: 'nil') if media # We do not know the file extension type from the /assets markdown - return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) + return true if url.start_with?(web_endpoint) MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 17e8df6ae6b5fc..51bc7afa2bba08 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -57,7 +57,7 @@ def initialize(text, author = nil, exists = false, project: nil) @author = author @exists = exists @project = project - @web_endpoint = format_web_endpoint(project.import_url) + @web_endpoint = format_web_endpoint(project&.import_url) || "https://github.com" end def perform @@ -81,6 +81,8 @@ def formatted_text end def format_web_endpoint(import_url) + return unless import_url + uri = URI.parse(import_url) uri.path = '' uri.to_s diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index ce792f673a6f42..1262a8420321ce 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -10,6 +10,18 @@ let(:chunk_double) { instance_double(HTTParty::ResponseFragment, code: 200) } + describe '#initialize' do + context 'with GHE web_endpoint' do + let(:web_endpoint) { 'https://github.enterprise.com' } + + subject(:downloader) { described_class.new(file_url, web_endpoint: web_endpoint) } + + it 'sets the web_endpoint' do + expect(downloader.web_endpoint).to eq(web_endpoint) + end + end + end + describe '#perform' do before do allow(Gitlab::HTTP).to receive(:perform_request) @@ -186,4 +198,20 @@ expect(Dir.exist?(tmp_dir_path)).to eq false end end + + describe '#github_assets_url_regex' do + context 'with GHE domain' do + let(:web_endpoint) { 'https://github.enterprise.com' } + let(:file_url) { "#{web_endpoint}/project/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11" } + + subject(:downloader) { described_class.new(file_url, web_endpoint: web_endpoint) } + + it 'matches GHE assets URLs' do + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + + regex = downloader.send(:github_assets_url_regex) + expect(file_url).to match(regex) + end + end + end end diff --git a/spec/lib/gitlab/github_import/client_spec.rb b/spec/lib/gitlab/github_import/client_spec.rb index cccd97fed75a03..3a67722215c127 100644 --- a/spec/lib/gitlab/github_import/client_spec.rb +++ b/spec/lib/gitlab/github_import/client_spec.rb @@ -632,7 +632,7 @@ endpoint = 'https://github.kittens.com' expect(client) - .to receive(:custom_api_endpoint) + .to receive(:custom_api_endpoint).twice .and_return(endpoint) expect(client.web_endpoint).to eq(endpoint) @@ -640,6 +640,30 @@ end end + describe '#custom_web_endpoint' do + context 'with custom API endpoint' do + it 'returns the web endpoint derived from API endpoint' do + endpoint = 'https://github.enterprise.com' + + expect(client) + .to receive(:custom_api_endpoint).twice + .and_return('https://github.enterprise.com/api/v3') + + expect(client.custom_web_endpoint).to eq(endpoint) + end + end + + context 'without custom API endpoint' do + it 'returns nil' do + expect(client) + .to receive(:custom_api_endpoint) + .and_return(nil) + + expect(client.custom_web_endpoint).to be_nil + end + end + end + describe '#custom_api_endpoint' do context 'without a custom endpoint' do it 'returns nil' do diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index ef3f35b26ff099..ca0a39f8d89604 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -174,6 +174,45 @@ expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") end end + + context 'with GHE domain' do + let(:web_endpoint) { 'https://github.enterprise.com' } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: web_endpoint) } + let(:ghe_doc_url) { "#{web_endpoint}/nickname/public-test-repo/files/9020437/git-cheat-sheet.txt" } + let(:ghe_image_url) { "#{web_endpoint}/user-attachments/assets/73433gh3" } + + let(:text) do + <<-TEXT.split("\n").map(&:strip).join("\n") + Some text... + [ghe-doc](#{ghe_doc_url}) + ![ghe-image](#{ghe_image_url}) + TEXT + end + + let(:record) { create(:release, project: project, description: text) } + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_doc_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_image_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_doc, tmp_stub_image, tmp_stub_image_tag, + tmp_stub_user_attachment) + allow(downloader_stub).to receive(:delete).twice + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + end + + it 'changes attachment links' do + importer.execute + + record.reload + expect(record.description).to start_with("Some text...\n[ghe-doc](/uploads/") + expect(record.description).to include("![ghe-image](/uploads/") + end + end end end end diff --git a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb index 4869db6a35400d..01afffd400c468 100644 --- a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb +++ b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb @@ -6,6 +6,7 @@ let(:name) { FFaker::Lorem.word } let(:url) { FFaker::Internet.uri('https') } let(:import_source) { 'nickname/public-test-repo' } + let(:web_endpoint) { 'https://github.com' } describe '.from_markdown' do context "when it's a doc attachment" do @@ -17,16 +18,34 @@ end it 'returns instance with attachment info' do - attachment = described_class.from_markdown(markdown_node) + attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to eq name expect(attachment.url).to eq url end + context "when it's a doc attachment from GHE" do + let(:web_endpoint) { 'https://gce.kitty.com' } + let(:url) { "#{web_endpoint}/nickname/public-test-repo/files/3/git-cheat-sheet.pdf" } + let(:name) { FFaker::Lorem.word } + let(:markdown_node) do + instance_double(CommonMarker::Node, url: url, to_plaintext: name, type: :link) + end + + it 'returns instance with attachment info' do + puts "markdown_node: #{markdown_node.inspect}" + puts "web_endpoint: #{web_endpoint}" + attachment = described_class.from_markdown(markdown_node, web_endpoint) + + expect(attachment.name).to eq name + expect(attachment.url).to eq url + end + end + context "when type is not in whitelist" do let(:doc_extension) { 'exe' } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'when domain name is unknown' do @@ -34,13 +53,13 @@ "https://bitbucket.com/nickname/public-test-repo/files/3/git-cheat-sheet.#{doc_extension}" end - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'when URL is blank' do let(:url) { nil } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end end @@ -53,7 +72,7 @@ end it 'returns instance with attachment info' do - attachment = described_class.from_markdown(markdown_node) + attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to eq name expect(attachment.url).to eq url @@ -62,32 +81,44 @@ context "when type is not in whitelist" do let(:image_extension) { 'mkv' } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'when domain name is unknown' do let(:url) { "https://user-images.github.com/1/uuid-1.#{image_extension}" } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'with allowed domain as subdomain' do let(:url) { "https://user-images.githubusercontent.com.attacker.controlled.domain/1/uuid-1.#{image_extension}" } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'when URL is blank' do let(:url) { nil } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end context 'when image attachment is in the new format' do let(:url) { "https://github.com/#{import_source}/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11" } it 'returns instance with attachment info' do - attachment = described_class.from_markdown(markdown_node) + attachment = described_class.from_markdown(markdown_node, web_endpoint) + + expect(attachment.name).to eq name + expect(attachment.url).to eq url + end + end + + context 'when the instance is a ghe instance' do + let(:web_endpoint) { "https://ghe.doggo.com" } + let(:url) { "#{web_endpoint}/user-attachments/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11" } + + it 'returns instance with attachment info' do + attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to eq name expect(attachment.url).to eq url @@ -105,7 +136,7 @@ end it 'returns instance with attachment info' do - attachment = described_class.from_markdown(markdown_node) + attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to eq name expect(attachment.url).to eq url @@ -114,7 +145,7 @@ context 'when image src is not present' do let(:img) { "\"#{name}\"" } - it { expect(described_class.from_markdown(markdown_node)).to eq nil } + it { expect(described_class.from_markdown(markdown_node, web_endpoint)).to eq nil } end end @@ -125,7 +156,7 @@ end it 'returns an attachment object with the download url and default name' do - attachment = described_class.from_markdown(markdown_node) + attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to be("media_attachment") expect(attachment.url).to eq media_attachment_url @@ -134,7 +165,7 @@ end describe '#part_of_project_blob?' do - let(:attachment) { described_class.new('test', url) } + let(:attachment) { described_class.new('test', url, web_endpoint) } context 'when url is a part of project blob' do let(:url) { "https://github.com/#{import_source}/blob/main/example.md" } @@ -150,7 +181,7 @@ end describe '#doc_belongs_to_project?' do - let(:attachment) { described_class.new('test', url) } + let(:attachment) { described_class.new('test', url, web_endpoint) } context 'when url relates to this project' do let(:url) { "https://github.com/#{import_source}/files/9020437/git-cheat-sheet.txt" } @@ -172,7 +203,7 @@ end describe '#media?' do - let(:attachment) { described_class.new('test', url) } + let(:attachment) { described_class.new('test', url, web_endpoint) } context 'when it is a media link' do let(:url) { 'https://user-images.githubusercontent.com/6833842/0cf366b61ef2.jpeg' } @@ -195,9 +226,9 @@ describe '#inspect' do it 'returns attachment basic info' do - attachment = described_class.new(name, url) + attachment = described_class.new(name, url, web_endpoint) - expect(attachment.inspect).to eq "" + expect(attachment.inspect).to eq "" # rubocop:disable Layout/LineLength, Lint/RedundantCopDisableDirective -- Needs to be on one line end end end diff --git a/spec/lib/gitlab/github_import/markdown_text_spec.rb b/spec/lib/gitlab/github_import/markdown_text_spec.rb index 4143875fee1794..9e006234a07b30 100644 --- a/spec/lib/gitlab/github_import/markdown_text_spec.rb +++ b/spec/lib/gitlab/github_import/markdown_text_spec.rb @@ -94,7 +94,7 @@ end it 'fetches attachments' do - attachments = described_class.fetch_attachments(text) + attachments = described_class.fetch_attachments(text, "https://github.com") expect(attachments.map(&:name)).to contain_exactly('special-image', 'tag-image', 'some-doc') expect(attachments.map(&:url)).to contain_exactly( @@ -107,6 +107,30 @@ it 'returns an empty array when passed nil' do expect(described_class.fetch_attachments(nil)).to be_empty end + + context 'with GHE web endpoint' do + let(:web_endpoint) { 'https://github.enterprise.com' } + let(:image_extension) { Gitlab::GithubImport::Markdown::Attachment::MEDIA_TYPES.sample } + let(:ghe_image_attachment) do + "![ghe-image](#{web_endpoint}/user-attachments/assets/uuid-1.#{image_extension})" + end + + let(:text) do + <<-TEXT.split("\n").map(&:strip).join("\n") + Comment with GHE attachment + #{ghe_image_attachment} + TEXT + end + + it 'fetches attachments from GHE domain' do + attachments = described_class.fetch_attachments(text, web_endpoint) + + expect(attachments.map(&:name)).to contain_exactly('ghe-image') + expect(attachments.map(&:url)).to contain_exactly( + "#{web_endpoint}/user-attachments/assets/uuid-1.#{image_extension}" + ) + end + end end describe '#perform' do @@ -168,4 +192,26 @@ end end end + + describe '.github_url' do + let(:web_endpoint) { 'https://cat.enterprise.com' } + + context 'with web_endpoint parameter' do + it 'returns the provided web_endpoint' do + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + expect(described_class.github_url(web_endpoint)).to eq(web_endpoint) + end + end + + context 'with oauth config and web_endpoint' do + before do + allow(Gitlab::Auth::OAuth::Provider) + .to receive(:config_for).with('github').and_return({ 'url' => 'https://oauth.github.com/' }) + end + + it 'prefers oauth config over web_endpoint' do + expect(described_class.github_url(web_endpoint)).to eq('https://oauth.github.com') + end + end + end end diff --git a/spec/lib/gitlab/github_import/representation/note_text_spec.rb b/spec/lib/gitlab/github_import/representation/note_text_spec.rb index b1ca15128557a8..63dd6dab5b8052 100644 --- a/spec/lib/gitlab/github_import/representation/note_text_spec.rb +++ b/spec/lib/gitlab/github_import/representation/note_text_spec.rb @@ -154,35 +154,33 @@ end end - describe '#has_attachments?' do - subject { described_class.new({ text: text }).has_attachments? } + describe '#attachments' do + let(:web_endpoint) { "https://github.com" } + + subject { described_class.new({ text: text }).attachments(web_endpoint) } context 'when text has attachments' do let(:text) { 'See ![image](https://user-images.githubusercontent.com/1/uuid-1.png) for details' } - it { is_expected.to eq(true) } + it { is_expected.to contain_exactly(instance_of(Gitlab::GithubImport::Markdown::Attachment)) } end context 'when text does not have attachments' do let(:text) { 'Some text here' } - it { is_expected.to eq(false) } + it { is_expected.to be_empty } end - end - - describe '#attachments' do - subject { described_class.new({ text: text }).attachments } - - context 'when text has attachments' do - let(:text) { 'See ![image](https://user-images.githubusercontent.com/1/uuid-1.png) for details' } - it { is_expected.to contain_exactly(instance_of(Gitlab::GithubImport::Markdown::Attachment)) } - end + context 'when text has GHE attachments' do + let(:web_endpoint) { "https://foo.ghe.com" } + let(:text) { "See ![image](#{web_endpoint}/user-attachments/assets/uuid-1.png) for details" } - context 'when text does not have attachments' do - let(:text) { 'Some text here' } + it 'returns attachments with web_endpoint' do + attachments = described_class.new({ text: text }).attachments(web_endpoint) - it { is_expected.to be_empty } + expect(attachments).to contain_exactly(instance_of(Gitlab::GithubImport::Markdown::Attachment)) + expect(attachments.first.web_endpoint).to eq(web_endpoint) + end end end end -- GitLab From 8cb212b7e0c3bc822d878787cc500b1dacde54f2 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 1 Aug 2025 10:58:31 +0200 Subject: [PATCH 21/43] Fix additional specs --- .../attachments/issues_importer_spec.rb | 2 +- .../merge_requests_importer_spec.rb | 2 +- .../attachments/notes_importer_spec.rb | 2 +- .../attachments/releases_importer_spec.rb | 2 +- .../note_attachments_importer_spec.rb | 21 +++++++++++++------ 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/attachments/issues_importer_spec.rb b/spec/lib/gitlab/github_import/importer/attachments/issues_importer_spec.rb index 6ee6a943484e52..b761939c91ff0d 100644 --- a/spec/lib/gitlab/github_import/importer/attachments/issues_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/attachments/issues_importer_spec.rb @@ -7,7 +7,7 @@ let_it_be(:project) { create(:project) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: "https://github.com") } describe '#sequential_import', :clean_gitlab_redis_shared_state do let_it_be(:issue) { create(:issue, project: project) } diff --git a/spec/lib/gitlab/github_import/importer/attachments/merge_requests_importer_spec.rb b/spec/lib/gitlab/github_import/importer/attachments/merge_requests_importer_spec.rb index 4329337dc9fa72..e4ee7fd415aa4b 100644 --- a/spec/lib/gitlab/github_import/importer/attachments/merge_requests_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/attachments/merge_requests_importer_spec.rb @@ -7,7 +7,7 @@ let_it_be(:project) { create(:project) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: "https://github.com") } describe '#sequential_import', :clean_gitlab_redis_shared_state do let_it_be(:mr) { create(:merge_request, source_project: project, target_branch: 'feature1') } diff --git a/spec/lib/gitlab/github_import/importer/attachments/notes_importer_spec.rb b/spec/lib/gitlab/github_import/importer/attachments/notes_importer_spec.rb index 65433b482c2f87..74b754957cdd09 100644 --- a/spec/lib/gitlab/github_import/importer/attachments/notes_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/attachments/notes_importer_spec.rb @@ -7,7 +7,7 @@ let_it_be(:project) { create(:project) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: "https://github.com") } describe '#sequential_import', :clean_gitlab_redis_shared_state do let_it_be(:note) { create(:note, project: project) } diff --git a/spec/lib/gitlab/github_import/importer/attachments/releases_importer_spec.rb b/spec/lib/gitlab/github_import/importer/attachments/releases_importer_spec.rb index 8f1b6debcd6aa0..f0ed34f48f9741 100644 --- a/spec/lib/gitlab/github_import/importer/attachments/releases_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/attachments/releases_importer_spec.rb @@ -7,7 +7,7 @@ let_it_be(:project) { create(:project) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: "https://github.com") } describe '#sequential_import', :clean_gitlab_redis_shared_state do let_it_be(:release) { create(:release, project: project) } diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index ca0a39f8d89604..2b5100c24c166f 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -8,7 +8,12 @@ let_it_be(:project) { create(:project, import_source: 'nickname/public-test-repo') } let(:note_text) { Gitlab::GithubImport::Representation::NoteText.from_db_record(record) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:web_endpoint) { ::Octokit::Default.web_endpoint } + let(:client) do + instance_double(Gitlab::GithubImport::Client).tap do |double| + allow(double).to receive(:web_endpoint).and_return(web_endpoint) + end + end let(:doc_url) { 'https://github.com/nickname/public-test-repo/files/9020437/git-cheat-sheet.txt' } let(:image_url) { 'https://user-images.githubusercontent.com/6833842/0cf366b61ef2.jpeg' } @@ -82,13 +87,17 @@ let(:options) { { headers: { 'Authorization' => "Bearer #{access_token}" } } } before do - allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new).with(doc_url, options: options) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(doc_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) - allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new).with(image_url, options: options) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(image_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) - allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new).with(image_tag_url, options: options) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(image_tag_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) - allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new).with(user_attachment_url, options: options) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(user_attachment_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) allow(downloader_stub).to receive(:perform).and_return(tmp_stub_doc, tmp_stub_image, tmp_stub_image_tag, tmp_stub_user_attachment) @@ -155,7 +164,7 @@ before do allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new).with(media_attachment_url, - options: options) + options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) allow(downloader_stub).to receive(:perform).and_return(tmp_stub_media_attachment) -- GitLab From a6305e7caf29213fadb729e6841a0616b47d0fa9 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Mon, 4 Aug 2025 19:41:57 +0200 Subject: [PATCH 22/43] Add mime type detection --- .../github_import/attachments_downloader.rb | 16 ++++++ .../attachments_downloader_spec.rb | 52 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index f76c638e8c51a4..8ad519e40f8b0d 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -7,6 +7,8 @@ class AttachmentsDownloader include ::BulkImports::FileDownloads::FilenameFetch include ::BulkImports::FileDownloads::Validations + require 'marcel' + DownloadError = Class.new(StandardError) FILENAME_SIZE_LIMIT = 255 # chars before the extension @@ -42,6 +44,10 @@ def perform file = download_from(download_url) validate_symlink + + # if the source instance is not github.com we need to add extname to video file types + update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint + file end @@ -105,6 +111,16 @@ def filepath def add_extension_to_file_path(filename) @filepath = "#{filepath}#{File.extname(filename)}" end + + def update_ghe_video_path(file) + file = File.open(file) + mime_type = Marcel::MimeType.for(file) + return unless mime_type.start_with?('video/') && File.extname(file.path).empty? + + extension = Mime::Type.lookup(mime_type)&.symbol&.to_s + @filename = ensure_filename_size(File.basename(file.path) + ".#{extension}") + @filepath = "#{filepath}.#{extension}" + end end end end diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index 1262a8420321ce..b02ba70c98da09 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -177,6 +177,58 @@ end end end + + context 'when the source is a GHE server instance' do + let(:web_endpoint) { "https://enterprise.company.com" } + let(:file_url) { "#{web_endpoint}/storage/user/2/files/73433gh3" } + let(:redirect_url) { "#{web_endpoint}/storage/user/2/files/73433gh3" } + let(:mime_type_double) { instance_double(Mime::Type, symbol: :mov) } + let(:sample_response) do + instance_double(HTTParty::Response, redirection?: true, headers: { location: redirect_url }) + end + + subject(:downloader) { described_class.new(file_url, web_endpoint: web_endpoint) } + + before do + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return nil + end + + it 'updates the filename and filepath for video attachments' do + expect(::Import::Clients::HTTP).to receive(:get).with(file_url, { follow_redirects: false }) + .and_return sample_response + + expect(Gitlab::HTTP).to receive(:perform_request) + .with(Net::HTTP::Get, redirect_url, stream_body: true).and_yield(chunk_double) + + allow(Marcel::MimeType).to receive(:for).with(anything).and_return('video/mov') + allow(Mime::Type).to receive(:lookup).with('video/mov').and_return(mime_type_double) + + downloader.perform + + expect(downloader.instance_variable_get(:@filename)).to eq(File.basename("#{URI.parse(redirect_url).path}.mov")) + expect(downloader.instance_variable_get(:@filepath)).to include(File.basename("#{URI.parse(redirect_url).path}.mov")) # rubocop:disable Layout/LineLength, Lint/RedundantCopDisableDirective -- minor + end + + context 'when the attachment is not a video file' do + let(:mime_type_double) { instance_double(Mime::Type, symbol: :jpg) } + + it 'does not update the filename and filepath' do + expect(::Import::Clients::HTTP).to receive(:get).with(file_url, { follow_redirects: false }) + .and_return sample_response + + expect(Gitlab::HTTP).to receive(:perform_request) + .with(Net::HTTP::Get, redirect_url, stream_body: true).and_yield(chunk_double) + + allow(Marcel::MimeType).to receive(:for).with(anything).and_return('image/jpg') + allow(Mime::Type).to receive(:lookup).with('image/jpg').and_return(mime_type_double) + + downloader.perform + + expect(downloader.instance_variable_get(:@filename)).to eq(File.basename(URI.parse(redirect_url).path)) + expect(downloader.instance_variable_get(:@filepath)).to include(File.basename(URI.parse(redirect_url).path)) + end + end + end end describe '#delete' do -- GitLab From 66159be33e5cf614b712e3156ea4ef7cee552b4d Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 5 Aug 2025 15:20:36 +0200 Subject: [PATCH 23/43] Move extname logic into note_attachments_importer --- .../github_import/attachments_downloader.rb | 18 +---------------- .../importer/note_attachments_importer.rb | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 8ad519e40f8b0d..5eae1b60b1ac11 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -7,8 +7,6 @@ class AttachmentsDownloader include ::BulkImports::FileDownloads::FilenameFetch include ::BulkImports::FileDownloads::Validations - require 'marcel' - DownloadError = Class.new(StandardError) FILENAME_SIZE_LIMIT = 255 # chars before the extension @@ -35,7 +33,7 @@ def perform parsed_file_name = File.basename(URI.parse(download_url).path) - # if the file is a media type, we update both the @filename and @filepath with the filetype extension + # if the file has a video filetype extension, we update both the @filename and @filepath with the filetype ext. if parsed_file_name.end_with?(*SUPPORTED_VIDEO_MEDIA_TYPES.map { |ext| ".#{ext}" }) @filename = ensure_filename_size(parsed_file_name) add_extension_to_file_path(filename) @@ -44,10 +42,6 @@ def perform file = download_from(download_url) validate_symlink - - # if the source instance is not github.com we need to add extname to video file types - update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint - file end @@ -111,16 +105,6 @@ def filepath def add_extension_to_file_path(filename) @filepath = "#{filepath}#{File.extname(filename)}" end - - def update_ghe_video_path(file) - file = File.open(file) - mime_type = Marcel::MimeType.for(file) - return unless mime_type.start_with?('video/') && File.extname(file.path).empty? - - extension = Mime::Type.lookup(mime_type)&.symbol&.to_s - @filename = ensure_filename_size(File.basename(file.path) + ".#{extension}") - @filepath = "#{filepath}.#{extension}" - end end end end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 2093a2c9d4918c..ade1b0ae787969 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -6,6 +6,8 @@ module Importer class NoteAttachmentsImporter attr_reader :note_text, :project, :client, :web_endpoint + require 'marcel' + SUPPORTED_RECORD_TYPES = [::Release.name, ::Issue.name, ::MergeRequest.name, ::Note.name].freeze # note_text - An instance of `Gitlab::GithubImport::Representation::NoteText`. @@ -71,7 +73,12 @@ def download_attachment(attachment) downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options, web_endpoint: web_endpoint) file = downloader.perform + + # for ghe imports check on filetype to add ext to video attachments + file = update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint + uploader = UploadService.new(project, file, FileUploader).execute + uploader.to_h[:url] end @@ -110,6 +117,19 @@ def record_column_for_type(record_type) :note end end + + def update_ghe_video_path(file) + file = File.open(file) + content = file.read + filepath = file.path + mime_type = Marcel::MimeType.for(file) + return file unless mime_type.start_with?('video/') && File.extname(filepath).empty? + + extension = Mime::Type.lookup(mime_type)&.symbol&.to_s + file_with_extension = File.open("#{filepath}.#{extension}", 'wb') + file_with_extension.write(content) + file_with_extension + end end end end -- GitLab From b7a05b544c93dd7f4e112c62f204b7bc936cb472 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 5 Aug 2025 16:30:20 +0200 Subject: [PATCH 24/43] Update specs for new implementation --- .../attachments_downloader_spec.rb | 52 --------------- .../note_attachments_importer_spec.rb | 64 +++++++++++++++++++ .../github_import/markdown/attachment_spec.rb | 2 - 3 files changed, 64 insertions(+), 54 deletions(-) diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index b02ba70c98da09..1262a8420321ce 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -177,58 +177,6 @@ end end end - - context 'when the source is a GHE server instance' do - let(:web_endpoint) { "https://enterprise.company.com" } - let(:file_url) { "#{web_endpoint}/storage/user/2/files/73433gh3" } - let(:redirect_url) { "#{web_endpoint}/storage/user/2/files/73433gh3" } - let(:mime_type_double) { instance_double(Mime::Type, symbol: :mov) } - let(:sample_response) do - instance_double(HTTParty::Response, redirection?: true, headers: { location: redirect_url }) - end - - subject(:downloader) { described_class.new(file_url, web_endpoint: web_endpoint) } - - before do - allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return nil - end - - it 'updates the filename and filepath for video attachments' do - expect(::Import::Clients::HTTP).to receive(:get).with(file_url, { follow_redirects: false }) - .and_return sample_response - - expect(Gitlab::HTTP).to receive(:perform_request) - .with(Net::HTTP::Get, redirect_url, stream_body: true).and_yield(chunk_double) - - allow(Marcel::MimeType).to receive(:for).with(anything).and_return('video/mov') - allow(Mime::Type).to receive(:lookup).with('video/mov').and_return(mime_type_double) - - downloader.perform - - expect(downloader.instance_variable_get(:@filename)).to eq(File.basename("#{URI.parse(redirect_url).path}.mov")) - expect(downloader.instance_variable_get(:@filepath)).to include(File.basename("#{URI.parse(redirect_url).path}.mov")) # rubocop:disable Layout/LineLength, Lint/RedundantCopDisableDirective -- minor - end - - context 'when the attachment is not a video file' do - let(:mime_type_double) { instance_double(Mime::Type, symbol: :jpg) } - - it 'does not update the filename and filepath' do - expect(::Import::Clients::HTTP).to receive(:get).with(file_url, { follow_redirects: false }) - .and_return sample_response - - expect(Gitlab::HTTP).to receive(:perform_request) - .with(Net::HTTP::Get, redirect_url, stream_body: true).and_yield(chunk_double) - - allow(Marcel::MimeType).to receive(:for).with(anything).and_return('image/jpg') - allow(Mime::Type).to receive(:lookup).with('image/jpg').and_return(mime_type_double) - - downloader.perform - - expect(downloader.instance_variable_get(:@filename)).to eq(File.basename(URI.parse(redirect_url).path)) - expect(downloader.instance_variable_get(:@filepath)).to include(File.basename(URI.parse(redirect_url).path)) - end - end - end end describe '#delete' do diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 2b5100c24c166f..6fa6695c9ce375 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -221,6 +221,70 @@ expect(record.description).to start_with("Some text...\n[ghe-doc](/uploads/") expect(record.description).to include("![ghe-image](/uploads/") end + + context 'when the attachment is a video file' do # rubocop:disable RSpec/MultipleMemoizedHelpers -- required + let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } + let(:text) { ghe_video_url } + let(:record) { create(:note, project: project, note: text) } + let(:tmp_stub_video) { Tempfile.create('video-attachment') } + let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mp4') } + let(:video_content) { 'fake video content' } + let(:uploader_hash) do + { alt: nil, + url: '/uploads/test-secret/video.mp4', + markdown: nil } + end + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_video_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) + allow(downloader_stub).to receive(:delete).once + + allow(File).to receive(:open).and_call_original + allow(File).to receive(:open).with(tmp_stub_video).and_return(tmp_stub_video) + allow(File).to receive(:open).with( + a_string_matching(/\.mp4/), + 2562, + perm: 384 + ).and_return(tmp_stub_video_with_ext) + + allow(tmp_stub_video).to receive_messages( + read: video_content, + path: tmp_stub_video.path + ) + + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_video).and_return('video/mp4') + allow(File).to receive(:extname).and_call_original + allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') + + mime_type_double = instance_double(Mime::Type, symbol: :mp4) + allow(Mime::Type).to receive(:lookup).with('video/mp4').and_return(mime_type_double) + + allow(tmp_stub_video_with_ext).to receive(:write).with(video_content) + + allow_next_instance_of(FileUploader) do |uploader| + allow(uploader).to receive(:to_h).and_return(uploader_hash) + end + + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + end + + it 'calls update_ghe_video_path for video files on GHE' do + expect(importer).to receive(:update_ghe_video_path).with(tmp_stub_video).at_least(:once).and_call_original + + importer.execute + end + + it 'updates video attachment path correctly' do + importer.execute + + record.reload + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") + end + end end end end diff --git a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb index 01afffd400c468..12824c3f7f83e7 100644 --- a/spec/lib/gitlab/github_import/markdown/attachment_spec.rb +++ b/spec/lib/gitlab/github_import/markdown/attachment_spec.rb @@ -33,8 +33,6 @@ end it 'returns instance with attachment info' do - puts "markdown_node: #{markdown_node.inspect}" - puts "web_endpoint: #{web_endpoint}" attachment = described_class.from_markdown(markdown_node, web_endpoint) expect(attachment.name).to eq name -- GitLab From 69b3005f59a2ddf6f9cf99a8d67731343d9bb596 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 6 Aug 2025 16:41:52 +0200 Subject: [PATCH 25/43] Apply MR comments --- lib/gitlab/github_import/attachments_downloader.rb | 2 +- .../importer/note_attachments_importer.rb | 12 +++++------- lib/gitlab/github_import/markdown/attachment.rb | 9 ++++----- lib/gitlab/github_import/markdown_text.rb | 12 ++---------- 4 files changed, 12 insertions(+), 23 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 5eae1b60b1ac11..f0ce9f61e28a37 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -73,7 +73,7 @@ def get_assets_download_redirection_url end def github_assets_url_regex - %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint))}/.*/(assets|files)/} + %r{#{Regexp.escape(web_endpoint)}/.*/(assets|files)/} end def download_from(url) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index ade1b0ae787969..2c0b45b4913fb2 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -60,7 +60,7 @@ def supported_video_media_types # From: https://github.com/login/test-import-attachments-source/blob/main/example.md # To: https://gitlab.com/login/test-import-attachments-target/-/blob/main/example.md def convert_project_content_link(attachment_url, import_source) - path_without_domain = attachment_url.gsub(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint), '') + path_without_domain = attachment_url.gsub(web_endpoint, '') path_without_import_source = path_without_domain.gsub(import_source, '').delete_prefix('/') path_with_blob_prefix = "/-#{path_without_import_source}" @@ -119,16 +119,14 @@ def record_column_for_type(record_type) end def update_ghe_video_path(file) - file = File.open(file) - content = file.read filepath = file.path - mime_type = Marcel::MimeType.for(file) + mime_type = Marcel::MimeType.for(Pathname.new(filepath)) return file unless mime_type.start_with?('video/') && File.extname(filepath).empty? extension = Mime::Type.lookup(mime_type)&.symbol&.to_s - file_with_extension = File.open("#{filepath}.#{extension}", 'wb') - file_with_extension.write(content) - file_with_extension + new_path = "#{filepath}.#{extension}" + FileUtils.mv(filepath, new_path) + File.open(new_path, 'wb') end end end diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 65e6924835106b..0ae8f704a8fb85 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -80,7 +80,6 @@ def github_url?(url, docs: false, media: false, web_endpoint: 'nil') ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) elsif docs - # url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)) url.start_with?(web_endpoint) end end @@ -107,25 +106,25 @@ def initialize(name, url, web_endpoint) def part_of_project_blob?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/blob" + "#{web_endpoint}/#{import_source}/blob" ) end def doc_belongs_to_project?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/files" + "#{web_endpoint}/#{import_source}/files" ) end def media?(import_source) url.start_with?( - "#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/#{import_source}/assets", + "#{web_endpoint}/#{import_source}/assets", ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN ) end def user_attachment? - url.start_with?("#{::Gitlab::GithubImport::MarkdownText.github_url(web_endpoint)}/user-attachments/") + url.start_with?("#{web_endpoint}/user-attachments/") end def inspect diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 51bc7afa2bba08..1aa5dbe32d7cc7 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -20,7 +20,7 @@ def format(...) new(...).perform end - def fetch_attachments(text, web_endpoint = nil) + def fetch_attachments(text, web_endpoint) attachments = [] return attachments if text.nil? @@ -33,14 +33,6 @@ def fetch_attachments(text, web_endpoint = nil) attachments end - # Returns github domain without slash in the end - def github_url(web_endpoint = nil) - oauth_config = Gitlab::Auth::OAuth::Provider.config_for('github') || {} - url = oauth_config['url'].presence || web_endpoint || 'https://github.com' - url = url.chop if url.end_with?('/') - url - end - private def extract_attachment(node, web_endpoint) @@ -90,7 +82,7 @@ def format_web_endpoint(import_url) # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) - matcher_options = { github_url: self.class.github_url(web_endpoint), import_source: project.import_source } + matcher_options = { github_url: web_endpoint, import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From 7765e8a604e90314f40041532af66a1ed7f8a6b4 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 7 Aug 2025 13:03:41 +0200 Subject: [PATCH 26/43] Update specs --- .../attachments_downloader_spec.rb | 1 + .../note_attachments_importer_spec.rb | 25 ++++++++----------- .../github_import/markdown_text_spec.rb | 24 +----------------- 3 files changed, 12 insertions(+), 38 deletions(-) diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index 1262a8420321ce..d858ff908df3a9 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -26,6 +26,7 @@ before do allow(Gitlab::HTTP).to receive(:perform_request) .with(Net::HTTP::Get, file_url, stream_body: true).and_yield(chunk_double) + allow(downloader).to receive(:web_endpoint).and_return('https://github.com') end context 'when file valid' do diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 6fa6695c9ce375..9610f7ba793637 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -227,6 +227,7 @@ let(:text) { ghe_video_url } let(:record) { create(:note, project: project, note: text) } let(:tmp_stub_video) { Tempfile.create('video-attachment') } + let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mp4') } let(:video_content) { 'fake video content' } let(:uploader_hash) do @@ -242,29 +243,23 @@ allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) allow(downloader_stub).to receive(:delete).once - allow(File).to receive(:open).and_call_original - allow(File).to receive(:open).with(tmp_stub_video).and_return(tmp_stub_video) - allow(File).to receive(:open).with( - a_string_matching(/\.mp4/), - 2562, - perm: 384 - ).and_return(tmp_stub_video_with_ext) - - allow(tmp_stub_video).to receive_messages( - read: video_content, - path: tmp_stub_video.path - ) + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/mp4') + allow_next_instance_of(described_class) do |instance| + allow(instance).to receive(:update_ghe_video_path).and_return(tmp_stub_video_with_ext) + end - allow(Marcel::MimeType).to receive(:for).with(tmp_stub_video).and_return('video/mp4') allow(File).to receive(:extname).and_call_original allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') mime_type_double = instance_double(Mime::Type, symbol: :mp4) allow(Mime::Type).to receive(:lookup).with('video/mp4').and_return(mime_type_double) - - allow(tmp_stub_video_with_ext).to receive(:write).with(video_content) + allow(FileUtils).to receive(:mv) + allow(File).to receive(:open).and_call_original + allow(File).to receive(:open).with( + a_string_matching(/\.mp4/), 'wb').and_return(tmp_stub_video_with_ext) allow_next_instance_of(FileUploader) do |uploader| + allow(uploader).to receive_message_chain(:new, :store!).and_return(true) allow(uploader).to receive(:to_h).and_return(uploader_hash) end diff --git a/spec/lib/gitlab/github_import/markdown_text_spec.rb b/spec/lib/gitlab/github_import/markdown_text_spec.rb index 9e006234a07b30..25bbcc1c7d835a 100644 --- a/spec/lib/gitlab/github_import/markdown_text_spec.rb +++ b/spec/lib/gitlab/github_import/markdown_text_spec.rb @@ -105,7 +105,7 @@ end it 'returns an empty array when passed nil' do - expect(described_class.fetch_attachments(nil)).to be_empty + expect(described_class.fetch_attachments(nil, nil)).to be_empty end context 'with GHE web endpoint' do @@ -192,26 +192,4 @@ end end end - - describe '.github_url' do - let(:web_endpoint) { 'https://cat.enterprise.com' } - - context 'with web_endpoint parameter' do - it 'returns the provided web_endpoint' do - allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) - expect(described_class.github_url(web_endpoint)).to eq(web_endpoint) - end - end - - context 'with oauth config and web_endpoint' do - before do - allow(Gitlab::Auth::OAuth::Provider) - .to receive(:config_for).with('github').and_return({ 'url' => 'https://oauth.github.com/' }) - end - - it 'prefers oauth config over web_endpoint' do - expect(described_class.github_url(web_endpoint)).to eq('https://oauth.github.com') - end - end - end end -- GitLab From 6b4fac4693de9ea0c07ddb9eb576bf23c8ccb0d6 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 8 Aug 2025 13:12:19 +0200 Subject: [PATCH 27/43] Pass client with every MarkdownText call --- .../importer/diff_note_importer.rb | 2 +- .../github_import/importer/issue_importer.rb | 3 ++- .../importer/milestones_importer.rb | 2 +- .../github_import/importer/note_importer.rb | 2 +- .../importer/pull_request_importer.rb | 2 +- .../importer/pull_requests/review_importer.rb | 3 ++- .../importer/releases_importer.rb | 2 +- .../github_import/markdown/attachment.rb | 20 +++++++++---------- lib/gitlab/github_import/markdown_text.rb | 4 ++-- 9 files changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb index 223a286774825a..92c9bc0bbd42ff 100644 --- a/lib/gitlab/github_import/importer/diff_note_importer.rb +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -110,7 +110,7 @@ def import_with_diff_note end def note_body - @note_body ||= MarkdownText.format(note.note, note.author, author_found) + @note_body ||= MarkdownText.format(note.note, note.author, author_found, client: client) end def author diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb index f68ee9e916ffc7..53518b868bdbbf 100644 --- a/lib/gitlab/github_import/importer/issue_importer.rb +++ b/lib/gitlab/github_import/importer/issue_importer.rb @@ -51,7 +51,8 @@ def issue_assignee_map def create_issue author_id, author_found = user_finder.author_id_for(issue) - description = MarkdownText.format(issue.description, issue.author, author_found, project: project) + description = MarkdownText.format(issue.description, issue.author, author_found, project: project, + client: client) assignee_ids = issue_assignee_map.keys attributes = { diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb index 79e6a848a5dc6f..9471526fe91a03 100644 --- a/lib/gitlab/github_import/importer/milestones_importer.rb +++ b/lib/gitlab/github_import/importer/milestones_importer.rb @@ -46,7 +46,7 @@ def build_attributes(milestone) end def description_for(milestone) - MarkdownText.format(milestone[:description], project: project) + MarkdownText.format(milestone[:description], project: project, client: client) end def state_for(milestone) diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb index d8858b5bd61aa9..875eae18d4e146 100644 --- a/lib/gitlab/github_import/importer/note_importer.rb +++ b/lib/gitlab/github_import/importer/note_importer.rb @@ -61,7 +61,7 @@ def find_noteable_id private def note_body(author_found) - MarkdownText.format(note.note, note.author, author_found, project: project) + MarkdownText.format(note.note, note.author, author_found, project: project, client: client) end end end diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb index 4aba28729756bb..0cff758b0e5a22 100644 --- a/lib/gitlab/github_import/importer/pull_request_importer.rb +++ b/lib/gitlab/github_import/importer/pull_request_importer.rb @@ -54,7 +54,7 @@ def execute def create_merge_request author_id, author_found = user_finder.author_id_for(pull_request) - description = MarkdownText.format(pull_request.description, pull_request.author, author_found, project: project) + description = MarkdownText.format(pull_request.description, pull_request.author, author_found, project: project, client: client) attributes = { iid: pull_request.iid, diff --git a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb index 9ac29dd8b91fae..2a35fa851dafe0 100644 --- a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb @@ -52,7 +52,8 @@ def add_complementary_review_note!(author_id) note_body = MarkdownText.format( review_note_content, - review.author + review.author, + client: client ) add_note!(author_id, note_body) diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb index 2b93c691e2b4d4..6608445884d5fb 100644 --- a/lib/gitlab/github_import/importer/releases_importer.rb +++ b/lib/gitlab/github_import/importer/releases_importer.rb @@ -75,7 +75,7 @@ def description_for(release) description = release[:body].presence || "Release for tag #{release[:tag_name]}" user = map_github_user_info(release) - MarkdownText.format(description, user, user[:id], project: project) + MarkdownText.format(description, user, user[:id], project: project, client: client) end def object_type diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index 0ae8f704a8fb85..7bb9a1cdd16783 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -37,8 +37,8 @@ def from_markdown_text(markdown_node, web_endpoint) url = URI.extract(text, %w[http https]).first return if url.nil? - return unless github_url?(url, media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) + return unless github_url?(url, web_endpoint, media: true) + return unless whitelisted_type?(url, web_endpoint, media: true) # we don't have the :alt or :name so we use a default name new("media_attachment", url, web_endpoint) @@ -48,8 +48,8 @@ def from_markdown_image(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, media: true, web_endpoint: web_endpoint) + return unless github_url?(url, web_endpoint, media: true) + return unless whitelisted_type?(url, web_endpoint, media: true) new(markdown_node.to_plaintext.strip, url, web_endpoint) end @@ -58,8 +58,8 @@ def from_markdown_link(markdown_node, web_endpoint) url = markdown_node.url return unless url - return unless github_url?(url, docs: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(url, docs: true, web_endpoint: web_endpoint) + return unless github_url?(url, web_endpoint, docs: true) + return unless whitelisted_type?(url, web_endpoint, docs: true) new(markdown_node.to_plaintext.strip, url, web_endpoint) end @@ -68,13 +68,13 @@ def from_inline_html(markdown_node, web_endpoint) img = Nokogiri::HTML.parse(markdown_node.string_content).xpath('//img')[0] return if img.nil? || img[:src].blank? - return unless github_url?(img[:src], media: true, web_endpoint: web_endpoint) - return unless whitelisted_type?(img[:src], media: true, web_endpoint: web_endpoint) + return unless github_url?(img[:src], web_endpoint, media: true) + return unless whitelisted_type?(img[:src], web_endpoint, media: true) new(img[:alt], img[:src], web_endpoint) end - def github_url?(url, docs: false, media: false, web_endpoint: 'nil') + def github_url?(url, web_endpoint, docs: false, media: false) if media url.start_with?(web_endpoint, ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN @@ -84,7 +84,7 @@ def github_url?(url, docs: false, media: false, web_endpoint: 'nil') end end - def whitelisted_type?(url, docs: false, media: false, web_endpoint: 'nil') + def whitelisted_type?(url, web_endpoint, docs: false, media: false) if media # We do not know the file extension type from the /assets markdown return true if url.start_with?(web_endpoint) diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 1aa5dbe32d7cc7..fd41a6981b8eef 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -44,12 +44,12 @@ def extract_attachment(node, web_endpoint) # author - An instance of `Gitlab::GithubImport::Representation::User` # exists - Boolean that indicates the user exists in the GitLab database. # project - An instance of `Project`. - def initialize(text, author = nil, exists = false, project: nil) + def initialize(text, author = nil, exists = false, project: nil, client: nil) @text = text @author = author @exists = exists @project = project - @web_endpoint = format_web_endpoint(project&.import_url) || "https://github.com" + @web_endpoint = client.web_endpoint || ::Octokit::Default.web_endpoint end def perform -- GitLab From f51ef4b4a363c69c10d67dffe09d2d8e96ca7c30 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 8 Aug 2025 14:54:56 +0200 Subject: [PATCH 28/43] Update sepcs with client.web_endpoint --- Gemfile | 1 + Gemfile.checksum | 2 +- Gemfile.lock | 3 ++- Gemfile.next.checksum | 2 +- Gemfile.next.lock | 3 ++- lib/gitlab/github_import/markdown_text.rb | 10 +--------- lib/gitlab/github_import/representation/diff_note.rb | 11 ++++++++++- .../github_import/importer/diff_note_importer_spec.rb | 2 +- .../importer/diff_notes_importer_spec.rb | 2 +- .../github_import/importer/events/commented_spec.rb | 2 +- .../github_import/importer/issue_importer_spec.rb | 2 +- .../importer/milestones_importer_spec.rb | 2 +- .../importer/note_attachments_importer_spec.rb | 2 +- .../github_import/importer/note_importer_spec.rb | 6 +----- .../importer/pull_request_importer_spec.rb | 2 +- .../importer/pull_requests/review_importer_spec.rb | 3 ++- .../github_import/importer/releases_importer_spec.rb | 2 +- spec/lib/gitlab/github_import/markdown_text_spec.rb | 8 ++------ 18 files changed, 31 insertions(+), 34 deletions(-) diff --git a/Gemfile b/Gemfile index fcf77487e865a4..938f5579d86c2f 100644 --- a/Gemfile +++ b/Gemfile @@ -192,6 +192,7 @@ gem 'hamlit', '~> 3.0.0', feature_category: :shared # Files attachments gem 'carrierwave', '~> 1.3', feature_category: :shared gem 'mini_magick', '~> 4.12', feature_category: :shared +gem 'marcel', '~> 1.0.4', feature_category: :shared # PDF generation gem 'prawn', feature_category: :vulnerability_management diff --git a/Gemfile.checksum b/Gemfile.checksum index ac0264bf73c6f2..2764ad5baabe6d 100644 --- a/Gemfile.checksum +++ b/Gemfile.checksum @@ -386,7 +386,7 @@ {"name":"lru_redux","version":"1.1.0","platform":"ruby","checksum":"ee71d0ccab164c51de146c27b480a68b3631d5b4297b8ffe8eda1c72de87affb"}, {"name":"lumberjack","version":"1.2.7","platform":"ruby","checksum":"a5c6aae6b4234f1420dbcd80b23e3bca0817bd239440dde097ebe3fa63c63b1f"}, {"name":"mail","version":"2.8.1","platform":"ruby","checksum":"ec3b9fadcf2b3755c78785cb17bc9a0ca9ee9857108a64b6f5cfc9c0b5bfc9ad"}, -{"name":"marcel","version":"1.0.2","platform":"ruby","checksum":"a013b677ef46cbcb49fd5c59b3d35803d2ee04dd75d8bfdc43533fc5a31f7e4e"}, +{"name":"marcel","version":"1.0.4","platform":"ruby","checksum":"0d5649feb64b8f19f3d3468b96c680bae9746335d02194270287868a661516a4"}, {"name":"marginalia","version":"1.11.1","platform":"ruby","checksum":"cb63212ab63e42746e27595e912cb20408a1a28bcd0edde55d15b7c45fa289cf"}, {"name":"matrix","version":"0.4.2","platform":"ruby","checksum":"71083ccbd67a14a43bfa78d3e4dc0f4b503b9cc18e5b4b1d686dc0f9ef7c4cc0"}, {"name":"memory_profiler","version":"1.1.0","platform":"ruby","checksum":"79a17df7980a140c83c469785905409d3027ca614c42c086089d128b805aa8f8"}, diff --git a/Gemfile.lock b/Gemfile.lock index b987a09bb106d3..aa462222cc3519 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1172,7 +1172,7 @@ GEM net-imap net-pop net-smtp - marcel (1.0.2) + marcel (1.0.4) marginalia (1.11.1) actionpack (>= 5.2) activerecord (>= 5.2) @@ -2253,6 +2253,7 @@ DEPENDENCIES lru_redux mail (= 2.8.1) mail-smtp_pool (~> 0.1.0)! + marcel (~> 1.0.4) marginalia (~> 1.11.1) memory_profiler (~> 1.0) microsoft_graph_mailer (~> 0.1.0)! diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum index 8f35ba49d0b274..47617579d02550 100644 --- a/Gemfile.next.checksum +++ b/Gemfile.next.checksum @@ -386,7 +386,7 @@ {"name":"lru_redux","version":"1.1.0","platform":"ruby","checksum":"ee71d0ccab164c51de146c27b480a68b3631d5b4297b8ffe8eda1c72de87affb"}, {"name":"lumberjack","version":"1.2.7","platform":"ruby","checksum":"a5c6aae6b4234f1420dbcd80b23e3bca0817bd239440dde097ebe3fa63c63b1f"}, {"name":"mail","version":"2.8.1","platform":"ruby","checksum":"ec3b9fadcf2b3755c78785cb17bc9a0ca9ee9857108a64b6f5cfc9c0b5bfc9ad"}, -{"name":"marcel","version":"1.0.2","platform":"ruby","checksum":"a013b677ef46cbcb49fd5c59b3d35803d2ee04dd75d8bfdc43533fc5a31f7e4e"}, +{"name":"marcel","version":"1.0.4","platform":"ruby","checksum":"0d5649feb64b8f19f3d3468b96c680bae9746335d02194270287868a661516a4"}, {"name":"marginalia","version":"1.11.1","platform":"ruby","checksum":"cb63212ab63e42746e27595e912cb20408a1a28bcd0edde55d15b7c45fa289cf"}, {"name":"matrix","version":"0.4.2","platform":"ruby","checksum":"71083ccbd67a14a43bfa78d3e4dc0f4b503b9cc18e5b4b1d686dc0f9ef7c4cc0"}, {"name":"memory_profiler","version":"1.1.0","platform":"ruby","checksum":"79a17df7980a140c83c469785905409d3027ca614c42c086089d128b805aa8f8"}, diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 3bdc10fe62b511..33d7b4f1ebb855 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -1166,7 +1166,7 @@ GEM net-imap net-pop net-smtp - marcel (1.0.2) + marcel (1.0.4) marginalia (1.11.1) actionpack (>= 5.2) activerecord (>= 5.2) @@ -2248,6 +2248,7 @@ DEPENDENCIES lru_redux mail (= 2.8.1) mail-smtp_pool (~> 0.1.0)! + marcel (~> 1.0.4) marginalia (~> 1.11.1) memory_profiler (~> 1.0) microsoft_graph_mailer (~> 0.1.0)! diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index fd41a6981b8eef..60be6fb98c9c7e 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -49,7 +49,7 @@ def initialize(text, author = nil, exists = false, project: nil, client: nil) @author = author @exists = exists @project = project - @web_endpoint = client.web_endpoint || ::Octokit::Default.web_endpoint + @web_endpoint = client&.web_endpoint || ::Octokit::Default.web_endpoint end def perform @@ -72,14 +72,6 @@ def formatted_text text end - def format_web_endpoint(import_url) - return unless import_url - - uri = URI.parse(import_url) - uri.path = '' - uri.to_s - end - # Links like `https://domain.github.com///pull/` needs to be converted def convert_ref_links(text, project) matcher_options = { github_url: web_endpoint, import_source: project.import_source } diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index e8f56adcf88822..18f3b3c6f85b78 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -52,13 +52,22 @@ def self.from_api_response(note, additional_data = {}) # Builds a new note using a Hash that was built from a JSON payload. def self.from_json_hash(raw_hash) hash = Representation.symbolize_hash(raw_hash) + web_endpoint = formatted_web_endpoint(hash[:html_url]) hash[:author] &&= Representation::User.from_json_hash(hash[:author]) - hash[:note] &&= GithubImport::MarkdownText.format(hash[:note]) + hash[:note] &&= GithubImport::MarkdownText.format(hash[:note], web_endpoint) new(hash) end + def self.formatted_web_endpoint(html_url) + return unless html_url + + uri = URI.parse(html_url) + uri.path = '' + uri.to_s + end + attr_accessor :merge_request # attributes - A Hash containing the raw note details. The keys of this diff --git a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb index f511e2a1055871..7085d0f5f889bd 100644 --- a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb @@ -14,7 +14,7 @@ let_it_be(:user) { create(:user) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:discussion_id) { 'b0fa404393eeebb4e82becb8104f238812bb1fe6' } let(:created_at) { Time.new(2017, 1, 1, 12, 00).utc } let(:updated_at) { Time.new(2017, 1, 1, 12, 15).utc } diff --git a/spec/lib/gitlab/github_import/importer/diff_notes_importer_spec.rb b/spec/lib/gitlab/github_import/importer/diff_notes_importer_spec.rb index 5eb87a7f89fb7f..f1273aa37294f4 100644 --- a/spec/lib/gitlab/github_import/importer/diff_notes_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/diff_notes_importer_spec.rb @@ -4,7 +4,7 @@ RSpec.describe Gitlab::GithubImport::Importer::DiffNotesImporter, feature_category: :importers do let(:project) { build(:project, id: 4, import_source: 'foo/bar') } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:github_comment) do { diff --git a/spec/lib/gitlab/github_import/importer/events/commented_spec.rb b/spec/lib/gitlab/github_import/importer/events/commented_spec.rb index 41958dd40bc93e..22639e7e40d7e1 100644 --- a/spec/lib/gitlab/github_import/importer/events/commented_spec.rb +++ b/spec/lib/gitlab/github_import/importer/events/commented_spec.rb @@ -16,7 +16,7 @@ let_it_be(:source_user) { generate_source_user(project, 1000) } - let(:client) { instance_double('Gitlab::GithubImport::Client') } + let(:client) { instance_double('Gitlab::GithubImport::Client', web_endpoint: 'https://github.com') } let(:issuable) { create(:issue, project: project) } let(:issue_event) do diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb index 99f23fe8ec43ba..550eb24bc41f3b 100644 --- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb @@ -18,7 +18,7 @@ let_it_be(:milestone) { create(:milestone, project: project) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:updated_at) { Time.new(2017, 1, 1, 12, 15) } let(:description) { 'This is my issue' } diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb index aacc2e9586b163..bdd15cf4aa348d 100644 --- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb @@ -4,7 +4,7 @@ RSpec.describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis_shared_state, feature_category: :importers do let(:project) { create(:project, import_source: 'foo/bar') } - let(:client) { double(:client) } + let(:client) { double(:client, web_endpoint: 'https://github.com') } let(:importer) { described_class.new(project, client) } let(:due_on) { Time.new(2017, 2, 1, 12, 00) } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 9610f7ba793637..123f32dea822da 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -8,7 +8,7 @@ let_it_be(:project) { create(:project, import_source: 'nickname/public-test-repo') } let(:note_text) { Gitlab::GithubImport::Representation::NoteText.from_db_record(record) } - let(:web_endpoint) { ::Octokit::Default.web_endpoint } + let(:web_endpoint) { 'https://github.com' } let(:client) do instance_double(Gitlab::GithubImport::Client).tap do |double| allow(double).to receive(:web_endpoint).and_return(web_endpoint) diff --git a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb index 7a4c985f8a28df..8d828c80c46b7f 100644 --- a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb @@ -12,11 +12,7 @@ ) end - let_it_be(:imported_from) { ::Import::HasImportSource::IMPORT_SOURCES[:github] } - let_it_be(:user) { create(:user) } - let_it_be(:source_user) { generate_source_user(project, '4') } - - let(:client) { double(:client) } + let(:client) { double(:client, web_endpoint: 'https://github.com') } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:updated_at) { Time.new(2017, 1, 1, 12, 15) } let(:note_body) { 'This is my note' } diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb index 505219350a45d7..7d6d381f31ec34 100644 --- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb @@ -18,7 +18,7 @@ let_it_be(:source_user_2) { generate_source_user(project, user_representation_2.id) } let_it_be(:user) { create(:user) } - let(:client) { instance_double(Gitlab::GithubImport::Client) } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:created_at) { DateTime.strptime('2024-11-05T20:10:15Z') } let(:updated_at) { DateTime.strptime('2024-11-06T20:10:15Z') } let(:merged_at) { DateTime.strptime('2024-11-07T20:10:15Z') } diff --git a/spec/lib/gitlab/github_import/importer/pull_requests/review_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_requests/review_importer_spec.rb index 2f7e8f037ba48a..dbe2107f96efc1 100644 --- a/spec/lib/gitlab/github_import/importer/pull_requests/review_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/pull_requests/review_importer_spec.rb @@ -20,7 +20,8 @@ let(:client_double) do instance_double( 'Gitlab::GithubImport::Client', - user: { id: 999, login: 'author', email: 'author@email.com' } + user: { id: 999, login: 'author', email: 'author@email.com' }, + web_endpoint: 'https://github.com' ) end diff --git a/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb b/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb index 1f77031043c343..b030a2fc37c193 100644 --- a/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb @@ -20,7 +20,7 @@ } end - let(:client) { double(:client) } + let(:client) { double(:client, web_endpoint: "https:github.com") } let(:github_release_name) { 'Initial Release' } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:released_at) { Time.new(2017, 1, 1, 12, 00) } diff --git a/spec/lib/gitlab/github_import/markdown_text_spec.rb b/spec/lib/gitlab/github_import/markdown_text_spec.rb index 25bbcc1c7d835a..53f822c1f6591f 100644 --- a/spec/lib/gitlab/github_import/markdown_text_spec.rb +++ b/spec/lib/gitlab/github_import/markdown_text_spec.rb @@ -42,6 +42,7 @@ context 'when Github EE with custom domain name' do let(:github_domain) { 'https://custom.github.com/' } + let(:client) { double(:client, web_endpoint: github_domain) } let(:text_in) do <<-TEXT #{paragraph} @@ -51,12 +52,7 @@ TEXT end - before do - allow(Gitlab::Auth::OAuth::Provider) - .to receive(:config_for).with('github').and_return({ 'url' => github_domain }) - end - - it { expect(described_class.format(text_in, project: project)).to eq text_out } + it { expect(described_class.format(text_in, project: project, client: client)).to eq text_out } end end -- GitLab From 48e0a13b062a024665ac6cc3f54a06aa4c9901af Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 8 Aug 2025 15:45:13 +0200 Subject: [PATCH 29/43] Only process attachments --- .../github_import/importer/note_attachments_importer.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 2c0b45b4913fb2..539f92dcb2db4d 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -21,9 +21,10 @@ def initialize(note_text, project, client) end def execute - return unless Gitlab::GithubImport::MarkdownText.fetch_attachments(note_text.text, web_endpoint).present? + attachments = Gitlab::GithubImport::MarkdownText.fetch_attachments(note_text.text, web_endpoint) + return if attachments.blank? - new_text = note_text.attachments(web_endpoint).reduce(note_text.text) do |text, attachment| + new_text = attachments.reduce(note_text.text) do |text, attachment| new_url = gitlab_attachment_link(attachment) # we need to update video media file links with the correct markdown format -- GitLab From caa6e26c2bc393adb4446dd0d056133823846b96 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 8 Aug 2025 17:45:48 +0200 Subject: [PATCH 30/43] Remove trailing / from web_endpoint --- lib/gitlab/github_import/markdown_text.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb index 60be6fb98c9c7e..c97bd1fd056a2c 100644 --- a/lib/gitlab/github_import/markdown_text.rb +++ b/lib/gitlab/github_import/markdown_text.rb @@ -57,7 +57,7 @@ def perform # Gitlab::EncodingHelper#clean remove `null` chars from the string text = clean(formatted_text) - text = convert_ref_links(text, project) if project.present? + text = convert_ref_links(text, project, web_endpoint) if project.present? wrap_mentions_in_backticks(text) end @@ -73,7 +73,8 @@ def formatted_text end # Links like `https://domain.github.com///pull/` needs to be converted - def convert_ref_links(text, project) + def convert_ref_links(text, project, web_endpoint) + web_endpoint = web_endpoint.chop if web_endpoint.end_with?('/') matcher_options = { github_url: web_endpoint, import_source: project.import_source } issue_ref_matcher = ISSUE_REF_MATCHER % matcher_options pull_ref_matcher = PULL_REF_MATCHER % matcher_options -- GitLab From 0b0234af40ea6e366b0d16ef497ca8bbd694c947 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Mon, 11 Aug 2025 14:42:22 +0200 Subject: [PATCH 31/43] Fix failing specs --- .../importer/note_attachments_importer_spec.rb | 15 +++++---------- .../attachments/import_issue_worker_spec.rb | 2 +- .../import_merge_request_worker_spec.rb | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 123f32dea822da..caea5891311ce6 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -229,7 +229,6 @@ let(:tmp_stub_video) { Tempfile.create('video-attachment') } let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mp4') } - let(:video_content) { 'fake video content' } let(:uploader_hash) do { alt: nil, url: '/uploads/test-secret/video.mp4', @@ -241,7 +240,7 @@ .with(ghe_video_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) - allow(downloader_stub).to receive(:delete).once + allow(downloader_stub).to receive(:delete) allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/mp4') allow_next_instance_of(described_class) do |instance| @@ -253,15 +252,12 @@ mime_type_double = instance_double(Mime::Type, symbol: :mp4) allow(Mime::Type).to receive(:lookup).with('video/mp4').and_return(mime_type_double) - allow(FileUtils).to receive(:mv) - allow(File).to receive(:open).and_call_original + allow(FileUtils).to receive(:mv).and_return(true) + allow(File).to receive(:open).with( a_string_matching(/\.mp4/), 'wb').and_return(tmp_stub_video_with_ext) - allow_next_instance_of(FileUploader) do |uploader| - allow(uploader).to receive_message_chain(:new, :store!).and_return(true) - allow(uploader).to receive(:to_h).and_return(uploader_hash) - end + allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) @@ -273,9 +269,8 @@ importer.execute end - it 'updates video attachment path correctly' do + it 'changes standalone attachment link to correct markdown' do importer.execute - record.reload expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") end diff --git a/spec/workers/gitlab/github_import/attachments/import_issue_worker_spec.rb b/spec/workers/gitlab/github_import/attachments/import_issue_worker_spec.rb index e0db440232cf25..0300937cbb13d4 100644 --- a/spec/workers/gitlab/github_import/attachments/import_issue_worker_spec.rb +++ b/spec/workers/gitlab/github_import/attachments/import_issue_worker_spec.rb @@ -12,7 +12,7 @@ instance_double('Project', full_path: 'foo/bar', id: 1, import_state: import_state) end - let(:client) { instance_double('Gitlab::GithubImport::Client') } + let(:client) { instance_double('Gitlab::GithubImport::Client', web_endpoint: "https://github.com") } let(:issue_hash) do { diff --git a/spec/workers/gitlab/github_import/attachments/import_merge_request_worker_spec.rb b/spec/workers/gitlab/github_import/attachments/import_merge_request_worker_spec.rb index b4be229af2a0be..cb7d04b69de3d7 100644 --- a/spec/workers/gitlab/github_import/attachments/import_merge_request_worker_spec.rb +++ b/spec/workers/gitlab/github_import/attachments/import_merge_request_worker_spec.rb @@ -12,7 +12,7 @@ instance_double('Project', full_path: 'foo/bar', id: 1, import_state: import_state) end - let(:client) { instance_double('Gitlab::GithubImport::Client') } + let(:client) { instance_double('Gitlab::GithubImport::Client', web_endpoint: "https://github.com") } let(:mr_hash) do { -- GitLab From b2afc9ade6de0c1834b40a9bcd8eafdd219d97f2 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 12 Aug 2025 16:51:36 +0200 Subject: [PATCH 32/43] Apply 8 suggestion(s) to 7 file(s) Co-authored-by: Rodrigo Tomonari --- lib/gitlab/github_import/attachments_downloader.rb | 4 +++- .../importer/note_attachments_importer.rb | 10 ++++++---- .../importer/pull_requests/review_importer.rb | 1 + lib/gitlab/github_import/representation/note_text.rb | 4 ---- .../github_import/importer/milestones_importer_spec.rb | 2 +- .../github_import/importer/note_importer_spec.rb | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index f0ce9f61e28a37..9d25368b61c599 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -16,7 +16,9 @@ class AttachmentsDownloader attr_reader :file_url, :filename, :file_size_limit, :options, :web_endpoint - def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, web_endpoint: nil) + def initialize( + file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT, + web_endpoint: ::Octokit::Default.web_endpoint) @file_url = file_url @options = options @file_size_limit = file_size_limit diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 539f92dcb2db4d..8d505fc96efe3e 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -6,8 +6,6 @@ module Importer class NoteAttachmentsImporter attr_reader :note_text, :project, :client, :web_endpoint - require 'marcel' - SUPPORTED_RECORD_TYPES = [::Release.name, ::Issue.name, ::MergeRequest.name, ::Note.name].freeze # note_text - An instance of `Gitlab::GithubImport::Representation::NoteText`. @@ -121,13 +119,17 @@ def record_column_for_type(record_type) def update_ghe_video_path(file) filepath = file.path + return file if File.extname(filepath).present? + mime_type = Marcel::MimeType.for(Pathname.new(filepath)) - return file unless mime_type.start_with?('video/') && File.extname(filepath).empty? + return file unless mime_type.start_with?('video/') extension = Mime::Type.lookup(mime_type)&.symbol&.to_s + return file if extension.blank? + new_path = "#{filepath}.#{extension}" FileUtils.mv(filepath, new_path) - File.open(new_path, 'wb') + File.open(new_path, 'rb') end end end diff --git a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb index 2a35fa851dafe0..508b98efaa2d29 100644 --- a/lib/gitlab/github_import/importer/pull_requests/review_importer.rb +++ b/lib/gitlab/github_import/importer/pull_requests/review_importer.rb @@ -53,6 +53,7 @@ def add_complementary_review_note!(author_id) note_body = MarkdownText.format( review_note_content, review.author, + project: project, client: client ) diff --git a/lib/gitlab/github_import/representation/note_text.rb b/lib/gitlab/github_import/representation/note_text.rb index 25981299f2edb8..43e18a923d603d 100644 --- a/lib/gitlab/github_import/representation/note_text.rb +++ b/lib/gitlab/github_import/representation/note_text.rb @@ -55,10 +55,6 @@ def github_identifiers }.merge(record_type_specific_attribute) end - def attachments(web_endpoint = nil) - @attachments ||= MarkdownText.fetch_attachments(text, web_endpoint) - end - private def record_type_specific_attribute diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb index bdd15cf4aa348d..e3d9d07f9453a8 100644 --- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb @@ -4,7 +4,7 @@ RSpec.describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis_shared_state, feature_category: :importers do let(:project) { create(:project, import_source: 'foo/bar') } - let(:client) { double(:client, web_endpoint: 'https://github.com') } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:importer) { described_class.new(project, client) } let(:due_on) { Time.new(2017, 2, 1, 12, 00) } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } diff --git a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb index 8d828c80c46b7f..6424dbd5c069e3 100644 --- a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb @@ -12,7 +12,7 @@ ) end - let(:client) { double(:client, web_endpoint: 'https://github.com') } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:updated_at) { Time.new(2017, 1, 1, 12, 15) } let(:note_body) { 'This is my note' } -- GitLab From c84be69b2e03c98a6eaa14473a0fca6b33d7f362 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 12 Aug 2025 18:42:59 +0200 Subject: [PATCH 33/43] Update specs following suggestions --- .../note_attachments_importer_spec.rb | 2 +- .../representation/note_text_spec.rb | 30 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index caea5891311ce6..2aec4924b7fa79 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -255,7 +255,7 @@ allow(FileUtils).to receive(:mv).and_return(true) allow(File).to receive(:open).with( - a_string_matching(/\.mp4/), 'wb').and_return(tmp_stub_video_with_ext) + a_string_matching(/\.mp4/), 'rb').and_return(tmp_stub_video_with_ext) allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) diff --git a/spec/lib/gitlab/github_import/representation/note_text_spec.rb b/spec/lib/gitlab/github_import/representation/note_text_spec.rb index 63dd6dab5b8052..f81f82fb36bf6d 100644 --- a/spec/lib/gitlab/github_import/representation/note_text_spec.rb +++ b/spec/lib/gitlab/github_import/representation/note_text_spec.rb @@ -153,34 +153,4 @@ end end end - - describe '#attachments' do - let(:web_endpoint) { "https://github.com" } - - subject { described_class.new({ text: text }).attachments(web_endpoint) } - - context 'when text has attachments' do - let(:text) { 'See ![image](https://user-images.githubusercontent.com/1/uuid-1.png) for details' } - - it { is_expected.to contain_exactly(instance_of(Gitlab::GithubImport::Markdown::Attachment)) } - end - - context 'when text does not have attachments' do - let(:text) { 'Some text here' } - - it { is_expected.to be_empty } - end - - context 'when text has GHE attachments' do - let(:web_endpoint) { "https://foo.ghe.com" } - let(:text) { "See ![image](#{web_endpoint}/user-attachments/assets/uuid-1.png) for details" } - - it 'returns attachments with web_endpoint' do - attachments = described_class.new({ text: text }).attachments(web_endpoint) - - expect(attachments).to contain_exactly(instance_of(Gitlab::GithubImport::Markdown::Attachment)) - expect(attachments.first.web_endpoint).to eq(web_endpoint) - end - end - end end -- GitLab From e9eecee578e2da44d40427b4c7b71e71d1226932 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 13 Aug 2025 12:58:33 +0200 Subject: [PATCH 34/43] Handle quicktime files, update specs --- .../importer/note_attachments_importer.rb | 7 ++- .../attachments_downloader_spec.rb | 1 - .../note_attachments_importer_spec.rb | 47 ++++++++++++++----- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 8d505fc96efe3e..ca9961dfa9d135 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -124,7 +124,12 @@ def update_ghe_video_path(file) mime_type = Marcel::MimeType.for(Pathname.new(filepath)) return file unless mime_type.start_with?('video/') - extension = Mime::Type.lookup(mime_type)&.symbol&.to_s + extension = case mime_type + when 'video/quicktime' then '.mov' + when 'video/mp4' then '.mp4' + when 'video/webm' then '.webm' + end + return file if extension.blank? new_path = "#{filepath}.#{extension}" diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index d858ff908df3a9..1262a8420321ce 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -26,7 +26,6 @@ before do allow(Gitlab::HTTP).to receive(:perform_request) .with(Net::HTTP::Get, file_url, stream_body: true).and_yield(chunk_double) - allow(downloader).to receive(:web_endpoint).and_return('https://github.com') end context 'when file valid' do diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 2aec4924b7fa79..90e4d0790e3ee5 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -151,7 +151,7 @@ expect(record.note).to include("[link to other project blob file](#{other_project_blob_url})") end - context 'when the attachment is video media' do + context 'when the attachment is supported video media' do let(:media_attachment_url) { 'https://github.com/user-attachments/assets/media-attachment' } let(:text) { media_attachment_url } let(:record) { create(:note, project: project, note: text) } @@ -222,16 +222,17 @@ expect(record.description).to include("![ghe-image](/uploads/") end - context 'when the attachment is a video file' do # rubocop:disable RSpec/MultipleMemoizedHelpers -- required + # rubocop:disable RSpec/MultipleMemoizedHelpers -- required + context 'when the attachment is a video file' do let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } let(:text) { ghe_video_url } let(:record) { create(:note, project: project, note: text) } let(:tmp_stub_video) { Tempfile.create('video-attachment') } let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } - let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mp4') } + let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mov') } let(:uploader_hash) do { alt: nil, - url: '/uploads/test-secret/video.mp4', + url: '/uploads/test-secret/video.mov', markdown: nil } end @@ -242,20 +243,19 @@ allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) allow(downloader_stub).to receive(:delete) - allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/mp4') - allow_next_instance_of(described_class) do |instance| - allow(instance).to receive(:update_ghe_video_path).and_return(tmp_stub_video_with_ext) - end + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/quicktime') allow(File).to receive(:extname).and_call_original allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') - mime_type_double = instance_double(Mime::Type, symbol: :mp4) - allow(Mime::Type).to receive(:lookup).with('video/mp4').and_return(mime_type_double) + mime_type_double = instance_double(Mime::Type) + allow(Mime::Type).to receive(:lookup).with('video/quicktime').and_return(mime_type_double) + allow(Mime::Type).to receive(:lookup).with('video/webm').and_return(mime_type_double) + allow(Mime::Type).to receive(:lookup).with('video/avi').and_return(mime_type_double) allow(FileUtils).to receive(:mv).and_return(true) allow(File).to receive(:open).with( - a_string_matching(/\.mp4/), 'rb').and_return(tmp_stub_video_with_ext) + a_string_matching(/\.mov/), 'rb').and_return(tmp_stub_video_with_ext) allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) @@ -272,9 +272,32 @@ it 'changes standalone attachment link to correct markdown' do importer.execute record.reload - expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mov)") + end + + context 'with webm format' do + let(:uploader_hash) do + { alt: nil, + url: '/uploads/test-secret/video.webm', + markdown: nil } + end + + it 'changes standalone attachment link to correct markdown' do + importer.execute + record.reload + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.webm)") + end + end + + context 'with unsupported video format' do + it 'does not update the link with a file extension' do + importer.execute + record.reload + expect(record.note).to include("/uploads/test-secret/video") + end end end + # rubocop:enable RSpec/MultipleMemoizedHelpers end end end -- GitLab From 9df55592ece1abd50635740a74da2c8e3c810634 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 13 Aug 2025 16:02:20 +0200 Subject: [PATCH 35/43] Apply suggestions, handle zip and pdf attachments --- doc/user/project/import/github.md | 4 +- .../importer/diff_note_importer.rb | 2 +- .../importer/note_attachments_importer.rb | 5 +++ .../github_import/representation/diff_note.rb | 12 +----- .../attachments_downloader_spec.rb | 12 ------ .../importer/diff_note_importer_spec.rb | 14 +++++++ .../note_attachments_importer_spec.rb | 40 +++++++++++++++++++ .../representation/diff_note_spec.rb | 14 +------ 8 files changed, 65 insertions(+), 38 deletions(-) diff --git a/doc/user/project/import/github.md b/doc/user/project/import/github.md index 69207e06d3e1a2..8b87c1e43408ad 100644 --- a/doc/user/project/import/github.md +++ b/doc/user/project/import/github.md @@ -88,8 +88,8 @@ GitHub Enterprise does not require a public email address, so you might have to - GitHub pull request comments (known as diff notes in GitLab) created before 2017 are imported in separate threads. This occurs because of a limitation of the GitHub API that doesn't include `in_reply_to_id` for comments before 2017. -- Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), Markdown attachments from - repositories on GitHub Enterprise Server instances aren't imported. +- Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), only video and image files in Markdown attachments from + repositories on GitHub Enterprise Server instances are imported. PDF and Zip file attachments are not imported. - Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/418800), when importing projects that used [GitHub auto-merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/automatically-merging-a-pull-request), the imported project in GitLab can have merge commits labeled `unverified` if the commit was signed with the GitHub internal GPG key. - GitLab [can't import](https://gitlab.com/gitlab-org/gitlab/-/issues/424046) GitHub Markdown image attachments that diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb index 92c9bc0bbd42ff..4c09ce675d8594 100644 --- a/lib/gitlab/github_import/importer/diff_note_importer.rb +++ b/lib/gitlab/github_import/importer/diff_note_importer.rb @@ -110,7 +110,7 @@ def import_with_diff_note end def note_body - @note_body ||= MarkdownText.format(note.note, note.author, author_found, client: client) + @note_body ||= MarkdownText.format(note.note, note.author, author_found, project: project, client: client) end def author diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index ca9961dfa9d135..e85d19e6a41e5c 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -76,6 +76,11 @@ def download_attachment(attachment) # for ghe imports check on filetype to add ext to video attachments file = update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint + # for ghe imports skip pdf or zip attachments + if web_endpoint != ::Octokit::Default.web_endpoint && file.path.ends_with?('.zip', '.pdf') + return attachment.url + end + uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index 18f3b3c6f85b78..d2f6325819f264 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -34,7 +34,7 @@ def self.from_api_response(note, additional_data = {}) original_commit_id: note[:original_commit_id], diff_hunk: note[:diff_hunk], author: user, - note: GithubImport::MarkdownText.format(note[:body]), + note: note[:body], created_at: note[:created_at], updated_at: note[:updated_at], note_id: note[:id], @@ -52,22 +52,12 @@ def self.from_api_response(note, additional_data = {}) # Builds a new note using a Hash that was built from a JSON payload. def self.from_json_hash(raw_hash) hash = Representation.symbolize_hash(raw_hash) - web_endpoint = formatted_web_endpoint(hash[:html_url]) hash[:author] &&= Representation::User.from_json_hash(hash[:author]) - hash[:note] &&= GithubImport::MarkdownText.format(hash[:note], web_endpoint) new(hash) end - def self.formatted_web_endpoint(html_url) - return unless html_url - - uri = URI.parse(html_url) - uri.path = '' - uri.to_s - end - attr_accessor :merge_request # attributes - A Hash containing the raw note details. The keys of this diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index 1262a8420321ce..a14fed3e2a21bc 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -10,18 +10,6 @@ let(:chunk_double) { instance_double(HTTParty::ResponseFragment, code: 200) } - describe '#initialize' do - context 'with GHE web_endpoint' do - let(:web_endpoint) { 'https://github.enterprise.com' } - - subject(:downloader) { described_class.new(file_url, web_endpoint: web_endpoint) } - - it 'sets the web_endpoint' do - expect(downloader.web_endpoint).to eq(web_endpoint) - end - end - end - describe '#perform' do before do allow(Gitlab::HTTP).to receive(:perform_request) diff --git a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb index 7085d0f5f889bd..d002ba012502a6 100644 --- a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb @@ -111,6 +111,20 @@ expect(note.updated_at).to eq(updated_at) end + context 'when the note body includes @ mentions' do + let(:note_body) { "I said to @sam_allen\0 the code should follow @bob's\0 advice. @.ali-ce/group#9?\0" } + let(:expected_note_body) { "I said to `@sam_allen` the code should follow `@bob`'s advice. `@.ali-ce/group#9`?" } + + it 'formats the text correctly' do + stub_user_finder(user.id, true) + + subject.execute + + note = project.notes.diff_notes.take + expect(note.note).to eq(expected_note_body) + end + end + context 'when the note has suggestions' do let(:note_body) do <<~EOB diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 90e4d0790e3ee5..de55776a6c5f2b 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -222,6 +222,46 @@ expect(record.description).to include("![ghe-image](/uploads/") end + context 'when the attachment is a pdf or zip file' do + let(:pdf_file_url) { "#{web_endpoint}/user-attachments/files/3/pdf-attachment.pdf" } + let(:zip_file_url) { "#{web_endpoint}/user-attachments/files/4/zip-attachment.zip" } + let(:tmp_stub_pdf) { Tempfile.create('pdf-attachment.pdf') } + let(:tmp_stub_zip) { Tempfile.create('zip-attachment.zip') } + + let(:text) do + <<-TEXT.split("\n").map(&:strip).join("\n") + Some text... + [pdf-file](#{pdf_file_url}) + [zip-file](#{zip_file_url}) + TEXT + end + + let(:record) { create(:note, project: project, note: text) } + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(pdf_file_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(zip_file_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_pdf, tmp_stub_zip) + allow(downloader_stub).to receive(:delete).twice + allow(File).to receive(:extname).with(anything).and_return('.pdf') + allow(tmp_stub_pdf).to receive_message_chain(:path, :ends_with?).with('.zip', '.pdf').and_return(true) + allow(tmp_stub_zip).to receive_message_chain(:path, :ends_with?).with('.zip', '.pdf').and_return(true) + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + end + + it 'does not changes attachment link' do + importer.execute + + record.reload + expect(record.note).to include(pdf_file_url) + expect(record.note).to include(zip_file_url) + end + end + # rubocop:disable RSpec/MultipleMemoizedHelpers -- required context 'when the attachment is a video file' do let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } diff --git a/spec/lib/gitlab/github_import/representation/diff_note_spec.rb b/spec/lib/gitlab/github_import/representation/diff_note_spec.rb index f8f34d7bf92bff..4295627ff4f165 100644 --- a/spec/lib/gitlab/github_import/representation/diff_note_spec.rb +++ b/spec/lib/gitlab/github_import/representation/diff_note_spec.rb @@ -35,18 +35,8 @@ shared_examples 'a DiffNote representation' do context 'when note body is present' do - let(:note_body) { "I said to @sam_allen\0 the code should follow @bob's\0 advice. @.ali-ce/group#9?\0" } - let(:expected_note_body) { "I said to `@sam_allen` the code should follow `@bob`'s advice. `@.ali-ce/group#9`?" } - - before do - allow(Gitlab::GithubImport::MarkdownText).to receive(:format).and_call_original - - note - end - - it 'verify that the formatted description using MarkdownText equals the expected description' do - expect(Gitlab::GithubImport::MarkdownText).to have_received(:format) - expect(note.note).to eq(expected_note_body) + it 'includes the note body' do + expect(note.note).to eq(note_body) end end -- GitLab From f64da89d2b599fefd52a7c551547d5fb3470e0dd Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 14 Aug 2025 11:52:52 +0200 Subject: [PATCH 36/43] Apply doc suggestion --- doc/user/project/import/github.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/user/project/import/github.md b/doc/user/project/import/github.md index 8b87c1e43408ad..274a9d6bdd501f 100644 --- a/doc/user/project/import/github.md +++ b/doc/user/project/import/github.md @@ -88,8 +88,11 @@ GitHub Enterprise does not require a public email address, so you might have to - GitHub pull request comments (known as diff notes in GitLab) created before 2017 are imported in separate threads. This occurs because of a limitation of the GitHub API that doesn't include `in_reply_to_id` for comments before 2017. -- Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), only video and image files in Markdown attachments from - repositories on GitHub Enterprise Server instances are imported. PDF and Zip file attachments are not imported. +- [In GitLab 18.2 and earlier](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), Markdown attachments + from repositories on GitHub Enterprise Server instances are not imported. + [In GitLab 18.3 and later](https://gitlab.com/gitlab-org/gitlab/-/issues/553386): + - Only video and image files in Markdown attachments are imported. + - Other file attachments are not imported. - Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/418800), when importing projects that used [GitHub auto-merge](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/incorporating-changes-from-a-pull-request/automatically-merging-a-pull-request), the imported project in GitLab can have merge commits labeled `unverified` if the commit was signed with the GitHub internal GPG key. - GitLab [can't import](https://gitlab.com/gitlab-org/gitlab/-/issues/424046) GitHub Markdown image attachments that -- GitLab From d47adf1cf7f6e16f977c543bbe78a2f8d21d5751 Mon Sep 17 00:00:00 2001 From: Sam Word Date: Wed, 13 Aug 2025 20:44:15 +0000 Subject: [PATCH 37/43] Map contributions to personal namespace owner For projects imported to personal namespaces from GitHub, immediately map all user contributions to the personal namespace owner when user_mapping_to_personal_namespace_owner feature flag is enabled. --- .../lib/gitlab/github_import/importer/releases_importer_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb b/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb index b030a2fc37c193..7be41b75ee200b 100644 --- a/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/releases_importer_spec.rb @@ -20,7 +20,7 @@ } end - let(:client) { double(:client, web_endpoint: "https:github.com") } + let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } let(:github_release_name) { 'Initial Release' } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:released_at) { Time.new(2017, 1, 1, 12, 00) } -- GitLab From c1d9a45b63e73eafe0c55df825569bb266d5a986 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Fri, 8 Aug 2025 14:54:56 +0200 Subject: [PATCH 38/43] Update sepcs with client.web_endpoint --- lib/gitlab/github_import/representation/diff_note.rb | 8 ++++++++ .../gitlab/github_import/importer/note_importer_spec.rb | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index d2f6325819f264..e009284dfe3fb2 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -58,6 +58,14 @@ def self.from_json_hash(raw_hash) new(hash) end + def self.formatted_web_endpoint(html_url) + return unless html_url + + uri = URI.parse(html_url) + uri.path = '' + uri.to_s + end + attr_accessor :merge_request # attributes - A Hash containing the raw note details. The keys of this diff --git a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb index 6424dbd5c069e3..6d3be1c58353d4 100644 --- a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb @@ -12,7 +12,11 @@ ) end - let(:client) { instance_double(Gitlab::GithubImport::Client, web_endpoint: 'https://github.com') } + let_it_be(:imported_from) { ::Import::HasImportSource::IMPORT_SOURCES[:github] } + let_it_be(:user) { create(:user) } + let_it_be(:source_user) { generate_source_user(project, '4') } + + let(:client) { instance_double(:client, web_endpoint: 'https://github.com') } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:updated_at) { Time.new(2017, 1, 1, 12, 15) } let(:note_body) { 'This is my note' } -- GitLab From ea9e08e03c0f3c202b67754a0cd87e68b82787a4 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Wed, 13 Aug 2025 16:02:20 +0200 Subject: [PATCH 39/43] Apply suggestions, handle zip and pdf attachments --- lib/gitlab/github_import/representation/diff_note.rb | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb index e009284dfe3fb2..d2f6325819f264 100644 --- a/lib/gitlab/github_import/representation/diff_note.rb +++ b/lib/gitlab/github_import/representation/diff_note.rb @@ -58,14 +58,6 @@ def self.from_json_hash(raw_hash) new(hash) end - def self.formatted_web_endpoint(html_url) - return unless html_url - - uri = URI.parse(html_url) - uri.path = '' - uri.to_s - end - attr_accessor :merge_request # attributes - A Hash containing the raw note details. The keys of this -- GitLab From 50e54dc2571eb7d8c6e70575babef71e9de05cef Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 14 Aug 2025 14:37:34 +0200 Subject: [PATCH 40/43] Filter login redirects, filter file attachments --- .../github_import/attachments_downloader.rb | 3 +++ .../importer/note_attachments_importer.rb | 17 ++++++++++++----- .../attachments_downloader_spec.rb | 12 ++++++++++++ .../importer/note_attachments_importer_spec.rb | 12 +++--------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index 9d25368b61c599..ae03ee276304eb 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -33,6 +33,9 @@ def perform download_url = get_assets_download_redirection_url + # skip download for GHE server urls that redirect to a login url + return file_url if download_url.include?("login?return_to=") + parsed_file_name = File.basename(URI.parse(download_url).path) # if the file has a video filetype extension, we update both the @filename and @filepath with the filetype ext. diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index e85d19e6a41e5c..c7139c0abf9b48 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -71,16 +71,19 @@ def convert_project_content_link(attachment_url, import_source) def download_attachment(attachment) downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options, web_endpoint: web_endpoint) - file = downloader.perform - # for ghe imports check on filetype to add ext to video attachments - file = update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint + file = downloader.perform - # for ghe imports skip pdf or zip attachments - if web_endpoint != ::Octokit::Default.web_endpoint && file.path.ends_with?('.zip', '.pdf') + # for ghe imports skip file attachments + # in these cases the AttachmentsDownloader returns the redirect url + # so we return the original attachment.url + if web_endpoint != ::Octokit::Default.web_endpoint && file.is_a?(String) && file.starts_with?(github_file_url_regex) # rubocop:disable Layout/LineLength,Lint/RedundantCopDisableDirective -- minor infraction return attachment.url end + # for ghe imports check on filetype to add ext to video attachments + file = update_ghe_video_path(file) unless web_endpoint == ::Octokit::Default.web_endpoint + uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] @@ -141,6 +144,10 @@ def update_ghe_video_path(file) FileUtils.mv(filepath, new_path) File.open(new_path, 'rb') end + + def github_file_url_regex + %r{#{Regexp.escape(web_endpoint)}/.*/files/} + end end end end diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index a14fed3e2a21bc..27e0ce6127c486 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -125,6 +125,18 @@ expect(File.exist?(file.path)).to eq(true) end + context 'when filename includes login redirect' do + let(:redirect_url) { 'https://example.com/some-text/login?return_to=gpre366' } + + it 'returns the original file_url' do + expect(::Import::Clients::HTTP).to receive(:get).with(file_url, { follow_redirects: false }) + .and_return sample_response + + file = downloader.perform + expect(file).to eq(file_url) + end + end + context 'when attachment is a video media file' do let(:file_url) { "https://github.com/user-attachments/assets/73433gh3" } let(:redirect_url) { "https://github-production-user-asset-6210df.s3.amazonaws.com/73433gh3.mov" } diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index de55776a6c5f2b..46a6cdeb0e44ef 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -222,11 +222,9 @@ expect(record.description).to include("![ghe-image](/uploads/") end - context 'when the attachment is a pdf or zip file' do + context 'when the attachment is file' do let(:pdf_file_url) { "#{web_endpoint}/user-attachments/files/3/pdf-attachment.pdf" } let(:zip_file_url) { "#{web_endpoint}/user-attachments/files/4/zip-attachment.zip" } - let(:tmp_stub_pdf) { Tempfile.create('pdf-attachment.pdf') } - let(:tmp_stub_zip) { Tempfile.create('zip-attachment.zip') } let(:text) do <<-TEXT.split("\n").map(&:strip).join("\n") @@ -245,15 +243,11 @@ allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) .with(zip_file_url, options: options, web_endpoint: web_endpoint) .and_return(downloader_stub) - allow(downloader_stub).to receive(:perform).and_return(tmp_stub_pdf, tmp_stub_zip) - allow(downloader_stub).to receive(:delete).twice - allow(File).to receive(:extname).with(anything).and_return('.pdf') - allow(tmp_stub_pdf).to receive_message_chain(:path, :ends_with?).with('.zip', '.pdf').and_return(true) - allow(tmp_stub_zip).to receive_message_chain(:path, :ends_with?).with('.zip', '.pdf').and_return(true) + allow(downloader_stub).to receive(:perform).and_return(pdf_file_url, zip_file_url) allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) end - it 'does not changes attachment link' do + it 'does not change attachment link' do importer.execute record.reload -- GitLab From 311b3a1612fe7faeaccfe3435f813c9df7fb8c61 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 14 Aug 2025 14:41:17 +0200 Subject: [PATCH 41/43] Filter login redirects, filter file attachments --- Gemfile.next.checksum | 26 ++--- Gemfile.next.lock | 106 +++++++++--------- .../importer/diff_note_importer_spec.rb | 2 - .../importer/note_importer_spec.rb | 2 +- 4 files changed, 67 insertions(+), 69 deletions(-) diff --git a/Gemfile.next.checksum b/Gemfile.next.checksum index 47617579d02550..7ff229b6c3b81d 100644 --- a/Gemfile.next.checksum +++ b/Gemfile.next.checksum @@ -2,17 +2,17 @@ {"name":"CFPropertyList","version":"3.0.7","platform":"ruby","checksum":"c45721614aca8d5eb6fa216f2ec28ec38de1a94505e9766a20e98745492c3c4c"}, {"name":"RedCloth","version":"4.3.4","platform":"ruby","checksum":"5231b2fdd91a933915cba330e5fd1a74025e77b56f57b7404c7191ebf2812297"}, {"name":"acme-client","version":"2.0.25","platform":"ruby","checksum":"e0bba7b9f785fd9ffe0933f8733ca81357ac46e4a979cb4f84806ab88fee0f31"}, -{"name":"actioncable","version":"7.2.2.1","platform":"ruby","checksum":"5b3b885075a80767d63cbf2b586cbf82466a241675b7985233f957abb01bffb4"}, -{"name":"actionmailbox","version":"7.2.2.1","platform":"ruby","checksum":"896a47c2520f4507c75dde67c6ea1f5eec3a041fe7bfbf3568c4e0149a080e25"}, -{"name":"actionmailer","version":"7.2.2.1","platform":"ruby","checksum":"b02ae523c32c8ad762d4db941e76f3c108c106030132247ee7a7b8c86bc7b21f"}, -{"name":"actionpack","version":"7.2.2.1","platform":"ruby","checksum":"17b2160a7bcbd5a569d06b1ae54a4bb5ccc7ba0815d73ff5768100a79dc1f734"}, -{"name":"actiontext","version":"7.2.2.1","platform":"ruby","checksum":"f369cee41a6674b697bf9257d917a3dce575a2c89935af437b432d6737a3f0d6"}, -{"name":"actionview","version":"7.2.2.1","platform":"ruby","checksum":"69fc880cf3d8b1baf21b048cf7bb68f1eef08760ff8104d7d60a6a1be8b359a5"}, -{"name":"activejob","version":"7.2.2.1","platform":"ruby","checksum":"f2f95a8573b394aa4f7c24843f0c4a6065c073a5c64d6f15ecd98d98c2c23e5b"}, -{"name":"activemodel","version":"7.2.2.1","platform":"ruby","checksum":"8398861f9ee2c4671a8357ab39e9b38a045fd656f6685a3dd5890c2419dbfdaf"}, -{"name":"activerecord","version":"7.2.2.1","platform":"ruby","checksum":"79a31f71c32d5138717c2104e0ff105f5d82922247c85bdca144f2720e67fab9"}, -{"name":"activestorage","version":"7.2.2.1","platform":"ruby","checksum":"b4ec35ff94d4d6656ee6952ce439c3f80e249552d49fd2d3996ee53880c5525f"}, -{"name":"activesupport","version":"7.2.2.1","platform":"ruby","checksum":"842bcbf8a92977f80fb4750661a237cf5dd4fdd442066b3c35e88afb488647f5"}, +{"name":"actioncable","version":"7.2.2.2","platform":"ruby","checksum":"3d957adc9d1d2ddb5ac8ed8791dc35b273c722f2dca2644f415bd24ba64c7425"}, +{"name":"actionmailbox","version":"7.2.2.2","platform":"ruby","checksum":"7f784a3685a877ca0937bf28f989bbafbffd0f3966d724e41bf9319f881487ef"}, +{"name":"actionmailer","version":"7.2.2.2","platform":"ruby","checksum":"d37c8ab094f3b73c3fbcbbf41d2e31fc15b607178569a58057ed878c2063a6e6"}, +{"name":"actionpack","version":"7.2.2.2","platform":"ruby","checksum":"ccd2f96ffb378060dd02e86318bca3faae1ecf483603a525fabfd84197c86a6e"}, +{"name":"actiontext","version":"7.2.2.2","platform":"ruby","checksum":"8e80623cf206f077f4b671846ba74b0cb154b2a306a6569d3c4b3deb22e2b701"}, +{"name":"actionview","version":"7.2.2.2","platform":"ruby","checksum":"5bf67e9716fbd159f09cbc8cf87f4813d3e8725f0197a7321910e9dc8c165b07"}, +{"name":"activejob","version":"7.2.2.2","platform":"ruby","checksum":"e706383862084022d531eee64f74ac4b5fd751f160a7138d3a3c1018b2facb55"}, +{"name":"activemodel","version":"7.2.2.2","platform":"ruby","checksum":"6898b91af028d725729f65d8e0f6ccfef5993e085ed70d5b93c42ba1bf7384dd"}, +{"name":"activerecord","version":"7.2.2.2","platform":"ruby","checksum":"e6b1e1499018f1c3ffd9f7828a8560588da1f5bd85dc2b7a95e49c5467cda800"}, +{"name":"activestorage","version":"7.2.2.2","platform":"ruby","checksum":"0b28d0c191b03162e83d3bf6875c3692ab48abd1e371bb0b428136dd8509ae66"}, +{"name":"activesupport","version":"7.2.2.2","platform":"ruby","checksum":"c54e84bb3d9027f1f372fb8f68203538fcfe0d5ff42801774c03974daa15bef0"}, {"name":"addressable","version":"2.8.7","platform":"ruby","checksum":"462986537cf3735ab5f3c0f557f14155d778f4b43ea4f485a9deb9c8f7c58232"}, {"name":"aes_key_wrap","version":"1.1.0","platform":"ruby","checksum":"b935f4756b37375895db45669e79dfcdc0f7901e12d4e08974d5540c8e0776a5"}, {"name":"akismet","version":"3.0.0","platform":"ruby","checksum":"74991b8e3d3257eeea996b47069abb8da2006c84a144255123e8dffd1c86b230"}, @@ -572,12 +572,12 @@ {"name":"rack-test","version":"2.1.0","platform":"ruby","checksum":"0c61fc61904049d691922ea4bb99e28004ed3f43aa5cfd495024cc345f125dfb"}, {"name":"rack-timeout","version":"0.7.0","platform":"ruby","checksum":"757337e9793cca999bb73a61fe2a7d4280aa9eefbaf787ce3b98d860749c87d9"}, {"name":"rackup","version":"1.0.1","platform":"ruby","checksum":"ba86604a28989fe1043bff20d819b360944ca08156406812dca6742b24b3c249"}, -{"name":"rails","version":"7.2.2.1","platform":"ruby","checksum":"aedb1604b40f4e43b5e8066e5a1aa34dae02c33aa9669b21fd4497d0f8c9bb40"}, +{"name":"rails","version":"7.2.2.2","platform":"ruby","checksum":"f38ff86c5e6905e5d30a762575f92ddad5b60346f5acfc282b0e22dbb36eca97"}, {"name":"rails-controller-testing","version":"1.0.5","platform":"ruby","checksum":"741448db59366073e86fc965ba403f881c636b79a2c39a48d0486f2607182e94"}, {"name":"rails-dom-testing","version":"2.2.0","platform":"ruby","checksum":"e515712e48df1f687a1d7c380fd7b07b8558faa26464474da64183a7426fa93b"}, {"name":"rails-html-sanitizer","version":"1.6.1","platform":"ruby","checksum":"e3d2fb10339f03b802e39c7f6cac28c54fd404d3f65ae39c31cca9d150c5cbf0"}, {"name":"rails-i18n","version":"7.0.10","platform":"ruby","checksum":"efae16e0ac28c0f42e98555c8db1327d69ab02058c8b535e0933cb106dd931ca"}, -{"name":"railties","version":"7.2.2.1","platform":"ruby","checksum":"e3f11bf116dd6d0d874522843ccc70ec0f89fbfed3e9c2ee48a4778cd042fe1f"}, +{"name":"railties","version":"7.2.2.2","platform":"ruby","checksum":"457506d29b7be2c5644b5bd4740edaf42bcefa52b50f9bed519a9b10ec9069e0"}, {"name":"rainbow","version":"3.1.1","platform":"ruby","checksum":"039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a"}, {"name":"rake","version":"13.0.6","platform":"ruby","checksum":"5ce4bf5037b4196c24ac62834d8db1ce175470391026bd9e557d669beeb19097"}, {"name":"rake-compiler-dock","version":"1.9.1","platform":"ruby","checksum":"e73720a29aba9c114728ce39cc0d8eef69ba61d88e7978c57bac171724cd4d53"}, diff --git a/Gemfile.next.lock b/Gemfile.next.lock index 33d7b4f1ebb855..019fb36cbeeadc 100644 --- a/Gemfile.next.lock +++ b/Gemfile.next.lock @@ -214,29 +214,29 @@ GEM base64 (~> 0.2) faraday (>= 1.0, < 3.0.0) faraday-retry (>= 1.0, < 3.0.0) - actioncable (7.2.2.1) - actionpack (= 7.2.2.1) - activesupport (= 7.2.2.1) + actioncable (7.2.2.2) + actionpack (= 7.2.2.2) + activesupport (= 7.2.2.2) nio4r (~> 2.0) websocket-driver (>= 0.6.1) zeitwerk (~> 2.6) - actionmailbox (7.2.2.1) - actionpack (= 7.2.2.1) - activejob (= 7.2.2.1) - activerecord (= 7.2.2.1) - activestorage (= 7.2.2.1) - activesupport (= 7.2.2.1) + actionmailbox (7.2.2.2) + actionpack (= 7.2.2.2) + activejob (= 7.2.2.2) + activerecord (= 7.2.2.2) + activestorage (= 7.2.2.2) + activesupport (= 7.2.2.2) mail (>= 2.8.0) - actionmailer (7.2.2.1) - actionpack (= 7.2.2.1) - actionview (= 7.2.2.1) - activejob (= 7.2.2.1) - activesupport (= 7.2.2.1) + actionmailer (7.2.2.2) + actionpack (= 7.2.2.2) + actionview (= 7.2.2.2) + activejob (= 7.2.2.2) + activesupport (= 7.2.2.2) mail (>= 2.8.0) rails-dom-testing (~> 2.2) - actionpack (7.2.2.1) - actionview (= 7.2.2.1) - activesupport (= 7.2.2.1) + actionpack (7.2.2.2) + actionview (= 7.2.2.2) + activesupport (= 7.2.2.2) nokogiri (>= 1.8.5) racc rack (>= 2.2.4, < 3.2) @@ -245,35 +245,35 @@ GEM rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) useragent (~> 0.16) - actiontext (7.2.2.1) - actionpack (= 7.2.2.1) - activerecord (= 7.2.2.1) - activestorage (= 7.2.2.1) - activesupport (= 7.2.2.1) + actiontext (7.2.2.2) + actionpack (= 7.2.2.2) + activerecord (= 7.2.2.2) + activestorage (= 7.2.2.2) + activesupport (= 7.2.2.2) globalid (>= 0.6.0) nokogiri (>= 1.8.5) - actionview (7.2.2.1) - activesupport (= 7.2.2.1) + actionview (7.2.2.2) + activesupport (= 7.2.2.2) builder (~> 3.1) erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) - activejob (7.2.2.1) - activesupport (= 7.2.2.1) + activejob (7.2.2.2) + activesupport (= 7.2.2.2) globalid (>= 0.3.6) - activemodel (7.2.2.1) - activesupport (= 7.2.2.1) - activerecord (7.2.2.1) - activemodel (= 7.2.2.1) - activesupport (= 7.2.2.1) + activemodel (7.2.2.2) + activesupport (= 7.2.2.2) + activerecord (7.2.2.2) + activemodel (= 7.2.2.2) + activesupport (= 7.2.2.2) timeout (>= 0.4.0) - activestorage (7.2.2.1) - actionpack (= 7.2.2.1) - activejob (= 7.2.2.1) - activerecord (= 7.2.2.1) - activesupport (= 7.2.2.1) + activestorage (7.2.2.2) + actionpack (= 7.2.2.2) + activejob (= 7.2.2.2) + activerecord (= 7.2.2.2) + activesupport (= 7.2.2.2) marcel (~> 1.0) - activesupport (7.2.2.1) + activesupport (7.2.2.2) base64 benchmark (>= 0.3) bigdecimal @@ -1545,20 +1545,20 @@ GEM rackup (1.0.1) rack (< 3) webrick - rails (7.2.2.1) - actioncable (= 7.2.2.1) - actionmailbox (= 7.2.2.1) - actionmailer (= 7.2.2.1) - actionpack (= 7.2.2.1) - actiontext (= 7.2.2.1) - actionview (= 7.2.2.1) - activejob (= 7.2.2.1) - activemodel (= 7.2.2.1) - activerecord (= 7.2.2.1) - activestorage (= 7.2.2.1) - activesupport (= 7.2.2.1) + rails (7.2.2.2) + actioncable (= 7.2.2.2) + actionmailbox (= 7.2.2.2) + actionmailer (= 7.2.2.2) + actionpack (= 7.2.2.2) + actiontext (= 7.2.2.2) + actionview (= 7.2.2.2) + activejob (= 7.2.2.2) + activemodel (= 7.2.2.2) + activerecord (= 7.2.2.2) + activestorage (= 7.2.2.2) + activesupport (= 7.2.2.2) bundler (>= 1.15.0) - railties (= 7.2.2.1) + railties (= 7.2.2.2) rails-controller-testing (1.0.5) actionpack (>= 5.0.1.rc1) actionview (>= 5.0.1.rc1) @@ -1573,9 +1573,9 @@ GEM rails-i18n (7.0.10) i18n (>= 0.7, < 2) railties (>= 6.0.0, < 8) - railties (7.2.2.1) - actionpack (= 7.2.2.1) - activesupport (= 7.2.2.1) + railties (7.2.2.2) + actionpack (= 7.2.2.2) + activesupport (= 7.2.2.2) irb (~> 1.13) rackup (>= 1.0.0) rake (>= 12.2) diff --git a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb index d002ba012502a6..3a1a552ab0ba67 100644 --- a/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/diff_note_importer_spec.rb @@ -116,8 +116,6 @@ let(:expected_note_body) { "I said to `@sam_allen` the code should follow `@bob`'s advice. `@.ali-ce/group#9`?" } it 'formats the text correctly' do - stub_user_finder(user.id, true) - subject.execute note = project.notes.diff_notes.take diff --git a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb index 6d3be1c58353d4..c767e14d5fc0ae 100644 --- a/spec/lib/gitlab/github_import/importer/note_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_importer_spec.rb @@ -16,7 +16,7 @@ let_it_be(:user) { create(:user) } let_it_be(:source_user) { generate_source_user(project, '4') } - let(:client) { instance_double(:client, web_endpoint: 'https://github.com') } + let(:client) { double(:client, web_endpoint: 'https://github.com') } let(:created_at) { Time.new(2017, 1, 1, 12, 00) } let(:updated_at) { Time.new(2017, 1, 1, 12, 15) } let(:note_body) { 'This is my note' } -- GitLab From 63d71cad16ffe1f90aa165ad03b7ed6163b604ab Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Thu, 14 Aug 2025 18:07:02 +0200 Subject: [PATCH 42/43] Fix undercoverage failure --- .../note_attachments_importer_spec.rb | 171 +++++++++++++++--- 1 file changed, 150 insertions(+), 21 deletions(-) diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 46a6cdeb0e44ef..fe6bb0a4ce98a9 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -257,7 +257,7 @@ end # rubocop:disable RSpec/MultipleMemoizedHelpers -- required - context 'when the attachment is a video file' do + context 'when the attachment is a mov file' do let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } let(:text) { ghe_video_url } let(:record) { create(:note, project: project, note: text) } @@ -284,8 +284,6 @@ mime_type_double = instance_double(Mime::Type) allow(Mime::Type).to receive(:lookup).with('video/quicktime').and_return(mime_type_double) - allow(Mime::Type).to receive(:lookup).with('video/webm').and_return(mime_type_double) - allow(Mime::Type).to receive(:lookup).with('video/avi').and_return(mime_type_double) allow(FileUtils).to receive(:mv).and_return(true) allow(File).to receive(:open).with( @@ -308,27 +306,158 @@ record.reload expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mov)") end + end + + context 'when the attachment is a mp4 file' do + let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } + let(:text) { ghe_video_url } + let(:record) { create(:note, project: project, note: text) } + let(:tmp_stub_video) { Tempfile.create('video-attachment') } + let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } + let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.mp4') } + let(:uploader_hash) do + { alt: nil, + url: '/uploads/test-secret/video.mp4', + markdown: nil } + end + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_video_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) + allow(downloader_stub).to receive(:delete) + + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/mp4') + + allow(File).to receive(:extname).and_call_original + allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') + + mime_type_double = instance_double(Mime::Type) + allow(Mime::Type).to receive(:lookup).with('video/mp4').and_return(mime_type_double) + allow(FileUtils).to receive(:mv).and_return(true) + + allow(File).to receive(:open).with( + a_string_matching(/\.mp4/), 'rb').and_return(tmp_stub_video_with_ext) + + allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) + + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + end + + it 'calls update_ghe_video_path for video files on GHE' do + expect(importer).to receive(:update_ghe_video_path).with(tmp_stub_video).at_least(:once).and_call_original - context 'with webm format' do - let(:uploader_hash) do - { alt: nil, - url: '/uploads/test-secret/video.webm', - markdown: nil } - end - - it 'changes standalone attachment link to correct markdown' do - importer.execute - record.reload - expect(record.note).to include("![media_attachment](/uploads/test-secret/video.webm)") - end + importer.execute end - context 'with unsupported video format' do - it 'does not update the link with a file extension' do - importer.execute - record.reload - expect(record.note).to include("/uploads/test-secret/video") - end + it 'changes standalone attachment link to correct markdown' do + importer.execute + record.reload + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.mp4)") + end + end + + context 'when the attachment is a webm file' do + let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } + let(:text) { ghe_video_url } + let(:record) { create(:note, project: project, note: text) } + let(:tmp_stub_video) { Tempfile.create('video-attachment') } + let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } + let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.webm') } + let(:uploader_hash) do + { alt: nil, + url: '/uploads/test-secret/video.webm', + markdown: nil } + end + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_video_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) + allow(downloader_stub).to receive(:delete) + + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/webm') + + allow(File).to receive(:extname).and_call_original + allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') + + mime_type_double = instance_double(Mime::Type) + allow(Mime::Type).to receive(:lookup).with('video/webm').and_return(mime_type_double) + allow(FileUtils).to receive(:mv).and_return(true) + + allow(File).to receive(:open).with( + a_string_matching(/\.webm/), 'rb').and_return(tmp_stub_video_with_ext) + + allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) + + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + end + + it 'calls update_ghe_video_path for video files on GHE' do + expect(importer).to receive(:update_ghe_video_path).with(tmp_stub_video).at_least(:once).and_call_original + + importer.execute + end + + it 'changes standalone attachment link to correct markdown' do + importer.execute + record.reload + expect(record.note).to include("![media_attachment](/uploads/test-secret/video.webm)") + end + end + + context 'when the attachment is an unsupported video format' do + let(:ghe_video_url) { "#{web_endpoint}/user-attachments/assets/video-attachment" } + let(:text) { ghe_video_url } + let(:record) { create(:note, project: project, note: text) } + let(:tmp_stub_video) { Tempfile.create('video-attachment') } + let(:tmp_stub_path) { Pathname.new(tmp_stub_video.path) } + let(:tmp_stub_video_with_ext) { Tempfile.create('video-attachment.avi') } + let(:uploader_hash) do + { alt: nil, + url: '/uploads/test-secret/video.avi', + markdown: nil } + end + + before do + allow(Gitlab::GithubImport::AttachmentsDownloader).to receive(:new) + .with(ghe_video_url, options: options, web_endpoint: web_endpoint) + .and_return(downloader_stub) + allow(downloader_stub).to receive(:perform).and_return(tmp_stub_video) + allow(downloader_stub).to receive(:delete) + + allow(Marcel::MimeType).to receive(:for).with(tmp_stub_path).and_return('video/avi') + + allow(File).to receive(:extname).and_call_original + allow(File).to receive(:extname).with(tmp_stub_video.path).and_return('') + + mime_type_double = instance_double(Mime::Type) + allow(Mime::Type).to receive(:lookup).with('video/avi').and_return(mime_type_double) + allow(FileUtils).to receive(:mv).and_return(true) + + allow(File).to receive(:open).with( + a_string_matching(/\.avi/), 'rb').and_return(tmp_stub_video_with_ext) + + allow(UploadService).to receive_message_chain(:new, :execute).and_return(uploader_hash) + + allow(client).to receive_message_chain(:octokit, :access_token).and_return(access_token) + allow(Gitlab::Auth::OAuth::Provider).to receive(:config_for).with('github').and_return(nil) + end + + it 'calls update_ghe_video_path for video files on GHE' do + expect(importer).to receive(:update_ghe_video_path).with(tmp_stub_video).at_least(:once).and_call_original + + importer.execute + end + + it 'does not update the link with a file extension' do + importer.execute + record.reload + expect(record.note).to include("/uploads/test-secret/video") end end # rubocop:enable RSpec/MultipleMemoizedHelpers -- GitLab From 346cf34a67bf7f9819d3ae9698e02c65571eb129 Mon Sep 17 00:00:00 2001 From: Carla Drago Date: Tue, 19 Aug 2025 09:06:52 +0200 Subject: [PATCH 43/43] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Ashraf Khamis --- doc/user/project/import/github.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/user/project/import/github.md b/doc/user/project/import/github.md index 274a9d6bdd501f..183b0aa145eb24 100644 --- a/doc/user/project/import/github.md +++ b/doc/user/project/import/github.md @@ -88,9 +88,9 @@ GitHub Enterprise does not require a public email address, so you might have to - GitHub pull request comments (known as diff notes in GitLab) created before 2017 are imported in separate threads. This occurs because of a limitation of the GitHub API that doesn't include `in_reply_to_id` for comments before 2017. -- [In GitLab 18.2 and earlier](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), Markdown attachments +- [In GitLab 18.3 and earlier](https://gitlab.com/gitlab-org/gitlab/-/issues/424400), Markdown attachments from repositories on GitHub Enterprise Server instances are not imported. - [In GitLab 18.3 and later](https://gitlab.com/gitlab-org/gitlab/-/issues/553386): + [In GitLab 18.4 and later](https://gitlab.com/gitlab-org/gitlab/-/issues/553386): - Only video and image files in Markdown attachments are imported. - Other file attachments are not imported. - Because of a [known issue](https://gitlab.com/gitlab-org/gitlab/-/issues/418800), when importing projects that used -- GitLab