diff --git a/lib/gitlab/github_import/attachments_downloader.rb b/lib/gitlab/github_import/attachments_downloader.rb index b71d5f753f2b66e7a0442caa55cebe6e7ca6ce79..7dc04f21dd6e77b1c655ee8b1d2eafc1204e8888 100644 --- a/lib/gitlab/github_import/attachments_downloader.rb +++ b/lib/gitlab/github_import/attachments_downloader.rb @@ -12,11 +12,13 @@ class AttachmentsDownloader FILENAME_SIZE_LIMIT = 255 # chars before the extension DEFAULT_FILE_SIZE_LIMIT = 25.megabytes TMP_DIR = File.join(Dir.tmpdir, 'github_attachments').freeze + GITHUB_ASSETS_URL_REGEX = %r{#{Regexp.escape(::Gitlab::GithubImport::MarkdownText.github_url)}/.*/assets/} - attr_reader :file_url, :filename, :file_size_limit + attr_reader :file_url, :filename, :file_size_limit, :options - def initialize(file_url, file_size_limit: DEFAULT_FILE_SIZE_LIMIT) + def initialize(file_url, options: {}, file_size_limit: DEFAULT_FILE_SIZE_LIMIT) @file_url = file_url + @options = options @file_size_limit = file_size_limit filename = URI(file_url).path.split('/').last @@ -27,7 +29,9 @@ def perform validate_content_length validate_filepath - file = download + redirection_url = get_download_redirection_url + file = download_from(redirection_url) + validate_symlink file end @@ -47,9 +51,23 @@ def response_headers Gitlab::HTTP.perform_request(Net::HTTP::Head, file_url, {}).headers end - def download + # Github /assets redirection link will redirect to aws which has its own authorization. + # Keeping our bearer token will cause request rejection + # eg. Only one auth mechanism allowed; only the X-Amz-Algorithm query parameter, + # Signature query string parameter or the Authorization header should be specified. + def get_download_redirection_url + return file_url unless file_url.starts_with?(GITHUB_ASSETS_URL_REGEX) + + options[:follow_redirects] = false + response = Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, options) + raise_error("expected a redirect response, got #{response.code}") unless response.redirection? + + response.headers[:location] + end + + def download_from(url) file = File.open(filepath, 'wb') - Gitlab::HTTP.perform_request(Net::HTTP::Get, file_url, stream_body: true) { |batch| file.write(batch) } + Gitlab::HTTP.perform_request(Net::HTTP::Get, url, stream_body: true) { |batch| file.write(batch) } file end diff --git a/lib/gitlab/github_import/importer/note_attachments_importer.rb b/lib/gitlab/github_import/importer/note_attachments_importer.rb index 266ee2938ba1e35571c88f02f5e5d6adb4aadd7d..f9173f9a9ba7b93e7c2eefd25a31daa2687d42a1 100644 --- a/lib/gitlab/github_import/importer/note_attachments_importer.rb +++ b/lib/gitlab/github_import/importer/note_attachments_importer.rb @@ -4,14 +4,15 @@ module Gitlab module GithubImport module Importer class NoteAttachmentsImporter - attr_reader :note_text, :project + attr_reader :note_text, :project, :client # note_text - An instance of `Gitlab::GithubImport::Representation::NoteText`. # project - An instance of `Project`. # client - An instance of `Gitlab::GithubImport::Client`. - def initialize(note_text, project, _client = nil) + def initialize(note_text, project, client) @note_text = note_text @project = project + @client = client end def execute @@ -33,7 +34,7 @@ def gitlab_attachment_link(attachment) if attachment.part_of_project_blob?(project_import_source) convert_project_content_link(attachment.url, project_import_source) - elsif attachment.media? || attachment.doc_belongs_to_project?(project_import_source) + elsif attachment.media?(project_import_source) || attachment.doc_belongs_to_project?(project_import_source) download_attachment(attachment) else # url to other GitHub project attachment.url @@ -53,7 +54,7 @@ def convert_project_content_link(attachment_url, import_source) # in: an instance of Gitlab::GithubImport::Markdown::Attachment # out: gitlab attachment markdown url def download_attachment(attachment) - downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url) + downloader = ::Gitlab::GithubImport::AttachmentsDownloader.new(attachment.url, options: options) file = downloader.perform uploader = UploadService.new(project, file, FileUploader).execute uploader.to_h[:url] @@ -61,6 +62,14 @@ def download_attachment(attachment) downloader&.delete end + def options + { + headers: { + 'Authorization' => "Bearer #{client.octokit.access_token}" + } + } + end + def update_note_record(text) case note_text.record_type when ::Release.name diff --git a/lib/gitlab/github_import/markdown/attachment.rb b/lib/gitlab/github_import/markdown/attachment.rb index e270cfba619185dc0b1981c840dd2c6bcdaa9e40..0d8f319671978ba57305676b2cb7e2d6834f9831 100644 --- a/lib/gitlab/github_import/markdown/attachment.rb +++ b/lib/gitlab/github_import/markdown/attachment.rb @@ -57,7 +57,8 @@ def from_inline_html(markdown_node) def github_url?(url, docs: false, media: false) if media - url.start_with?(::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN) + url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url, + ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN) elsif docs url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url) end @@ -65,6 +66,9 @@ def github_url?(url, docs: false, media: false) def whitelisted_type?(url, docs: false, media: false) if media + # We do not know the file extension type from the /assets markdown + return true if url.start_with?(::Gitlab::GithubImport::MarkdownText.github_url) + MEDIA_TYPES.any? { |type| url.end_with?(type) } elsif docs DOC_TYPES.any? { |type| url.end_with?(type) } @@ -91,8 +95,11 @@ def doc_belongs_to_project?(import_source) ) end - def media? - url.start_with?(::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN) + def media?(import_source) + url.start_with?( + "#{::Gitlab::GithubImport::MarkdownText.github_url}/#{import_source}/assets", + ::Gitlab::GithubImport::MarkdownText::GITHUB_MEDIA_CDN + ) end def inspect diff --git a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb index 086aa4be17e6a3630e02080b6d47214ff4d27485..27007e09ba41f8cd7330c718ee08946ec08f0f16 100644 --- a/spec/lib/gitlab/github_import/attachments_downloader_spec.rb +++ b/spec/lib/gitlab/github_import/attachments_downloader_spec.rb @@ -93,6 +93,43 @@ expect(File.basename(file)).to eq('av.png') end end + + context 'when attachment is behind a redirect' do + let_it_be(:file_url) { "https://github.com/test/project/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11" } + let(:redirect_url) { "https://https://github-production-user-asset-6210df.s3.amazonaws.com/142635249/740edb05293e.jpg" } + let(:sample_response) do + instance_double(HTTParty::Response, redirection?: true, headers: { location: redirect_url }) + end + + it 'gets redirection url' do + expect(Gitlab::HTTP).to receive(:perform_request) + .with(Net::HTTP::Get, file_url, { follow_redirects: false }) + .and_return sample_response + + expect(Gitlab::HTTP).to receive(:perform_request) + .with(Net::HTTP::Get, redirect_url, stream_body: true).and_yield(chunk_double) + + file = downloader.perform + + expect(File.exist?(file.path)).to eq(true) + end + + context 'when url is not a redirection' do + let(:sample_response) do + instance_double(HTTParty::Response, code: 200, redirection?: false) + end + + before do + allow(Gitlab::HTTP).to receive(:perform_request) + .with(Net::HTTP::Get, file_url, { follow_redirects: false }) + .and_return sample_response + end + + it 'raises upon unsuccessful redirection' do + expect { downloader.perform }.to raise_error("expected a redirect response, got #{sample_response.code}") + end + end + end end describe '#delete' do diff --git a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb index 450ebe9a719992cc7ec858ba212c1f84c2c76bde..ffd68c92caf8e3cb985b50b09d611e7a8647e0a4 100644 --- a/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/note_attachments_importer_spec.rb @@ -53,6 +53,19 @@ record.reload expect(record.description).to include("[link to other project blob file](#{other_project_blob_url})") end + + context 'with new github image format' do + let(:image_url) { 'https://github.com/nickname/public-test-repo/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11' } + let(:image_tag_url) { 'https://github.com/nickname/public-test-repo/assets/142635249/4b9f9c90-f060-4845-97cf-b24c558bcb11' } + + it 'changes image attachment links' do + importer.execute + + record.reload + expect(record.description).to include('![image.jpeg](/uploads/') + expect(record.description).to include('