From 242c4c8f6128580d78da81e18a655c7dbda7a380 Mon Sep 17 00:00:00 2001 From: Kassio Borges Date: Tue, 23 Nov 2021 23:04:55 +0000 Subject: [PATCH] Fix project import from remote to import from S3 Currently, `Import::GitlabProjects::CreateProjectFromRemoteFileService` validates: 1. The file size based on the `Content-Length` header, which S3 doesn't returb on `head` requests; 1. The file content type, which S3 use `application/x-tar` instead of `application/gzip` for a `tar.gz`; Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/337230 Changelog: fixed MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170 --- ...create_project_from_remote_file_service.rb | 21 +++++++- ...e_project_from_remote_file_service_spec.rb | 51 +++++++++++++------ 2 files changed, 54 insertions(+), 18 deletions(-) diff --git a/app/services/import/gitlab_projects/create_project_from_remote_file_service.rb b/app/services/import/gitlab_projects/create_project_from_remote_file_service.rb index bbfdaf692f9742..edb9dc8ad91e86 100644 --- a/app/services/import/gitlab_projects/create_project_from_remote_file_service.rb +++ b/app/services/import/gitlab_projects/create_project_from_remote_file_service.rb @@ -4,7 +4,10 @@ module Import module GitlabProjects class CreateProjectFromRemoteFileService < CreateProjectFromUploadedFileService FILE_SIZE_LIMIT = 10.gigabytes - ALLOWED_CONTENT_TYPES = ['application/gzip'].freeze + ALLOWED_CONTENT_TYPES = [ + 'application/gzip', # most common content-type when fetching a tar.gz + 'application/x-tar' # aws-s3 uses x-tar for tar.gz files + ].freeze validate :valid_remote_import_url? validate :validate_file_size @@ -44,17 +47,27 @@ def allow_local_requests? end def validate_content_type + # AWS-S3 presigned URLs don't respond to HTTP HEAD requests, + # so file type cannot be validated + # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103 + return if amazon_s3? + if headers['content-type'].blank? errors.add(:base, "Missing 'ContentType' header") elsif !ALLOWED_CONTENT_TYPES.include?(headers['content-type']) errors.add(:base, "Remote file content type '%{content_type}' not allowed. (Allowed content types: %{allowed})" % { content_type: headers['content-type'], - allowed: ALLOWED_CONTENT_TYPES.join(',') + allowed: ALLOWED_CONTENT_TYPES.join(', ') }) end end def validate_file_size + # AWS-S3 presigned URLs don't respond to HTTP HEAD requests, + # so file size cannot be validated + # https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103 + return if amazon_s3? + if headers['content-length'].to_i == 0 errors.add(:base, "Missing 'ContentLength' header") elsif headers['content-length'].to_i > FILE_SIZE_LIMIT @@ -64,6 +77,10 @@ def validate_file_size end end + def amazon_s3? + headers['Server'] == 'AmazonS3' && headers['x-amz-request-id'].present? + end + def headers return {} if params[:remote_import_url].blank? || !valid_remote_import_url? diff --git a/spec/services/import/gitlab_projects/create_project_from_remote_file_service_spec.rb b/spec/services/import/gitlab_projects/create_project_from_remote_file_service_spec.rb index 3c461c91ff0ef7..92c46cf705265c 100644 --- a/spec/services/import/gitlab_projects/create_project_from_remote_file_service_spec.rb +++ b/spec/services/import/gitlab_projects/create_project_from_remote_file_service_spec.rb @@ -18,24 +18,29 @@ subject { described_class.new(user, params) } - it 'creates a project and returns a successful response' do - stub_headers_for(remote_url, { - 'content-type' => 'application/gzip', - 'content-length' => '10' - }) - - response = nil - expect { response = subject.execute } - .to change(Project, :count).by(1) + shared_examples 'successfully import' do |content_type| + it 'creates a project and returns a successful response' do + stub_headers_for(remote_url, { + 'content-type' => content_type, + 'content-length' => '10' + }) - expect(response).to be_success - expect(response.http_status).to eq(:ok) - expect(response.payload).to be_instance_of(Project) - expect(response.payload.name).to eq('name') - expect(response.payload.path).to eq('path') - expect(response.payload.namespace).to eq(user.namespace) + response = nil + expect { response = subject.execute } + .to change(Project, :count).by(1) + + expect(response).to be_success + expect(response.http_status).to eq(:ok) + expect(response.payload).to be_instance_of(Project) + expect(response.payload.name).to eq('name') + expect(response.payload.path).to eq('path') + expect(response.payload.namespace).to eq(user.namespace) + end end + it_behaves_like 'successfully import', 'application/gzip' + it_behaves_like 'successfully import', 'application/x-tar' + context 'when the file url is invalid' do it 'returns an erred response with the reason of the failure' do stub_application_setting(allow_local_requests_from_web_hooks_and_services: false) @@ -79,7 +84,7 @@ expect(response).not_to be_success expect(response.http_status).to eq(:bad_request) expect(response.message) - .to eq("Remote file content type 'application/js' not allowed. (Allowed content types: application/gzip)") + .to eq("Remote file content type 'application/js' not allowed. (Allowed content types: application/gzip, application/x-tar)") end end @@ -130,6 +135,20 @@ end end + it 'does not validate content-type or content-length when the file is stored in AWS-S3' do + stub_headers_for(remote_url, { + 'Server' => 'AmazonS3', + 'x-amz-request-id' => 'Something' + }) + + response = nil + expect { response = subject.execute } + .to change(Project, :count) + + expect(response).to be_success + expect(response.http_status).to eq(:ok) + end + context 'when required parameters are not provided' do let(:params) { {} } -- GitLab