From e73a8fedc232936494e7248c73532946f4a1116e Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Wed, 8 Jan 2020 17:23:07 +0800 Subject: [PATCH 1/9] Create LfsObjectsProject record for forks as well Previous behavior was `LfsObjectsProject` are created on the source project. To fix the issue whenever the source project gets deleted or fork was selectively synced, we also need to create the records for forks. This way, they'll also have access to the `LfsObject` and no longer dependent on the source project. --- app/controllers/concerns/lfs_request.rb | 4 -- .../repositories/lfs_storage_controller.rb | 11 +++-- .../bulk_create_service.rb | 34 ++++++++++++++ .../lfs_pointers/lfs_download_service.rb | 6 +-- app/workers/repository_fork_worker.rb | 8 +++- .../20042-create-lfs-objects-project.yml | 5 ++ spec/controllers/concerns/lfs_request_spec.rb | 18 -------- spec/requests/lfs_http_spec.rb | 4 +- .../bulk_create_service_spec.rb | 46 +++++++++++++++++++ .../lfs_pointers/lfs_download_service_spec.rb | 3 +- spec/workers/repository_fork_worker_spec.rb | 9 ++++ 11 files changed, 114 insertions(+), 34 deletions(-) create mode 100644 app/services/lfs_objects_projects/bulk_create_service.rb create mode 100644 changelogs/unreleased/20042-create-lfs-objects-project.yml create mode 100644 spec/services/lfs_objects_projects/bulk_create_service_spec.rb diff --git a/app/controllers/concerns/lfs_request.rb b/app/controllers/concerns/lfs_request.rb index 61072eec5359d6..3152d959ae4cb8 100644 --- a/app/controllers/concerns/lfs_request.rb +++ b/app/controllers/concerns/lfs_request.rb @@ -112,10 +112,6 @@ def build_can_download_code? has_authentication_ability?(:build_download_code) && can?(user, :build_download_code, project) end - def storage_project - @storage_project ||= project.lfs_storage_project - end - def objects @objects ||= (params[:objects] || []).to_a end diff --git a/app/controllers/repositories/lfs_storage_controller.rb b/app/controllers/repositories/lfs_storage_controller.rb index 58f496e16d3d81..ec5ca5bbeecc17 100644 --- a/app/controllers/repositories/lfs_storage_controller.rb +++ b/app/controllers/repositories/lfs_storage_controller.rb @@ -80,12 +80,13 @@ def create_file!(oid, size) LfsObject.create!(oid: oid, size: size, file: uploaded_file) end - # rubocop: disable CodeReuse/ActiveRecord def link_to_project!(object) - if object && !object.projects.exists?(storage_project.id) - object.lfs_objects_projects.create!(project: storage_project) - end + return unless object + + LfsObjectsProject.safe_find_or_create_by!( + project: project, + lfs_object: object + ) end - # rubocop: enable CodeReuse/ActiveRecord end end diff --git a/app/services/lfs_objects_projects/bulk_create_service.rb b/app/services/lfs_objects_projects/bulk_create_service.rb new file mode 100644 index 00000000000000..bc590e25fb14d4 --- /dev/null +++ b/app/services/lfs_objects_projects/bulk_create_service.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module LfsObjectsProjects + class BulkCreateService < BaseService + def initialize(project, params = {}) + @project = project + @params = params + end + + def execute + target_project = params[:target_project] + + return unless target_project.present? + + Gitlab::Database.bulk_insert( + LfsObjectsProject.table_name, + lfs_objects_projects_map(target_project), + on_conflict: :do_nothing + ) + end + + private + + def lfs_objects_projects_map(target_project) + project.lfs_objects_projects.map do |objects_project| + { + lfs_object_id: objects_project.lfs_object_id, + project_id: target_project.id, + repository_type: objects_project.read_attribute_before_type_cast(:repository_type) + } + end + end + end +end diff --git a/app/services/projects/lfs_pointers/lfs_download_service.rb b/app/services/projects/lfs_pointers/lfs_download_service.rb index a009f479d5d46d..bd70012c76cb13 100644 --- a/app/services/projects/lfs_pointers/lfs_download_service.rb +++ b/app/services/projects/lfs_pointers/lfs_download_service.rb @@ -39,9 +39,9 @@ def wrap_download_errors(&block) def download_lfs_file! with_tmp_file do |tmp_file| download_and_save_file!(tmp_file) - project.all_lfs_objects << LfsObject.new(oid: lfs_oid, - size: lfs_size, - file: tmp_file) + project.lfs_objects << LfsObject.new(oid: lfs_oid, + size: lfs_size, + file: tmp_file) success end diff --git a/app/workers/repository_fork_worker.rb b/app/workers/repository_fork_worker.rb index 0adf745c7aca16..81be94a142611c 100644 --- a/app/workers/repository_fork_worker.rb +++ b/app/workers/repository_fork_worker.rb @@ -29,7 +29,13 @@ def fork_repository(target_project, source_project) result = gitlab_shell.fork_repository(source_project, target_project) - raise "Unable to fork project #{target_project.id} for repository #{source_project.disk_path} -> #{target_project.disk_path}" unless result + if result + LfsObjectsProjects::BulkCreateService + .new(source_project, target_project: target_project) + .execute + else + raise "Unable to fork project #{target_project.id} for repository #{source_project.disk_path} -> #{target_project.disk_path}" + end target_project.after_import end diff --git a/changelogs/unreleased/20042-create-lfs-objects-project.yml b/changelogs/unreleased/20042-create-lfs-objects-project.yml new file mode 100644 index 00000000000000..0f942e11670c6f --- /dev/null +++ b/changelogs/unreleased/20042-create-lfs-objects-project.yml @@ -0,0 +1,5 @@ +--- +title: Create LfsObjectsProject record for forks as well +merge_request: 22418 +author: +type: fixed diff --git a/spec/controllers/concerns/lfs_request_spec.rb b/spec/controllers/concerns/lfs_request_spec.rb index 79257e9a7f64e0..67c81156ca6c42 100644 --- a/spec/controllers/concerns/lfs_request_spec.rb +++ b/spec/controllers/concerns/lfs_request_spec.rb @@ -10,8 +10,6 @@ include LfsRequest def show - storage_project - head :ok end @@ -38,22 +36,6 @@ def ci? stub_lfs_setting(enabled: true) end - describe '#storage_project' do - it 'assigns the project as storage project' do - get :show, params: { id: project.id } - - expect(assigns(:storage_project)).to eq(project) - end - - it 'assigns the source of a forked project' do - forked_project = fork_project(project) - - get :show, params: { id: forked_project.id } - - expect(assigns(:storage_project)).to eq(project) - end - end - context 'user is authenticated without access to lfs' do before do allow(controller).to receive(:authenticate_user) diff --git a/spec/requests/lfs_http_spec.rb b/spec/requests/lfs_http_spec.rb index 0e02c0f001b38e..4e21c08ad5c1aa 100644 --- a/spec/requests/lfs_http_spec.rb +++ b/spec/requests/lfs_http_spec.rb @@ -1193,8 +1193,8 @@ def authorization_in_action(action) it_behaves_like 'LFS http 200 response' - it 'LFS object is linked to the source project' do - expect(lfs_object.projects.pluck(:id)).to include(upstream_project.id) + it 'LFS object is linked to the forked project' do + expect(lfs_object.projects.pluck(:id)).to include(project.id) end end end diff --git a/spec/services/lfs_objects_projects/bulk_create_service_spec.rb b/spec/services/lfs_objects_projects/bulk_create_service_spec.rb new file mode 100644 index 00000000000000..94022a7829d223 --- /dev/null +++ b/spec/services/lfs_objects_projects/bulk_create_service_spec.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe LfsObjectsProjects::BulkCreateService do + let(:project) { create(:project) } + let(:target_project) { create(:project) } + let(:params) { { target_project: target_project } } + + subject { described_class.new(project, params).execute } + + shared_examples_for 'LFS objects assigned to target project' do + it 'creates LfsObjectsProject records for the target project' do + expect { subject }.to change { target_project.lfs_objects.count } + expect(target_project.lfs_objects).to eq(project.lfs_objects) + end + end + + context 'target_project is not associated with any of the LFS objects' do + before do + create(:lfs_objects_project, project: project) + create(:lfs_objects_project, project: project) + end + + it_behaves_like 'LFS objects assigned to target project' + end + + context 'target_project already associated with some of the LFS objects' do + before do + objects_project = create(:lfs_objects_project, project: project) + create(:lfs_objects_project, project: target_project, lfs_object: objects_project.lfs_object) + create(:lfs_objects_project, project: project) + end + + it_behaves_like 'LFS objects assigned to target project' + end + + context 'target_project is not passed' do + let(:params) { {} } + + it 'does not create LfsObjectsProject records for the target project' do + expect { subject }.not_to change { target_project.lfs_objects.count } + expect(target_project.lfs_objects).to be_empty + end + end +end diff --git a/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb index 970e82e71072bb..21a139cdf3ca56 100644 --- a/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb @@ -48,10 +48,11 @@ end shared_examples 'lfs object is created' do - it do + it 'creates and associate the LFS object to project' do expect(subject).to receive(:download_and_save_file!).and_call_original expect { subject.execute }.to change { LfsObject.count }.by(1) + expect(LfsObject.first.projects).to include(project) end it 'returns success result' do diff --git a/spec/workers/repository_fork_worker_spec.rb b/spec/workers/repository_fork_worker_spec.rb index 26fd67adfaaece..b1405217834d64 100644 --- a/spec/workers/repository_fork_worker_spec.rb +++ b/spec/workers/repository_fork_worker_spec.rb @@ -47,6 +47,15 @@ def expect_fork_repository perform! end + it 'calls LfsObjectsProjects::BulkCreateService#execute' do + expect_fork_repository.and_return(true) + expect_next_instance_of(LfsObjectsProjects::BulkCreateService) do |service| + expect(service).to receive(:execute) + end + + perform! + end + it 'protects the default branch' do expect_fork_repository.and_return(true) -- GitLab From 4e6ab24dc10f3eb9ceeae523463dfd31e395f5bf Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Wed, 8 Jan 2020 17:48:22 +0800 Subject: [PATCH 2/9] Don't delete LfsObjectsProject when forking When an existing project becomes a fork of another project, we previously delete the LfsObjectsProject records of that fork. Since we're now making each fork to have its own records, we shouldn't delete records in that case anymore. --- app/services/projects/fork_service.rb | 12 +----------- doc/api/projects.md | 5 ----- spec/services/projects/fork_service_spec.rb | 8 -------- 3 files changed, 1 insertion(+), 24 deletions(-) diff --git a/app/services/projects/fork_service.rb b/app/services/projects/fork_service.rb index e66a0ed181a4a1..fcfea56788510a 100644 --- a/app/services/projects/fork_service.rb +++ b/app/services/projects/fork_service.rb @@ -26,17 +26,7 @@ def link_existing_project(fork_to_project) build_fork_network_member(fork_to_project) - if link_fork_network(fork_to_project) - # A forked project stores its LFS objects in the `forked_from_project`. - # So the LFS objects become inaccessible, and therefore delete them from - # the database so they'll get cleaned up. - # - # TODO: refactor this to get the correct lfs objects when implementing - # https://gitlab.com/gitlab-org/gitlab-foss/issues/39769 - fork_to_project.lfs_objects_projects.delete_all - - fork_to_project - end + fork_to_project if link_fork_network(fork_to_project) end def fork_new_project diff --git a/doc/api/projects.md b/doc/api/projects.md index dd80e3ce8f3dcb..a0243be1907bbb 100644 --- a/doc/api/projects.md +++ b/doc/api/projects.md @@ -2000,11 +2000,6 @@ Allows modification of the forked relationship between existing projects. Availa ### Create a forked from/to relation between existing projects -CAUTION: **Warning:** -This will destroy the LFS objects stored in the fork. -So to retain the LFS objects, make sure you've pulled them **before** creating the fork relation, -and push them again **after** creating the fork relation. - ``` POST /projects/:id/fork/:forked_from_id ``` diff --git a/spec/services/projects/fork_service_spec.rb b/spec/services/projects/fork_service_spec.rb index e7b904fcd601c7..e14f1abf018c6b 100644 --- a/spec/services/projects/fork_service_spec.rb +++ b/spec/services/projects/fork_service_spec.rb @@ -375,14 +375,6 @@ def forked_from_project(project) expect(fork_from_project.forks_count).to eq(1) end - it 'leaves no LFS objects dangling' do - create(:lfs_objects_project, project: fork_to_project) - - expect { subject.execute(fork_to_project) } - .to change { fork_to_project.lfs_objects_projects.count } - .to(0) - end - context 'if the fork is not allowed' do let(:fork_from_project) { create(:project, :private) } -- GitLab From ec55463614e72f2cb64cfb31965d716849916e97 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Wed, 8 Jan 2020 18:00:25 +0800 Subject: [PATCH 3/9] Add comments about removing some methods later These methods won't be necessary once all the `LfsObjectsProject` are backfilled for forks. --- app/models/project.rb | 4 ++++ app/services/projects/unlink_fork_service.rb | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/app/models/project.rb b/app/models/project.rb index 816d964519d02d..b149f5295978af 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1357,6 +1357,10 @@ def fork_source forked_from_project || fork_network&.root_project end + # TODO: Remove this method once all LfsObjectsProject records are backfilled + # for forks. + # + # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. def lfs_storage_project @lfs_storage_project ||= begin result = self diff --git a/app/services/projects/unlink_fork_service.rb b/app/services/projects/unlink_fork_service.rb index e7e0141099e7d6..b3cf27373cde13 100644 --- a/app/services/projects/unlink_fork_service.rb +++ b/app/services/projects/unlink_fork_service.rb @@ -52,6 +52,10 @@ def refresh_forks_count(project) Projects::ForksCountService.new(project).refresh_cache end + # TODO: Remove this method once all LfsObjectsProject records are backfilled + # for forks. + # + # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. def save_lfs_objects return unless @project.forked? -- GitLab From 122a87b1e6e9617ff4737fca2cef11c55fb738c4 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Wed, 8 Jan 2020 18:09:39 +0800 Subject: [PATCH 4/9] Move LFS objects to project even if it's a fork --- app/services/projects/move_lfs_objects_projects_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/projects/move_lfs_objects_projects_service.rb b/app/services/projects/move_lfs_objects_projects_service.rb index 10e19014db4913..8cc420d7ba7ca3 100644 --- a/app/services/projects/move_lfs_objects_projects_service.rb +++ b/app/services/projects/move_lfs_objects_projects_service.rb @@ -16,7 +16,7 @@ def execute(source_project, remove_remaining_elements: true) private def move_lfs_objects_projects - non_existent_lfs_objects_projects.update_all(project_id: @project.lfs_storage_project.id) + non_existent_lfs_objects_projects.update_all(project_id: @project.id) end def remove_remaining_lfs_objects_project -- GitLab From 428c3b1dccb57e49a4486f44ef87961d598d4567 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Wed, 8 Jan 2020 18:43:06 +0800 Subject: [PATCH 5/9] Link LFS objects when MR is created This is to avoid errors when pulling LFS objects in context of the MR's target project. --- app/services/lfs_objects_projects/bulk_create_service.rb | 4 +++- app/services/merge_requests/create_service.rb | 9 +++++++++ spec/services/merge_requests/create_service_spec.rb | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/app/services/lfs_objects_projects/bulk_create_service.rb b/app/services/lfs_objects_projects/bulk_create_service.rb index bc590e25fb14d4..838e4aa67f28ce 100644 --- a/app/services/lfs_objects_projects/bulk_create_service.rb +++ b/app/services/lfs_objects_projects/bulk_create_service.rb @@ -21,8 +21,9 @@ def execute private + # rubocop: disable CodeReuse/ActiveRecord def lfs_objects_projects_map(target_project) - project.lfs_objects_projects.map do |objects_project| + project.lfs_objects_projects.where.not(lfs_object: target_project.lfs_objects).map do |objects_project| { lfs_object_id: objects_project.lfs_object_id, project_id: target_project.id, @@ -30,5 +31,6 @@ def lfs_objects_projects_map(target_project) } end end + # rubocop: enable CodeReuse/ActiveRecord end end diff --git a/app/services/merge_requests/create_service.rb b/app/services/merge_requests/create_service.rb index 9a37a0330fc19d..1a18a8714ecd91 100644 --- a/app/services/merge_requests/create_service.rb +++ b/app/services/merge_requests/create_service.rb @@ -27,6 +27,7 @@ def after_create(issuable) create_pipeline_for(issuable, current_user) issuable.update_head_pipeline Gitlab::UsageDataCounters::MergeRequestCounter.count(:create) + link_lfs_objects(issuable) super end @@ -64,6 +65,14 @@ def set_projects! raise Gitlab::Access::AccessDeniedError end end + + def link_lfs_objects(issuable) + return if issuable.source_project == issuable.target_project + + LfsObjectsProjects::BulkCreateService + .new(issuable.source_project, target_project: issuable.target_project) + .execute + end end end diff --git a/spec/services/merge_requests/create_service_spec.rb b/spec/services/merge_requests/create_service_spec.rb index 3db1471bf3c3e1..80d2eeb6162b16 100644 --- a/spec/services/merge_requests/create_service_spec.rb +++ b/spec/services/merge_requests/create_service_spec.rb @@ -483,6 +483,14 @@ expect(merge_request).to be_persisted end + it 'calls LfsObjectsProjects::BulkCreateService#execute', :sidekiq_might_not_need_inline do + expect_next_instance_of(LfsObjectsProjects::BulkCreateService) do |service| + expect(service).to receive(:execute) + end + + described_class.new(project, user, opts).execute + end + it 'does not create the merge request when the target project is archived' do target_project.update!(archived: true) -- GitLab From c98e862a3e883df43f7de17644c324c59cff8928 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Fri, 10 Jan 2020 17:05:18 +0800 Subject: [PATCH 6/9] Look for LFS objects from the fork and source While backfilling LfsObjectsProject is ongoing, we should still be looking at both the fork and source when finding for LFS objects. This way, older forks that are dependent on finding LFS objects from the source, will still be able to find them. Newer forks, should be able to find the object directly. --- app/models/project.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/app/models/project.rb b/app/models/project.rb index b149f5295978af..a7779e017dbe1f 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1357,7 +1357,7 @@ def fork_source forked_from_project || fork_network&.root_project end - # TODO: Remove this method once all LfsObjectsProject records are backfilled + # TODO: Remove this method once all LfsObjectsProject records are backfilled # for forks. # # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. @@ -1373,14 +1373,16 @@ def lfs_storage_project end end - # This will return all `lfs_objects` that are accessible to the project. - # So this might be `self.lfs_objects` if the project is not part of a fork - # network, or it is the base of the fork network. + # This will return all `lfs_objects` that are accessible to the project and + # the fork source. This is needed since older forks won't have access to some + # LFS objects directly and have to get it from the fork source. # - # TODO: refactor this to get the correct lfs objects when implementing - # https://gitlab.com/gitlab-org/gitlab-foss/issues/39769 + # TODO: Remove this method once all LfsObjectsProject records are backfilled + # for forks. At that point, projects can look at their own `lfs_objects`. + # + # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. def all_lfs_objects - lfs_storage_project.lfs_objects + LfsObject.where(id: LfsObjectsProject.select(:lfs_object_id).where(project: [self, lfs_storage_project])) end def personal? -- GitLab From 1d7b454ba35290e5820ed32684e3a6e1dcc6b3f1 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Mon, 20 Jan 2020 18:11:57 +0800 Subject: [PATCH 7/9] Only link LFS objects that are in merge request Utilize existing Projects::LfsPointers::LfsLinkService after getting the OIDs of LFS objects that are present in the MR. Also call the service in MergeRequests::RefreshService so if new LFS object/s get pushed to MR's source branch, they will be linked to target project as well (if they don't exist yet). --- .../bulk_create_service.rb | 36 ------ app/services/merge_requests/create_service.rb | 6 +- .../link_lfs_objects_service.rb | 31 ++++++ .../merge_requests/refresh_service.rb | 29 ++++- app/workers/repository_fork_worker.rb | 10 +- spec/requests/api/merge_requests_spec.rb | 2 +- .../bulk_create_service_spec.rb | 46 -------- .../merge_requests/create_service_spec.rb | 6 +- .../link_lfs_objects_service_spec.rb | 103 ++++++++++++++++++ .../merge_requests/refresh_service_spec.rb | 8 ++ spec/workers/repository_fork_worker_spec.rb | 18 +-- 11 files changed, 186 insertions(+), 109 deletions(-) delete mode 100644 app/services/lfs_objects_projects/bulk_create_service.rb create mode 100644 app/services/merge_requests/link_lfs_objects_service.rb delete mode 100644 spec/services/lfs_objects_projects/bulk_create_service_spec.rb create mode 100644 spec/services/merge_requests/link_lfs_objects_service_spec.rb diff --git a/app/services/lfs_objects_projects/bulk_create_service.rb b/app/services/lfs_objects_projects/bulk_create_service.rb deleted file mode 100644 index 838e4aa67f28ce..00000000000000 --- a/app/services/lfs_objects_projects/bulk_create_service.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -module LfsObjectsProjects - class BulkCreateService < BaseService - def initialize(project, params = {}) - @project = project - @params = params - end - - def execute - target_project = params[:target_project] - - return unless target_project.present? - - Gitlab::Database.bulk_insert( - LfsObjectsProject.table_name, - lfs_objects_projects_map(target_project), - on_conflict: :do_nothing - ) - end - - private - - # rubocop: disable CodeReuse/ActiveRecord - def lfs_objects_projects_map(target_project) - project.lfs_objects_projects.where.not(lfs_object: target_project.lfs_objects).map do |objects_project| - { - lfs_object_id: objects_project.lfs_object_id, - project_id: target_project.id, - repository_type: objects_project.read_attribute_before_type_cast(:repository_type) - } - end - end - # rubocop: enable CodeReuse/ActiveRecord - end -end diff --git a/app/services/merge_requests/create_service.rb b/app/services/merge_requests/create_service.rb index 1a18a8714ecd91..4a05d1fd7efac3 100644 --- a/app/services/merge_requests/create_service.rb +++ b/app/services/merge_requests/create_service.rb @@ -67,11 +67,7 @@ def set_projects! end def link_lfs_objects(issuable) - return if issuable.source_project == issuable.target_project - - LfsObjectsProjects::BulkCreateService - .new(issuable.source_project, target_project: issuable.target_project) - .execute + LinkLfsObjectsService.new(issuable.target_project).execute(issuable) end end end diff --git a/app/services/merge_requests/link_lfs_objects_service.rb b/app/services/merge_requests/link_lfs_objects_service.rb new file mode 100644 index 00000000000000..191da59409548e --- /dev/null +++ b/app/services/merge_requests/link_lfs_objects_service.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module MergeRequests + class LinkLfsObjectsService < ::BaseService + def execute(merge_request, oldrev: merge_request.diff_base_sha, newrev: merge_request.diff_head_sha) + return if merge_request.source_project == project + return if no_changes?(oldrev, newrev) + + new_lfs_oids = lfs_oids(merge_request.source_project.repository, oldrev, newrev) + + return if new_lfs_oids.empty? + + Projects::LfsPointers::LfsLinkService + .new(project) + .execute(new_lfs_oids) + end + + private + + def no_changes?(oldrev, newrev) + oldrev == newrev + end + + def lfs_oids(source_repository, oldrev, newrev) + Gitlab::Git::LfsChanges + .new(source_repository, newrev) + .new_pointers(not_in: [oldrev]) + .map(&:lfs_oid) + end + end +end diff --git a/app/services/merge_requests/refresh_service.rb b/app/services/merge_requests/refresh_service.rb index 396ddec6383db2..c6e1651fa262f7 100644 --- a/app/services/merge_requests/refresh_service.rb +++ b/app/services/merge_requests/refresh_service.rb @@ -21,6 +21,7 @@ def refresh_merge_requests! # empty diff during a manual merge close_upon_missing_source_branch_ref post_merge_manually_merged + link_forks_lfs_objects reload_merge_requests outdate_suggestions refresh_pipelines_on_merge_requests @@ -91,17 +92,25 @@ def post_merge_manually_merged end # rubocop: enable CodeReuse/ActiveRecord + # Link LFS objects that exists in forks but does not exists in merge requests + # target project + def link_forks_lfs_objects + return unless @push.branch_updated? + + merge_requests_for_forks.find_each do |mr| + LinkLfsObjectsService + .new(mr.target_project) + .execute(mr, oldrev: @push.oldrev, newrev: @push.newrev) + end + end + # Refresh merge request diff if we push to source or target branch of merge request # Note: we should update merge requests from forks too - # rubocop: disable CodeReuse/ActiveRecord def reload_merge_requests merge_requests = @project.merge_requests.opened .by_source_or_target_branch(@push.branch_name).to_a - # Fork merge requests - merge_requests += MergeRequest.opened - .where(source_branch: @push.branch_name, source_project: @project) - .where.not(target_project: @project).to_a + merge_requests += merge_requests_for_forks.to_a filter_merge_requests(merge_requests).each do |merge_request| if branch_and_project_match?(merge_request) || @push.force_push? @@ -117,7 +126,6 @@ def reload_merge_requests # @source_merge_requests diffs (for MergeRequest#commit_shas for instance). merge_requests_for_source_branch(reload: true) end - # rubocop: enable CodeReuse/ActiveRecord def push_commit_ids @push_commit_ids ||= @commits.map(&:id) @@ -282,6 +290,15 @@ def merge_requests_for_source_branch(reload: false) @source_merge_requests = nil if reload @source_merge_requests ||= merge_requests_for(@push.branch_name) end + + # rubocop: disable CodeReuse/ActiveRecord + def merge_requests_for_forks + @merge_requests_for_forks ||= + MergeRequest.opened + .where(source_branch: @push.branch_name, source_project: @project) + .where.not(target_project: @project) + end + # rubocop: enable CodeReuse/ActiveRecord end end diff --git a/app/workers/repository_fork_worker.rb b/app/workers/repository_fork_worker.rb index 81be94a142611c..bf8537882cb0ae 100644 --- a/app/workers/repository_fork_worker.rb +++ b/app/workers/repository_fork_worker.rb @@ -30,9 +30,9 @@ def fork_repository(target_project, source_project) result = gitlab_shell.fork_repository(source_project, target_project) if result - LfsObjectsProjects::BulkCreateService - .new(source_project, target_project: target_project) - .execute + Projects::LfsPointers::LfsLinkService + .new(target_project) + .execute(lfs_pointers(source_project)) else raise "Unable to fork project #{target_project.id} for repository #{source_project.disk_path} -> #{target_project.disk_path}" end @@ -46,4 +46,8 @@ def start_fork(project) Rails.logger.info("Project #{project.full_path} was in inconsistent state (#{project.import_status}) while forking.") # rubocop:disable Gitlab/RailsLogger false end + + def lfs_pointers(project) + project.lfs_objects.map(&:oid) + end end diff --git a/spec/requests/api/merge_requests_spec.rb b/spec/requests/api/merge_requests_spec.rb index 245a8aa49052ef..427a361295c5c1 100644 --- a/spec/requests/api/merge_requests_spec.rb +++ b/spec/requests/api/merge_requests_spec.rb @@ -1492,7 +1492,7 @@ end end - context 'forked projects' do + context 'forked projects', :sidekiq_might_not_need_inline do let!(:user2) { create(:user) } let(:project) { create(:project, :public, :repository) } let!(:forked_project) { fork_project(project, user2, repository: true) } diff --git a/spec/services/lfs_objects_projects/bulk_create_service_spec.rb b/spec/services/lfs_objects_projects/bulk_create_service_spec.rb deleted file mode 100644 index 94022a7829d223..00000000000000 --- a/spec/services/lfs_objects_projects/bulk_create_service_spec.rb +++ /dev/null @@ -1,46 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -describe LfsObjectsProjects::BulkCreateService do - let(:project) { create(:project) } - let(:target_project) { create(:project) } - let(:params) { { target_project: target_project } } - - subject { described_class.new(project, params).execute } - - shared_examples_for 'LFS objects assigned to target project' do - it 'creates LfsObjectsProject records for the target project' do - expect { subject }.to change { target_project.lfs_objects.count } - expect(target_project.lfs_objects).to eq(project.lfs_objects) - end - end - - context 'target_project is not associated with any of the LFS objects' do - before do - create(:lfs_objects_project, project: project) - create(:lfs_objects_project, project: project) - end - - it_behaves_like 'LFS objects assigned to target project' - end - - context 'target_project already associated with some of the LFS objects' do - before do - objects_project = create(:lfs_objects_project, project: project) - create(:lfs_objects_project, project: target_project, lfs_object: objects_project.lfs_object) - create(:lfs_objects_project, project: project) - end - - it_behaves_like 'LFS objects assigned to target project' - end - - context 'target_project is not passed' do - let(:params) { {} } - - it 'does not create LfsObjectsProject records for the target project' do - expect { subject }.not_to change { target_project.lfs_objects.count } - expect(target_project.lfs_objects).to be_empty - end - end -end diff --git a/spec/services/merge_requests/create_service_spec.rb b/spec/services/merge_requests/create_service_spec.rb index 80d2eeb6162b16..8490127058c237 100644 --- a/spec/services/merge_requests/create_service_spec.rb +++ b/spec/services/merge_requests/create_service_spec.rb @@ -483,9 +483,9 @@ expect(merge_request).to be_persisted end - it 'calls LfsObjectsProjects::BulkCreateService#execute', :sidekiq_might_not_need_inline do - expect_next_instance_of(LfsObjectsProjects::BulkCreateService) do |service| - expect(service).to receive(:execute) + it 'calls MergeRequests::LinkLfsObjectsService#execute', :sidekiq_might_not_need_inline do + expect_next_instance_of(MergeRequests::LinkLfsObjectsService) do |service| + expect(service).to receive(:execute).with(instance_of(MergeRequest)) end described_class.new(project, user, opts).execute diff --git a/spec/services/merge_requests/link_lfs_objects_service_spec.rb b/spec/services/merge_requests/link_lfs_objects_service_spec.rb new file mode 100644 index 00000000000000..f07cf13e4f243c --- /dev/null +++ b/spec/services/merge_requests/link_lfs_objects_service_spec.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe MergeRequests::LinkLfsObjectsService, :sidekiq_inline do + include ProjectForksHelper + include RepoHelpers + + let(:target_project) { create(:project, :public, :repository) } + + let(:merge_request) do + create( + :merge_request, + target_project: target_project, + target_branch: 'lfs', + source_project: source_project, + source_branch: 'link-lfs-objects' + ) + end + + subject { described_class.new(target_project) } + + shared_examples_for 'linking LFS objects' do + context 'when source project is the same as target project' do + let(:source_project) { target_project } + + it 'does not call Projects::LfsPointers::LfsLinkService#execute' do + expect(Projects::LfsPointers::LfsLinkService).not_to receive(:new) + + execute + end + end + + context 'when source project is different from target project' do + let(:user) { create(:user) } + let(:source_project) { fork_project(target_project, user, namespace: user.namespace, repository: true) } + + before do + create_branch(source_project, 'link-lfs-objects', 'lfs') + end + + context 'and there are changes' do + before do + allow(source_project).to receive(:lfs_enabled?).and_return(true) + end + + context 'and there are LFS objects added' do + before do + create_file_in_repo(source_project, 'link-lfs-objects', 'link-lfs-objects', 'one.lfs', 'One') + create_file_in_repo(source_project, 'link-lfs-objects', 'link-lfs-objects', 'two.lfs', 'Two') + end + + it 'calls Projects::LfsPointers::LfsLinkService#execute with OIDs of LFS objects in merge request' do + expect_next_instance_of(Projects::LfsPointers::LfsLinkService) do |service| + expect(service).to receive(:execute).with(%w[ + 8b12507783d5becacbf2ebe5b01a60024d8728a8f86dcc818bce699e8b3320bc + 94a72c074cfe574742c9e99e863322f73feff82981d065ff65a0308f44f19f62 + ]) + end + + execute + end + end + + context 'but there are no LFS objects added' do + before do + create_file_in_repo(source_project, 'link-lfs-objects', 'link-lfs-objects', 'one.txt', 'One') + end + + it 'does not call Projects::LfsPointers::LfsLinkService#execute' do + expect(Projects::LfsPointers::LfsLinkService).not_to receive(:new) + + execute + end + end + end + + context 'and there are no changes' do + it 'does not call Projects::LfsPointers::LfsLinkService#execute' do + expect(Projects::LfsPointers::LfsLinkService).not_to receive(:new) + + execute + end + end + end + end + + context 'when no oldrev and newrev passed' do + let(:execute) { subject.execute(merge_request) } + + it_behaves_like 'linking LFS objects' + end + + context 'when oldrev and newrev are passed' do + let(:execute) { subject.execute(merge_request, oldrev: merge_request.diff_base_sha, newrev: merge_request.diff_head_sha) } + + it_behaves_like 'linking LFS objects' + end + + def create_branch(project, new_name, branch_name) + ::Branches::CreateService.new(project, user).execute(new_name, branch_name) + end +end diff --git a/spec/services/merge_requests/refresh_service_spec.rb b/spec/services/merge_requests/refresh_service_spec.rb index 1ba216e8ff1a16..b67779a912d1f2 100644 --- a/spec/services/merge_requests/refresh_service_spec.rb +++ b/spec/services/merge_requests/refresh_service_spec.rb @@ -384,6 +384,14 @@ def refresh end context 'open fork merge request' do + it 'calls MergeRequests::LinkLfsObjectsService#execute' do + expect_next_instance_of(MergeRequests::LinkLfsObjectsService) do |svc| + expect(svc).to receive(:execute).with(@fork_merge_request, oldrev: @oldrev, newrev: @newrev) + end + + refresh + end + it 'executes hooks with update action' do refresh diff --git a/spec/workers/repository_fork_worker_spec.rb b/spec/workers/repository_fork_worker_spec.rb index b1405217834d64..73b1a39a0094ee 100644 --- a/spec/workers/repository_fork_worker_spec.rb +++ b/spec/workers/repository_fork_worker_spec.rb @@ -47,15 +47,6 @@ def expect_fork_repository perform! end - it 'calls LfsObjectsProjects::BulkCreateService#execute' do - expect_fork_repository.and_return(true) - expect_next_instance_of(LfsObjectsProjects::BulkCreateService) do |service| - expect(service).to receive(:execute) - end - - perform! - end - it 'protects the default branch' do expect_fork_repository.and_return(true) @@ -88,6 +79,15 @@ def expect_fork_repository expect { perform! }.to raise_error(StandardError, error_message) end + + it 'calls Projects::LfsPointers::LfsLinkService#execute with OIDs of source project LFS objects' do + expect_fork_repository.and_return(true) + expect_next_instance_of(Projects::LfsPointers::LfsLinkService) do |service| + expect(service).to receive(:execute).with(project.lfs_objects.map(&:oid)) + end + + perform! + end end context 'only project ID passed' do -- GitLab From a9fcc790dd9b47d51d322845226d625e140b79c8 Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Fri, 24 Jan 2020 12:15:42 +0800 Subject: [PATCH 8/9] Make Project#all_lfs_objects query more performant Add a multi-column index to `project_id` and `lfs_object_id` and use a JOIN. This also removes the existing `project_id` index since it'll be redundant. --- app/models/project.rb | 5 ++- ...ti_column_index_on_lfs_objects_projects.rb | 17 ++++++++ ...roject_id_index_on_lfs_objects_projects.rb | 17 ++++++++ db/schema.rb | 2 +- spec/models/project_spec.rb | 40 ++++++++++++++++--- 5 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 db/migrate/20200123040535_add_multi_column_index_on_lfs_objects_projects.rb create mode 100644 db/migrate/20200123045415_remove_project_id_index_on_lfs_objects_projects.rb diff --git a/app/models/project.rb b/app/models/project.rb index a7779e017dbe1f..df1ce437c992be 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1382,7 +1382,10 @@ def lfs_storage_project # # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. def all_lfs_objects - LfsObject.where(id: LfsObjectsProject.select(:lfs_object_id).where(project: [self, lfs_storage_project])) + LfsObject + .distinct + .joins(:lfs_objects_projects) + .where(lfs_objects_projects: { project_id: [self, lfs_storage_project] }) end def personal? diff --git a/db/migrate/20200123040535_add_multi_column_index_on_lfs_objects_projects.rb b/db/migrate/20200123040535_add_multi_column_index_on_lfs_objects_projects.rb new file mode 100644 index 00000000000000..36ccfa4743eee8 --- /dev/null +++ b/db/migrate/20200123040535_add_multi_column_index_on_lfs_objects_projects.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class AddMultiColumnIndexOnLfsObjectsProjects < ActiveRecord::Migration[5.2] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + add_concurrent_index :lfs_objects_projects, [:project_id, :lfs_object_id] + end + + def down + remove_concurrent_index :lfs_objects_projects, [:project_id, :lfs_object_id] + end +end diff --git a/db/migrate/20200123045415_remove_project_id_index_on_lfs_objects_projects.rb b/db/migrate/20200123045415_remove_project_id_index_on_lfs_objects_projects.rb new file mode 100644 index 00000000000000..540383e8808e48 --- /dev/null +++ b/db/migrate/20200123045415_remove_project_id_index_on_lfs_objects_projects.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class RemoveProjectIdIndexOnLfsObjectsProjects < ActiveRecord::Migration[5.2] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + remove_concurrent_index :lfs_objects_projects, :project_id + end + + def down + add_concurrent_index :lfs_objects_projects, :project_id + end +end diff --git a/db/schema.rb b/db/schema.rb index 354224617413dd..9402b095b93d9e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2347,7 +2347,7 @@ t.datetime "updated_at" t.integer "repository_type", limit: 2 t.index ["lfs_object_id"], name: "index_lfs_objects_projects_on_lfs_object_id" - t.index ["project_id"], name: "index_lfs_objects_projects_on_project_id" + t.index ["project_id", "lfs_object_id"], name: "index_lfs_objects_projects_on_project_id_and_lfs_object_id" end create_table "licenses", id: :serial, force: :cascade do |t| diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index ae4db1c2158a6f..3839678fbdc061 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -2694,16 +2694,44 @@ describe '#all_lfs_objects' do let(:lfs_object) { create(:lfs_object) } - before do - project.lfs_objects << lfs_object + context 'when LFS object is only associated to the source' do + before do + project.lfs_objects << lfs_object + end + + it 'returns the lfs object for a project' do + expect(project.all_lfs_objects).to contain_exactly(lfs_object) + end + + it 'returns the lfs object for a fork' do + expect(forked_project.all_lfs_objects).to contain_exactly(lfs_object) + end end - it 'returns the lfs object for a project' do - expect(project.all_lfs_objects).to contain_exactly(lfs_object) + context 'when LFS object is only associated to the fork' do + before do + forked_project.lfs_objects << lfs_object + end + + it 'returns nothing' do + expect(project.all_lfs_objects).to be_empty + end + + it 'returns the lfs object for a fork' do + expect(forked_project.all_lfs_objects).to contain_exactly(lfs_object) + end end - it 'returns the lfs object for a fork' do - expect(forked_project.all_lfs_objects).to contain_exactly(lfs_object) + context 'when LFS object is associated to both source and fork' do + before do + project.lfs_objects << lfs_object + forked_project.lfs_objects << lfs_object + end + + it 'returns the lfs object for the source and fork' do + expect(project.all_lfs_objects).to contain_exactly(lfs_object) + expect(forked_project.all_lfs_objects).to contain_exactly(lfs_object) + end end end end -- GitLab From b6a64df917a672cf5f15ce7dff05e08f5a77a41d Mon Sep 17 00:00:00 2001 From: Patrick Bajao Date: Thu, 6 Feb 2020 16:37:30 +0800 Subject: [PATCH 9/9] Fail forking when source has lots of LFS objects Also use `pluck` instead of loading all objects in memory. --- app/models/project.rb | 8 +++++++ app/workers/repository_fork_worker.rb | 26 ++++++++++++++++----- spec/models/project_spec.rb | 25 ++++++++++++++++++++ spec/workers/repository_fork_worker_spec.rb | 17 +++++++++++--- 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/app/models/project.rb b/app/models/project.rb index df1ce437c992be..4674dca53887ef 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -1388,6 +1388,14 @@ def all_lfs_objects .where(lfs_objects_projects: { project_id: [self, lfs_storage_project] }) end + # TODO: Call `#lfs_objects` instead once all LfsObjectsProject records are + # backfilled. At that point, projects can look at their own `lfs_objects`. + # + # See https://gitlab.com/gitlab-org/gitlab/issues/122002 for more info. + def lfs_objects_oids + all_lfs_objects.pluck(:oid) + end + def personal? !group end diff --git a/app/workers/repository_fork_worker.rb b/app/workers/repository_fork_worker.rb index bf8537882cb0ae..ba141f808a768e 100644 --- a/app/workers/repository_fork_worker.rb +++ b/app/workers/repository_fork_worker.rb @@ -30,11 +30,13 @@ def fork_repository(target_project, source_project) result = gitlab_shell.fork_repository(source_project, target_project) if result - Projects::LfsPointers::LfsLinkService - .new(target_project) - .execute(lfs_pointers(source_project)) + link_lfs_objects(source_project, target_project) else - raise "Unable to fork project #{target_project.id} for repository #{source_project.disk_path} -> #{target_project.disk_path}" + raise_fork_failure( + source_project, + target_project, + 'Failed to create fork repository' + ) end target_project.after_import @@ -47,7 +49,19 @@ def start_fork(project) false end - def lfs_pointers(project) - project.lfs_objects.map(&:oid) + def link_lfs_objects(source_project, target_project) + Projects::LfsPointers::LfsLinkService + .new(target_project) + .execute(source_project.lfs_objects_oids) + rescue Projects::LfsPointers::LfsLinkService::TooManyOidsError + raise_fork_failure( + source_project, + target_project, + 'Source project has too many LFS objects' + ) + end + + def raise_fork_failure(source_project, target_project, reason) + raise "Unable to fork project #{target_project.id} for repository #{source_project.disk_path} -> #{target_project.disk_path}: #{reason}" end end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index 3839678fbdc061..a95ac4dca04955 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -5547,6 +5547,31 @@ def enable_lfs end end + describe '#lfs_objects_oids' do + let(:project) { create(:project) } + let(:lfs_object) { create(:lfs_object) } + let(:another_lfs_object) { create(:lfs_object) } + + subject { project.lfs_objects_oids } + + context 'when project has associated LFS objects' do + before do + create(:lfs_objects_project, lfs_object: lfs_object, project: project) + create(:lfs_objects_project, lfs_object: another_lfs_object, project: project) + end + + it 'returns OIDs of LFS objects' do + expect(subject).to match_array([lfs_object.oid, another_lfs_object.oid]) + end + end + + context 'when project has no associated LFS objects' do + it 'returns empty array' do + expect(subject).to be_empty + end + end + end + def rugged_config rugged_repo(project.repository).config end diff --git a/spec/workers/repository_fork_worker_spec.rb b/spec/workers/repository_fork_worker_spec.rb index 73b1a39a0094ee..01104049404001 100644 --- a/spec/workers/repository_fork_worker_spec.rb +++ b/spec/workers/repository_fork_worker_spec.rb @@ -72,8 +72,8 @@ def expect_fork_repository perform! end - it "handles bad fork" do - error_message = "Unable to fork project #{forked_project.id} for repository #{project.disk_path} -> #{forked_project.disk_path}" + it 'handles bad fork' do + error_message = "Unable to fork project #{forked_project.id} for repository #{project.disk_path} -> #{forked_project.disk_path}: Failed to create fork repository" expect_fork_repository.and_return(false) @@ -83,11 +83,22 @@ def expect_fork_repository it 'calls Projects::LfsPointers::LfsLinkService#execute with OIDs of source project LFS objects' do expect_fork_repository.and_return(true) expect_next_instance_of(Projects::LfsPointers::LfsLinkService) do |service| - expect(service).to receive(:execute).with(project.lfs_objects.map(&:oid)) + expect(service).to receive(:execute).with(project.lfs_objects_oids) end perform! end + + it "handles LFS objects link failure" do + error_message = "Unable to fork project #{forked_project.id} for repository #{project.disk_path} -> #{forked_project.disk_path}: Source project has too many LFS objects" + + expect_fork_repository.and_return(true) + expect_next_instance_of(Projects::LfsPointers::LfsLinkService) do |service| + expect(service).to receive(:execute).and_raise(Projects::LfsPointers::LfsLinkService::TooManyOidsError) + end + + expect { perform! }.to raise_error(StandardError, error_message) + end end context 'only project ID passed' do -- GitLab