From 7d3d34dd15c91582a6b75769ebf08c7d9c48759b Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Thu, 14 Mar 2019 17:20:40 +1300 Subject: [PATCH 1/6] Enrich commits with full data in CommitCollection Allow incomplete commit records to load their full data from gitaly. Commits can be based on a Hash of data retrieved from PostgreSQL, and this data can be intentionally incomplete in order to save space. A new method #gitaly? has been added to Gitlab::Git::Commit, which returns true if the underlying data source of the Commit is a Gitaly::GitCommit. CommitCollection now has a method #enrich which replaces non-gitaly commits in place with commits from gitaly. CommitCollection#without_merge_commits has been updated to call this method, as in order to determine a merge commit we need to have parent data. Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/58805 --- app/models/commit_collection.rb | 31 ++++++- lib/gitlab/git/commit.rb | 6 +- spec/lib/gitlab/git/commit_spec.rb | 12 +++ spec/models/commit_collection_spec.rb | 86 ++++++++++++++++++- spec/models/merge_request_spec.rb | 33 +++---- .../merge_request_widget_entity_spec.rb | 15 ++-- 6 files changed, 154 insertions(+), 29 deletions(-) diff --git a/app/models/commit_collection.rb b/app/models/commit_collection.rb index a9a2e9c81eb5be..f4e64abf362d44 100644 --- a/app/models/commit_collection.rb +++ b/app/models/commit_collection.rb @@ -28,10 +28,39 @@ def authors def without_merge_commits strong_memoize(:without_merge_commits) do - commits.reject(&:merge_commit?) + # `#enrich!` the collection to ensure all commits contain + # the necessary parent data + enrich!.commits.reject(&:merge_commit?) end end + def unenriched + commits.reject(&:gitaly_commit?) + end + + def fully_enriched? + unenriched.empty? + end + + # Batch load any commits that are not backed by full gitaly data, and + # replace them in the collection. + def enrich! + return self if fully_enriched? + + # Batch load full Commits from the repository + # and map to a Hash of id => Commit + replacements = Hash[unenriched.map do |c| + [c.id, Commit.lazy(project, c.id)] + end.compact] + + # Replace the commits, keeping the same order + @commits = @commits.map do |c| + replacements.fetch(c.id, c) + end + + self + end + # Sets the pipeline status for every commit. # # Setting this status ahead of time removes the need for running a query for diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index 88ff9fbceb40be..8fac3621df9c8f 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -318,6 +318,10 @@ def merge_commit? parent_ids.size > 1 end + def gitaly_commit? + raw_commit.is_a?(Gitaly::GitCommit) + end + def tree_entry(path) return unless path.present? @@ -340,7 +344,7 @@ def commit_tree_entry(path) end def to_gitaly_commit - return raw_commit if raw_commit.is_a?(Gitaly::GitCommit) + return raw_commit if gitaly_commit? message_split = raw_commit.message.split("\n", 2) Gitaly::GitCommit.new( diff --git a/spec/lib/gitlab/git/commit_spec.rb b/spec/lib/gitlab/git/commit_spec.rb index 3fb41a626b29b3..4a4ac833e39fdc 100644 --- a/spec/lib/gitlab/git/commit_spec.rb +++ b/spec/lib/gitlab/git/commit_spec.rb @@ -537,6 +537,18 @@ end end + describe '#gitaly_commit?' do + context 'when the commit data comes from gitaly' do + it { expect(commit.gitaly_commit?).to eq(true) } + end + + context 'when the commit data comes from a Hash' do + let(:commit) { described_class.new(repository, sample_commit_hash) } + + it { expect(commit.gitaly_commit?).to eq(false) } + end + end + describe '#has_zero_stats?' do it { expect(commit.has_zero_stats?).to eq(false) } end diff --git a/spec/models/commit_collection_spec.rb b/spec/models/commit_collection_spec.rb index 0f5d03ff458caa..30c504ebea8812 100644 --- a/spec/models/commit_collection_spec.rb +++ b/spec/models/commit_collection_spec.rb @@ -37,12 +37,92 @@ describe '#without_merge_commits' do it 'returns all commits except merge commits' do + merge_commit = project.commit("60ecb67744cb56576c30214ff52294f8ce2def98") + expect(merge_commit).to receive(:merge_commit?).and_return(true) + collection = described_class.new(project, [ - build(:commit), - build(:commit, :merge_commit) + commit, + merge_commit ]) - expect(collection.without_merge_commits.size).to eq(1) + expect(collection.without_merge_commits).to contain_exactly(commit) + end + end + + describe 'enrichment methods' do + let(:gitaly_commit) { commit } + let(:hash_commit) { Commit.from_hash(gitaly_commit.to_hash, project) } + + describe '#unenriched' do + it 'returns all commits that are not backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, hash_commit]) + + expect(collection.unenriched).to contain_exactly(hash_commit) + end + end + + describe '#fully_enriched?' do + it 'returns true when all commits are backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, gitaly_commit]) + + expect(collection.fully_enriched?).to eq(true) + end + + it 'returns false when any commits are not backed by gitaly data' do + collection = described_class.new(project, [gitaly_commit, hash_commit]) + + expect(collection.fully_enriched?).to eq(false) + end + + it 'returns true when the collection is empty' do + collection = described_class.new(project, []) + + expect(collection.fully_enriched?).to eq(true) + end + end + + describe '#enrich!' do + it 'replaces commits in the collection with those backed by gitaly data' do + collection = described_class.new(project, [hash_commit]) + + collection.enrich! + + new_commit = collection.commits.first + expect(new_commit.id).to eq(hash_commit.id) + expect(hash_commit.gitaly_commit?).to eq(false) + expect(new_commit.gitaly_commit?).to eq(true) + end + + it 'maintains the original order of the commits' do + gitaly_commits = [gitaly_commit] * 3 + hash_commits = [hash_commit] * 3 + # Interleave the gitaly and hash commits together + original_commits = gitaly_commits.zip(hash_commits).flatten + collection = described_class.new(project, original_commits) + + collection.enrich! + + original_commits.each_with_index do |original_commit, i| + new_commit = collection.commits[i] + expect(original_commit.id).to eq(new_commit.id) + end + end + + it 'fetches data if there are unenriched commits' do + collection = described_class.new(project, [hash_commit]) + + expect(Commit).to receive(:lazy).exactly(:once) + + collection.enrich! + end + + it 'does not fetch data if all commits are enriched' do + collection = described_class.new(project, [gitaly_commit]) + + expect(Commit).not_to receive(:lazy) + + collection.enrich! + end end end diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index 83723148c5ad19..768015a9c6a540 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -84,32 +84,27 @@ describe '#default_squash_commit_message' do let(:project) { subject.project } - - def commit_collection(commit_hashes) - raw_commits = commit_hashes.map { |raw| Commit.from_hash(raw, project) } - - CommitCollection.new(project, raw_commits) - end + let(:is_multiline) { -> (c) { c.description.present? } } + let(:multiline_commits) { subject.commits.select(&is_multiline) } + let(:singleline_commits) { subject.commits.reject(&is_multiline) } it 'returns the oldest multiline commit message' do - commits = commit_collection([ - { message: 'Singleline', parent_ids: [] }, - { message: "Second multiline\nCommit message", parent_ids: [] }, - { message: "First multiline\nCommit message", parent_ids: [] } - ]) - - expect(subject).to receive(:commits).and_return(commits) - - expect(subject.default_squash_commit_message).to eq("First multiline\nCommit message") + expect(subject.default_squash_commit_message).to eq(multiline_commits.last.message) end it 'returns the merge request title if there are no multiline commits' do - commits = commit_collection([ - { message: 'Singleline', parent_ids: [] } - ]) + expect(subject).to receive(:commits).and_return( + CommitCollection.new(project, singleline_commits) + ) + + expect(subject.default_squash_commit_message).to eq(subject.title) + end - expect(subject).to receive(:commits).and_return(commits) + it 'does not return commit messages from multiline merge commits' do + collection = CommitCollection.new(project, multiline_commits).enrich! + expect(collection.commits).to all( receive(:merge_commit?).and_return(true) ) + expect(subject).to receive(:commits).and_return(collection) expect(subject.default_squash_commit_message).to eq(subject.title) end end diff --git a/spec/serializers/merge_request_widget_entity_spec.rb b/spec/serializers/merge_request_widget_entity_spec.rb index 4dbd79f2fc0a4f..727fd8951f29af 100644 --- a/spec/serializers/merge_request_widget_entity_spec.rb +++ b/spec/serializers/merge_request_widget_entity_spec.rb @@ -279,13 +279,18 @@ end describe 'commits_without_merge_commits' do + def find_matching_commit(short_id) + resource.commits.find { |c| c.short_id == short_id } + end + it 'should not include merge commits' do - # Mock all but the first 5 commits to be merge commits - resource.commits.each_with_index do |commit, i| - expect(commit).to receive(:merge_commit?).at_least(:once).and_return(i > 4) - end + commits_in_widget = subject[:commits_without_merge_commits] - expect(subject[:commits_without_merge_commits].size).to eq(5) + expect(commits_in_widget.length).to be < resource.commits.length + expect(commits_in_widget.length).to eq(resource.commits.without_merge_commits.length) + commits_in_widget.each do |c| + expect(find_matching_commit(c[:short_id]).merge_commit?).to eq(false) + end end end end -- GitLab From ae13abe437266bf546d11f2fda698dc766357f81 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 15 Mar 2019 11:49:28 +1300 Subject: [PATCH 2/6] Scope out merge commits in MergeRequest spec Previously the code for excluding merge commits from the commit collection (CommitCollection#without_merge_commits) was not working when the commits had come from a merge request. Now that this has been fixed, these tests fails. They should always have been written to exclude merge commits when comparing. --- spec/models/merge_request_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/models/merge_request_spec.rb b/spec/models/merge_request_spec.rb index 768015a9c6a540..6c850f7f6eb577 100644 --- a/spec/models/merge_request_spec.rb +++ b/spec/models/merge_request_spec.rb @@ -1039,7 +1039,7 @@ def set_compare(merge_request) describe '#commit_authors' do it 'returns all the authors of every commit in the merge request' do - users = subject.commits.map(&:author_email).uniq.map do |email| + users = subject.commits.without_merge_commits.map(&:author_email).uniq.map do |email| create(:user, email: email) end @@ -1053,7 +1053,7 @@ def set_compare(merge_request) describe '#authors' do it 'returns a list with all the commit authors in the merge request and author' do - users = subject.commits.map(&:author_email).uniq.map do |email| + users = subject.commits.without_merge_commits.map(&:author_email).uniq.map do |email| create(:user, email: email) end -- GitLab From 4844018bd0f9847654c1c54b30cfdf112f6a439f Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Wed, 20 Mar 2019 12:07:51 +1300 Subject: [PATCH 3/6] Handle blank projects in CommitCollection#enrich! A project is needed in order to fetch data from gitaly. Projects can be absent from commits in certain rare situations (like when viewing a MR of a deleted fork). In these cases, assume that the enriched data is not needed. See this comment: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/26144#note_152191106 "It's led to a dilemma about where to "fix" this in code. I'm going to fix it by allowing CommitCollection#enrich! to just return unenriched commits when a project is missing, essentially "silently failing". I hope this is the right decision. It's going with the assumption that calls in these situations in the future are not needing the full data. The alternative would be to allow CommitCollection#enrich! to still error, but handle it in the methods that call #enrich!, however that might lead to brittleness in future when working with project-less MRs." --- app/models/commit_collection.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/models/commit_collection.rb b/app/models/commit_collection.rb index f4e64abf362d44..52524456439ae9 100644 --- a/app/models/commit_collection.rb +++ b/app/models/commit_collection.rb @@ -45,7 +45,11 @@ def fully_enriched? # Batch load any commits that are not backed by full gitaly data, and # replace them in the collection. def enrich! - return self if fully_enriched? + # A project is needed in order to fetch data from gitaly. Projects + # can be absent from commits in certain rare situations (like when + # viewing a MR of a deleted fork). In these cases, assume that the + # enriched data is not needed. + return self if project.blank? || fully_enriched? # Batch load full Commits from the repository # and map to a Hash of id => Commit -- GitLab From bcb756cf0754c433f88a0a7d5d0108597f432f0a Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Fri, 22 Mar 2019 12:05:25 +1300 Subject: [PATCH 4/6] Update ee specs to correctly select MR author When ce MR https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/26144 was merged in, it caused failures in ee specs https://gitlab.com/gitlab-org/gitlab-ee/issues/10567. The fix for these specs is to select an "author" of the MR by first filtering merge commits from the commit collection. This mirrors the behaviour of MergeRequest#authors. Previously, this test passed as MergeRequest#commits would always return commits that lacked parent_ids, as they are initialized from data kept in MergeRequestDiffCommit which does not have parent_ids data. This meant that all MR commits were considered not to be merge commits, even though in gitaly some did have parent ids. However, now, CommitCollection#enrich! is called CommitCollection#without_merge_commits is called (as it is when MergeRequest#authors is called), so commit data is fetched from gitaly and now have parent_ids data. This cause the tests to fail, because the "author" being chosen from the merge request commits was in fact the user who had committed a merge commit. --- ee/spec/models/approvable_for_rule_spec.rb | 4 ++-- ee/spec/models/approvable_spec.rb | 4 ++-- ee/spec/models/visible_approvable_for_rule_spec.rb | 2 +- ee/spec/models/visible_approvable_spec.rb | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ee/spec/models/approvable_for_rule_spec.rb b/ee/spec/models/approvable_for_rule_spec.rb index 6711aeda5c18c9..a8797d469ada58 100644 --- a/ee/spec/models/approvable_for_rule_spec.rb +++ b/ee/spec/models/approvable_for_rule_spec.rb @@ -60,7 +60,7 @@ end context 'when user is committer' do - let(:user) { create(:user, email: merge_request.commits.first.committer_email) } + let(:user) { create(:user, email: merge_request.commits.without_merge_commits.first.committer_email) } before do project.add_developer(user) @@ -90,7 +90,7 @@ end it 'returns false when user is a committer' do - user = create(:user, email: merge_request.commits.first.committer_email) + user = create(:user, email: merge_request.commits.without_merge_commits.first.committer_email) project.add_developer(user) create(:approver, target: merge_request, user: user) diff --git a/ee/spec/models/approvable_spec.rb b/ee/spec/models/approvable_spec.rb index 13d14e51c7940c..0b6569c047014d 100644 --- a/ee/spec/models/approvable_spec.rb +++ b/ee/spec/models/approvable_spec.rb @@ -79,7 +79,7 @@ end context 'when user is committer' do - let(:user) { create(:user, email: merge_request.commits.first.committer_email) } + let(:user) { create(:user, email: merge_request.commits.without_merge_commits.first.committer_email) } before do project.add_developer(user) @@ -109,7 +109,7 @@ end it 'returns false when user is a committer' do - user = create(:user, email: merge_request.commits.first.committer_email) + user = create(:user, email: merge_request.commits.without_merge_commits.first.committer_email) project.add_developer(user) create(:approver, target: merge_request, user: user) diff --git a/ee/spec/models/visible_approvable_for_rule_spec.rb b/ee/spec/models/visible_approvable_for_rule_spec.rb index 7ffb9c4a857c8d..051aa07f124653 100644 --- a/ee/spec/models/visible_approvable_for_rule_spec.rb +++ b/ee/spec/models/visible_approvable_for_rule_spec.rb @@ -85,7 +85,7 @@ end context 'when committer is approver' do - let(:approver) { create(:user, email: resource.commits.first.committer_email) } + let(:approver) { create(:user, email: resource.commits.without_merge_commits.first.committer_email) } it_behaves_like 'able to exclude authors' end diff --git a/ee/spec/models/visible_approvable_spec.rb b/ee/spec/models/visible_approvable_spec.rb index 88f3e466ed01d3..d4db55a8ee954a 100644 --- a/ee/spec/models/visible_approvable_spec.rb +++ b/ee/spec/models/visible_approvable_spec.rb @@ -69,7 +69,7 @@ end context 'when committer is approver' do - let(:user) { create(:user, email: resource.commits.first.committer_email) } + let(:user) { create(:user, email: resource.commits.without_merge_commits.first.committer_email) } let!(:committer_approver) { create(:approver, target: project, user: user) } before do -- GitLab From f51ee0fad5ab6c6a4ddb9be5b84f14cabb441dcd Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Tue, 26 Mar 2019 10:46:45 +1300 Subject: [PATCH 5/6] Add changelog entry --- ...-incomplete-commit-data-to-be-fetched-from-collection.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection.yml diff --git a/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection.yml b/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection.yml new file mode 100644 index 00000000000000..4377ebfdbdf165 --- /dev/null +++ b/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection.yml @@ -0,0 +1,5 @@ +--- +title: Fix merge commits being used as default squash commit messages +merge_request: 26445 +author: +type: fixed -- GitLab From 1b2c40cfbd4e0cb52436a31afcc4efb6bb9d9353 Mon Sep 17 00:00:00 2001 From: Luke Duncalfe Date: Tue, 26 Mar 2019 10:46:53 +1300 Subject: [PATCH 6/6] Add changelog entry --- ...complete-commit-data-to-be-fetched-from-collection-ee.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 ee/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection-ee.yml diff --git a/ee/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection-ee.yml b/ee/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection-ee.yml new file mode 100644 index 00000000000000..d2303ad032fdc4 --- /dev/null +++ b/ee/changelogs/unreleased/58805-allow-incomplete-commit-data-to-be-fetched-from-collection-ee.yml @@ -0,0 +1,5 @@ +--- +title: Fix authors of merge commits being excluded from approving an MR +merge_request: 10359 +author: +type: fixed -- GitLab