From 7671d6f38ac6828dc156797fb64fda0edfeded50 Mon Sep 17 00:00:00 2001 From: Mario de la Ossa Date: Wed, 27 Feb 2019 23:22:54 -0600 Subject: [PATCH] Avoid Gitaly N+1 in Elasticsearch commit results We avoid a Gitaly N+1 by passing a hash to Gitlab::Git::Commit instead of loading the commits via Gitaly. We also change Gitlab::Git::Commit to have a lazy_load_parents option that allows us not to load parent_ids unless necessary --- .../concerns/elastic/application_search.rb | 4 +-- .../concerns/elastic/repositories_search.rb | 35 +++++++++++++------ .../unreleased/avoid_es_loading_commits.yml | 5 +++ lib/gitlab/git/commit.rb | 9 ++++- 4 files changed, 40 insertions(+), 13 deletions(-) create mode 100644 ee/changelogs/unreleased/avoid_es_loading_commits.yml diff --git a/ee/app/models/concerns/elastic/application_search.rb b/ee/app/models/concerns/elastic/application_search.rb index 8495dc1827c132..16aac6923448fb 100644 --- a/ee/app/models/concerns/elastic/application_search.rb +++ b/ee/app/models/concerns/elastic/application_search.rb @@ -44,7 +44,7 @@ module ApplicationSearch # Since we can't have multiple types in ES6, but want to be able to use JOINs, we must declare all our # fields together instead of per model - mappings do + mappings dynamic: 'strict' do ### Shared fields indexes :id, type: :integer indexes :created_at, type: :date @@ -182,7 +182,7 @@ module ApplicationSearch indexes :time, type: :date, format: :basic_date_time_no_millis end - indexes :commiter do + indexes :committer do indexes :name, type: :text, index_options: 'offsets' indexes :email, type: :text, index_options: 'offsets' indexes :time, type: :date, format: :basic_date_time_no_millis diff --git a/ee/app/models/concerns/elastic/repositories_search.rb b/ee/app/models/concerns/elastic/repositories_search.rb index d03a2ba55b2935..7806d9d65b82f3 100644 --- a/ee/app/models/concerns/elastic/repositories_search.rb +++ b/ee/app/models/concerns/elastic/repositories_search.rb @@ -57,22 +57,37 @@ def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: # Avoid one SELECT per result by loading all projects into a hash project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq - projects = Project.where(id: project_ids).index_by(&:id) + projects = Project.includes(:route).where(id: project_ids).index_by(&:id) - # n + 1: https://gitlab.com/gitlab-org/gitlab-ee/issues/3454 - commits = Gitlab::GitalyClient.allow_n_plus_1_calls do - response.map do |result| - sha = result["_source"]["commit"]["sha"] - project_id = result["_source"]["commit"]["rid"].to_i - - projects[project_id].try(:commit, sha) - end - end.compact + commits = response.map do |result| + project_id = result["_source"]["commit"]["rid"].to_i + project = projects[project_id] + raw_commit = Gitlab::Git::Commit.new( + project.repository.raw, + prepare_commit(result['_source']['commit']), + lazy_load_parents: true + ) + Commit.new(raw_commit, project) + end # Before "map" we had a paginated array so we need to recover it offset = per_page * ((page || 1) - 1) Kaminari.paginate_array(commits, total_count: response.total_count, limit: per_page, offset: offset) end + + def prepare_commit(raw_commit_hash) + { + id: raw_commit_hash['sha'], + message: raw_commit_hash['message'], + parent_ids: nil, + author_name: raw_commit_hash['author']['name'], + author_email: raw_commit_hash['author']['email'], + authored_date: Time.parse(raw_commit_hash['author']['time']).utc, + committer_name: raw_commit_hash['committer']['name'], + committer_email: raw_commit_hash['committer']['email'], + committed_date: Time.parse(raw_commit_hash['committer']['time']).utc + } + end end end end diff --git a/ee/changelogs/unreleased/avoid_es_loading_commits.yml b/ee/changelogs/unreleased/avoid_es_loading_commits.yml new file mode 100644 index 00000000000000..cf5ca79e5d7a1b --- /dev/null +++ b/ee/changelogs/unreleased/avoid_es_loading_commits.yml @@ -0,0 +1,5 @@ +--- +title: Avoid a Gitaly N+1 when loading commits for Elasticsearch search results +merge_request: 9760 +author: +type: performance diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index e5bbd500e9833a..88ff9fbceb40be 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -184,11 +184,12 @@ def get_messages(repository, commit_ids) end end - def initialize(repository, raw_commit, head = nil) + def initialize(repository, raw_commit, head = nil, lazy_load_parents: false) raise "Nil as raw commit passed" unless raw_commit @repository = repository @head = head + @lazy_load_parents = lazy_load_parents init_commit(raw_commit) end @@ -225,6 +226,12 @@ def different_committer? author_name != committer_name || author_email != committer_email end + def parent_ids + return @parent_ids unless @lazy_load_parents + + @parent_ids ||= @repository.commit(id).parent_ids + end + def parent_id parent_ids.first end -- GitLab