diff --git a/ee/app/models/concerns/elastic/application_search.rb b/ee/app/models/concerns/elastic/application_search.rb index 8495dc1827c132a69a6b6f2701d65e21816c8fd0..16aac6923448fb15df50423dc1f8b76036f283ab 100644 --- a/ee/app/models/concerns/elastic/application_search.rb +++ b/ee/app/models/concerns/elastic/application_search.rb @@ -44,7 +44,7 @@ module ApplicationSearch # Since we can't have multiple types in ES6, but want to be able to use JOINs, we must declare all our # fields together instead of per model - mappings do + mappings dynamic: 'strict' do ### Shared fields indexes :id, type: :integer indexes :created_at, type: :date @@ -182,7 +182,7 @@ module ApplicationSearch indexes :time, type: :date, format: :basic_date_time_no_millis end - indexes :commiter do + indexes :committer do indexes :name, type: :text, index_options: 'offsets' indexes :email, type: :text, index_options: 'offsets' indexes :time, type: :date, format: :basic_date_time_no_millis diff --git a/ee/app/models/concerns/elastic/repositories_search.rb b/ee/app/models/concerns/elastic/repositories_search.rb index d03a2ba55b293571da247f079ce874d0724d7467..7806d9d65b82f32899535d907257ba43ecb035b3 100644 --- a/ee/app/models/concerns/elastic/repositories_search.rb +++ b/ee/app/models/concerns/elastic/repositories_search.rb @@ -57,22 +57,37 @@ def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: # Avoid one SELECT per result by loading all projects into a hash project_ids = response.map {|result| result["_source"]["commit"]["rid"] }.uniq - projects = Project.where(id: project_ids).index_by(&:id) + projects = Project.includes(:route).where(id: project_ids).index_by(&:id) - # n + 1: https://gitlab.com/gitlab-org/gitlab-ee/issues/3454 - commits = Gitlab::GitalyClient.allow_n_plus_1_calls do - response.map do |result| - sha = result["_source"]["commit"]["sha"] - project_id = result["_source"]["commit"]["rid"].to_i - - projects[project_id].try(:commit, sha) - end - end.compact + commits = response.map do |result| + project_id = result["_source"]["commit"]["rid"].to_i + project = projects[project_id] + raw_commit = Gitlab::Git::Commit.new( + project.repository.raw, + prepare_commit(result['_source']['commit']), + lazy_load_parents: true + ) + Commit.new(raw_commit, project) + end # Before "map" we had a paginated array so we need to recover it offset = per_page * ((page || 1) - 1) Kaminari.paginate_array(commits, total_count: response.total_count, limit: per_page, offset: offset) end + + def prepare_commit(raw_commit_hash) + { + id: raw_commit_hash['sha'], + message: raw_commit_hash['message'], + parent_ids: nil, + author_name: raw_commit_hash['author']['name'], + author_email: raw_commit_hash['author']['email'], + authored_date: Time.parse(raw_commit_hash['author']['time']).utc, + committer_name: raw_commit_hash['committer']['name'], + committer_email: raw_commit_hash['committer']['email'], + committed_date: Time.parse(raw_commit_hash['committer']['time']).utc + } + end end end end diff --git a/ee/changelogs/unreleased/avoid_es_loading_commits.yml b/ee/changelogs/unreleased/avoid_es_loading_commits.yml new file mode 100644 index 0000000000000000000000000000000000000000..cf5ca79e5d7a1b8e1d1e4105e2bd45765bcdeec4 --- /dev/null +++ b/ee/changelogs/unreleased/avoid_es_loading_commits.yml @@ -0,0 +1,5 @@ +--- +title: Avoid a Gitaly N+1 when loading commits for Elasticsearch search results +merge_request: 9760 +author: +type: performance diff --git a/lib/gitlab/git/commit.rb b/lib/gitlab/git/commit.rb index e5bbd500e9833a6c2489ed6cdfeb68260b6b3119..88ff9fbceb40be08cf15bde92a5a59e42de72c53 100644 --- a/lib/gitlab/git/commit.rb +++ b/lib/gitlab/git/commit.rb @@ -184,11 +184,12 @@ def get_messages(repository, commit_ids) end end - def initialize(repository, raw_commit, head = nil) + def initialize(repository, raw_commit, head = nil, lazy_load_parents: false) raise "Nil as raw commit passed" unless raw_commit @repository = repository @head = head + @lazy_load_parents = lazy_load_parents init_commit(raw_commit) end @@ -225,6 +226,12 @@ def different_committer? author_name != committer_name || author_email != committer_email end + def parent_ids + return @parent_ids unless @lazy_load_parents + + @parent_ids ||= @repository.commit(id).parent_ids + end + def parent_id parent_ids.first end