From d631b91353d74e442c92e2566b8ba844fa85cf59 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 29 Nov 2022 13:59:12 +1100 Subject: [PATCH 01/13] Add zoekt code search integration This [MR](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049) introduces a new backend [Zoekt](https://github.com/sourcegraph/zoekt) to power code searches in GitLab. Previously we supported Elasticsearch and Gitaly backed searches. We found Gitaly didn't scale for searching across large groups and Elasticsearch has limitations in how indexing and searching works that makes it a poor choice for code. Zoekt provides a more "grep-like" user experience and solves a lot of the problems we had with Elasticsearch. You can read more about the motivation at https://gitlab.com/gitlab-org/gitlab/-/merge_requests/107891 . Since the rollout of this will be gradual and it is somewhat experimental it will not be the default and we will not be removing support for Elasticsearch any time soon. Changelog: added --- .gitlab/ci/rails/shared.gitlab-ci.yml | 2 +- config/sidekiq_queues.yml | 2 + db/docs/zoekt_indexed_namespaces.yml | 10 + db/docs/zoekt_shards.yml | 10 + ...add_zoekt_shards_and_indexed_namespaces.rb | 20 ++ ...dd_zoekt_indexed_namespaces_foreign_key.rb | 15 ++ db/schema_migrations/20230104201524 | 1 + db/schema_migrations/20230107125328 | 1 + db/structure.sql | 58 +++++ doc/user/search/exact_code_search.md | 36 +++ .../concerns/zoekt/searchable_repository.rb | 47 ++++ ee/app/models/ee/namespace.rb | 4 + ee/app/models/ee/project.rb | 4 + ee/app/models/ee/repository.rb | 1 + .../models/gitlab_subscriptions/features.rb | 2 + ee/app/models/zoekt/indexed_namespace.rb | 28 +++ ee/app/models/zoekt/shard.rb | 11 + .../concerns/search/zoekt_searchable.rb | 34 +++ ee/app/services/ee/git/branch_push_service.rb | 9 + ee/app/services/ee/search/group_service.rb | 12 + ee/app/services/ee/search/project_service.rb | 14 ++ ee/app/workers/all_queues.yml | 9 + ee/app/workers/zoekt/indexer_worker.rb | 28 +++ .../development/zoekt_for_code_search.yml | 8 + ee/lib/gitlab/zoekt/search_results.rb | 207 ++++++++++++++++++ ee/spec/features/search/zoekt/search_spec.rb | 74 +++++++ .../lib/gitlab/zoekt/search_results_spec.rb | 132 +++++++++++ .../zoekt/searchable_repository_spec.rb | 76 +++++++ ee/spec/models/ee/namespace_spec.rb | 8 + ee/spec/models/project_spec.rb | 8 + .../models/zoekt/indexed_namespace_spec.rb | 60 +++++ ee/spec/models/zoekt/shard_spec.rb | 20 ++ .../ee/git/branch_push_service_spec.rb | 48 +++- ee/spec/services/search/group_service_spec.rb | 66 ++++++ .../services/search/project_service_spec.rb | 47 ++++ .../search_service_shared_examples.rb | 2 + ee/spec/support/zoekt.rb | 44 ++++ ee/spec/workers/zoekt/indexer_worker_spec.rb | 70 ++++++ 38 files changed, 1226 insertions(+), 2 deletions(-) create mode 100644 db/docs/zoekt_indexed_namespaces.yml create mode 100644 db/docs/zoekt_shards.yml create mode 100644 db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb create mode 100644 db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb create mode 100644 db/schema_migrations/20230104201524 create mode 100644 db/schema_migrations/20230107125328 create mode 100644 doc/user/search/exact_code_search.md create mode 100644 ee/app/models/concerns/zoekt/searchable_repository.rb create mode 100644 ee/app/models/zoekt/indexed_namespace.rb create mode 100644 ee/app/models/zoekt/shard.rb create mode 100644 ee/app/services/concerns/search/zoekt_searchable.rb create mode 100644 ee/app/workers/zoekt/indexer_worker.rb create mode 100644 ee/config/feature_flags/development/zoekt_for_code_search.yml create mode 100644 ee/lib/gitlab/zoekt/search_results.rb create mode 100644 ee/spec/features/search/zoekt/search_spec.rb create mode 100644 ee/spec/lib/gitlab/zoekt/search_results_spec.rb create mode 100644 ee/spec/models/concerns/zoekt/searchable_repository_spec.rb create mode 100644 ee/spec/models/zoekt/indexed_namespace_spec.rb create mode 100644 ee/spec/models/zoekt/shard_spec.rb create mode 100644 ee/spec/support/zoekt.rb create mode 100644 ee/spec/workers/zoekt/indexer_worker_spec.rb diff --git a/.gitlab/ci/rails/shared.gitlab-ci.yml b/.gitlab/ci/rails/shared.gitlab-ci.yml index 4ca82f55b63df9..3f57fc35e40edd 100644 --- a/.gitlab/ci/rails/shared.gitlab-ci.yml +++ b/.gitlab/ci/rails/shared.gitlab-ci.yml @@ -62,7 +62,7 @@ include: # spec/lib, yet background migration tests are also sitting there, # and they should run on their own jobs so we don't need to run them # in unit tests again. - - rspec_paralellized_job "--tag ~quarantine --tag ~level:background_migration" + - rspec_paralellized_job "--tag ~quarantine --tag ~zoekt --tag ~level:background_migration" allow_failure: exit_codes: !reference [.rspec-base, variables, SUCCESSFULLY_RETRIED_TEST_EXIT_CODE] diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index de6e159921c4e5..067c4730f0adac 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -543,3 +543,5 @@ - 1 - - x509_certificate_revoke - 1 +- - zoekt_indexer + - 1 diff --git a/db/docs/zoekt_indexed_namespaces.yml b/db/docs/zoekt_indexed_namespaces.yml new file mode 100644 index 00000000000000..1ab748ac154ef4 --- /dev/null +++ b/db/docs/zoekt_indexed_namespaces.yml @@ -0,0 +1,10 @@ +--- +table_name: zoekt_indexed_namespaces +classes: +- Zoekt::IndexedNamespace +feature_categories: +- global_search +description: Describes a namespace that is configured to use a specific Zoekt shard for code search +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +milestone: '15.9' +gitlab_schema: gitlab_main diff --git a/db/docs/zoekt_shards.yml b/db/docs/zoekt_shards.yml new file mode 100644 index 00000000000000..5fe3b469b19182 --- /dev/null +++ b/db/docs/zoekt_shards.yml @@ -0,0 +1,10 @@ +--- +table_name: zoekt_shards +classes: +- Zoekt::Shard +feature_categories: +- global_search +description: Describes a Zoekt server that will be used for indexing and search for some configured namespaces +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +milestone: '15.9' +gitlab_schema: gitlab_main diff --git a/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb new file mode 100644 index 00000000000000..015d5fbeb8467c --- /dev/null +++ b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AddZoektShardsAndIndexedNamespaces < Gitlab::Database::Migration[2.1] + enable_lock_retries! + + def change + create_table :zoekt_shards do |t| + t.text :index_base_url, limit: 1024 + t.text :search_base_url, limit: 1024 + t.timestamps_with_timezone + end + + create_table :zoekt_indexed_namespaces do |t| + t.references :zoekt_shard, null: false, foreign_key: { on_delete: :cascade } + t.bigint :namespace_id, null: false, index: true + t.timestamps_with_timezone + t.index [:zoekt_shard_id, :namespace_id], unique: true, name: 'index_zoekt_shard_and_namespace' + end + end +end diff --git a/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb b/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb new file mode 100644 index 00000000000000..db995d6603e4d7 --- /dev/null +++ b/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class AddZoektIndexedNamespacesForeignKey < Gitlab::Database::Migration[2.1] + disable_ddl_transaction! + + def up + add_concurrent_foreign_key :zoekt_indexed_namespaces, :namespaces, column: :namespace_id, on_delete: :cascade + end + + def down + with_lock_retries do + remove_foreign_key :zoekt_indexed_namespaces, column: :namespace_id + end + end +end diff --git a/db/schema_migrations/20230104201524 b/db/schema_migrations/20230104201524 new file mode 100644 index 00000000000000..e98bb08fe2f49f --- /dev/null +++ b/db/schema_migrations/20230104201524 @@ -0,0 +1 @@ +e27a0a61f6807352c02ddf7c0bd44a86e3c244051fa3977f597cc92e83fcb0d1 \ No newline at end of file diff --git a/db/schema_migrations/20230107125328 b/db/schema_migrations/20230107125328 new file mode 100644 index 00000000000000..94ba5596a06a52 --- /dev/null +++ b/db/schema_migrations/20230107125328 @@ -0,0 +1 @@ +741599316bd51b0d454e49c43a06b834d8d172f3fd1dcd28996494da8fdf5d8b \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index cbc4b291335f5a..b7faeabce364db 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -23841,6 +23841,42 @@ CREATE SEQUENCE zentao_tracker_data_id_seq ALTER SEQUENCE zentao_tracker_data_id_seq OWNED BY zentao_tracker_data.id; +CREATE TABLE zoekt_indexed_namespaces ( + id bigint NOT NULL, + zoekt_shard_id bigint NOT NULL, + namespace_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL +); + +CREATE SEQUENCE zoekt_indexed_namespaces_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE zoekt_indexed_namespaces_id_seq OWNED BY zoekt_indexed_namespaces.id; + +CREATE TABLE zoekt_shards ( + id bigint NOT NULL, + index_base_url text, + search_base_url text, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + CONSTRAINT check_61794bac26 CHECK ((char_length(search_base_url) <= 1024)), + CONSTRAINT check_c65bb85a32 CHECK ((char_length(index_base_url) <= 1024)) +); + +CREATE SEQUENCE zoekt_shards_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE zoekt_shards_id_seq OWNED BY zoekt_shards.id; + CREATE TABLE zoom_meetings ( id bigint NOT NULL, project_id bigint NOT NULL, @@ -24930,6 +24966,10 @@ ALTER TABLE ONLY x509_issuers ALTER COLUMN id SET DEFAULT nextval('x509_issuers_ ALTER TABLE ONLY zentao_tracker_data ALTER COLUMN id SET DEFAULT nextval('zentao_tracker_data_id_seq'::regclass); +ALTER TABLE ONLY zoekt_indexed_namespaces ALTER COLUMN id SET DEFAULT nextval('zoekt_indexed_namespaces_id_seq'::regclass); + +ALTER TABLE ONLY zoekt_shards ALTER COLUMN id SET DEFAULT nextval('zoekt_shards_id_seq'::regclass); + ALTER TABLE ONLY zoom_meetings ALTER COLUMN id SET DEFAULT nextval('zoom_meetings_id_seq'::regclass); ALTER TABLE ONLY analytics_cycle_analytics_issue_stage_events @@ -27392,6 +27432,12 @@ ALTER TABLE ONLY x509_issuers ALTER TABLE ONLY zentao_tracker_data ADD CONSTRAINT zentao_tracker_data_pkey PRIMARY KEY (id); +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT zoekt_indexed_namespaces_pkey PRIMARY KEY (id); + +ALTER TABLE ONLY zoekt_shards + ADD CONSTRAINT zoekt_shards_pkey PRIMARY KEY (id); + ALTER TABLE ONLY zoom_meetings ADD CONSTRAINT zoom_meetings_pkey PRIMARY KEY (id); @@ -31825,6 +31871,12 @@ CREATE INDEX index_x509_issuers_on_subject_key_identifier ON x509_issuers USING CREATE INDEX index_zentao_tracker_data_on_integration_id ON zentao_tracker_data USING btree (integration_id); +CREATE INDEX index_zoekt_indexed_namespaces_on_namespace_id ON zoekt_indexed_namespaces USING btree (namespace_id); + +CREATE INDEX index_zoekt_indexed_namespaces_on_zoekt_shard_id ON zoekt_indexed_namespaces USING btree (zoekt_shard_id); + +CREATE UNIQUE INDEX index_zoekt_shard_and_namespace ON zoekt_indexed_namespaces USING btree (zoekt_shard_id, namespace_id); + CREATE INDEX index_zoom_meetings_on_issue_id ON zoom_meetings USING btree (issue_id); CREATE UNIQUE INDEX index_zoom_meetings_on_issue_id_and_issue_status ON zoom_meetings USING btree (issue_id, issue_status) WHERE (issue_status = 1); @@ -33563,6 +33615,9 @@ ALTER TABLE ONLY agent_activity_events ALTER TABLE ONLY issues ADD CONSTRAINT fk_3b8c72ea56 FOREIGN KEY (sprint_id) REFERENCES sprints(id) ON DELETE SET NULL; +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT fk_3bebdb4efc FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; + ALTER TABLE ONLY epics ADD CONSTRAINT fk_3c1fd1cccc FOREIGN KEY (due_date_sourcing_milestone_id) REFERENCES milestones(id) ON DELETE SET NULL; @@ -34805,6 +34860,9 @@ ALTER TABLE ONLY geo_repository_renamed_events ALTER TABLE ONLY aws_roles ADD CONSTRAINT fk_rails_4ed56f4720 FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT fk_rails_4f6006e94c FOREIGN KEY (zoekt_shard_id) REFERENCES zoekt_shards(id) ON DELETE CASCADE; + ALTER TABLE ONLY packages_debian_publications ADD CONSTRAINT fk_rails_4fc8ebd03e FOREIGN KEY (distribution_id) REFERENCES packages_debian_project_distributions(id) ON DELETE CASCADE; diff --git a/doc/user/search/exact_code_search.md b/doc/user/search/exact_code_search.md new file mode 100644 index 00000000000000..ca787f611e7f4d --- /dev/null +++ b/doc/user/search/exact_code_search.md @@ -0,0 +1,36 @@ +--- +stage: Data Stores +group: Global Search +info: "To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/product/ux/technical-writing/#assignments" +type: reference +--- + +# Exact Code Search **(PREMIUM)** + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049) in GitLab 15.9 [with a flag](../../administration/feature_flags.md) named `zoekt_for_code_search`. Disabled by default. + +WARNING: +Exact code search is in [**Alpha**](../../policy/alpha-beta-support.md#alpha-features). +For the Exact code search feature roadmap, see [epic 9404](https://gitlab.com/groups/gitlab-org/-/epics/9404). + +This feature will initially only be rolled out to +specific customers on GitLab.com that request +access. + +On self-managed GitLab it should be possible to enable this, but no +documentation is provided as it requires executing commands from the Rails +console as well advanced configuration of +[Zoekt](https://github.com/sourcegraph/zoekt) servers. + +## Usage + +When performing any Code search in GitLab it will choose to use "Exact Code +Search" powered by [Zoekt](https://github.com/sourcegraph/zoekt) if the project +is part of an enabled Group. + +The main differences between Zoekt and [Advanced Search](advanced_search.md) +are that Zoekt provides exact substring matching as well as allows you to +search for regular expressions. Since it allows searching for regular +expressions, certain special characters will require escaping. Backslash can +escape special characters and wrapping in double quotes can be used for phrase +searches. diff --git a/ee/app/models/concerns/zoekt/searchable_repository.rb b/ee/app/models/concerns/zoekt/searchable_repository.rb new file mode 100644 index 00000000000000..6351166e94d0d5 --- /dev/null +++ b/ee/app/models/concerns/zoekt/searchable_repository.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +module Zoekt + module SearchableRepository + extend ActiveSupport::Concern + + class_methods do + def truncate_zoekt_index!(shard) + conn = ::Faraday.new(url: shard.index_base_url) + conn.post('/truncate', '') + end + end + + included do + def use_zoekt? + project&.use_zoekt? + end + + def update_zoekt_index!(use_local_disk_path: false) + conn = ::Faraday.new(url: zoekt_index_base_url) + + repository_url = if use_local_disk_path + path_to_repo + else + project.http_url_to_repo + end + + payload = { RepositoryUrl: repository_url, RepositoryId: project.id.to_s } + conn.post('/index', payload.to_json, { "Content-Type" => "application/json" }) + end + + def async_update_zoekt_index + ::Zoekt::IndexerWorker.perform_async(project.id) + end + + private + + def zoekt_index_base_url + Zoekt::IndexedNamespace.where(namespace: project.root_namespace).first&.shard&.index_base_url + end + + def zoekt_search_base_url + Zoekt::IndexedNamespace.where(namespace: project.root_namespace).first&.shard&.search_base_url + end + end + end +end diff --git a/ee/app/models/ee/namespace.rb b/ee/app/models/ee/namespace.rb index 2027e4083e7ca3..a762b90b9618bf 100644 --- a/ee/app/models/ee/namespace.rb +++ b/ee/app/models/ee/namespace.rb @@ -423,6 +423,10 @@ def use_elasticsearch? ::Gitlab::CurrentSettings.elasticsearch_indexes_namespace?(self) end + def use_zoekt? + ::Zoekt::IndexedNamespace.enabled_for_namespace?(self) + end + def invalidate_elasticsearch_indexes_cache! ::Gitlab::CurrentSettings.invalidate_elasticsearch_indexes_cache_for_namespace!(self.id) end diff --git a/ee/app/models/ee/project.rb b/ee/app/models/ee/project.rb index 142fccf08f11f4..c550d14f70639c 100644 --- a/ee/app/models/ee/project.rb +++ b/ee/app/models/ee/project.rb @@ -828,6 +828,10 @@ def after_import ElasticCommitIndexerWorker.perform_async(id, true) if use_elasticsearch? && !forked? end + def use_zoekt? + ::Zoekt::IndexedNamespace.enabled_for_project?(self) + end + def elastic_namespace_ancestry namespace.elastic_namespace_ancestry + "p#{id}-" end diff --git a/ee/app/models/ee/repository.rb b/ee/app/models/ee/repository.rb index 24f3d7f099883b..10418b92c3701b 100644 --- a/ee/app/models/ee/repository.rb +++ b/ee/app/models/ee/repository.rb @@ -13,6 +13,7 @@ module Repository prepended do include Elastic::RepositoriesSearch + include ::Zoekt::SearchableRepository delegate :checksum, :find_remote_root_ref, to: :raw_repository end diff --git a/ee/app/models/gitlab_subscriptions/features.rb b/ee/app/models/gitlab_subscriptions/features.rb index 424729d3b0552f..ac2840c897f783 100644 --- a/ee/app/models/gitlab_subscriptions/features.rb +++ b/ee/app/models/gitlab_subscriptions/features.rb @@ -38,6 +38,7 @@ class Features runner_jobs_statistics seat_link usage_quotas + zoekt_code_search ].freeze STARTER_FEATURES = %i[ @@ -74,6 +75,7 @@ class Features usage_quotas visual_review_app wip_limits + zoekt_code_search ].freeze PREMIUM_FEATURES = %i[ diff --git a/ee/app/models/zoekt/indexed_namespace.rb b/ee/app/models/zoekt/indexed_namespace.rb new file mode 100644 index 00000000000000..c8498784a43cdc --- /dev/null +++ b/ee/app/models/zoekt/indexed_namespace.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Zoekt + class IndexedNamespace < ApplicationRecord + def self.table_name_prefix + 'zoekt_' + end + + belongs_to :shard, foreign_key: :zoekt_shard_id, inverse_of: :indexed_namespaces + belongs_to :namespace + + validate :only_root_namespaces_can_be_indexed + + def self.enabled_for_project?(project) + where(namespace: project.root_namespace).exists? + end + + def self.enabled_for_namespace?(namespace) + where(namespace: namespace.root_ancestor).exists? + end + + def only_root_namespaces_can_be_indexed + return unless namespace.parent_id.present? + + errors.add(:base, 'Only root namespaces can be indexed') + end + end +end diff --git a/ee/app/models/zoekt/shard.rb b/ee/app/models/zoekt/shard.rb new file mode 100644 index 00000000000000..39109a3cae9b50 --- /dev/null +++ b/ee/app/models/zoekt/shard.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Zoekt + class Shard < ApplicationRecord + def self.table_name_prefix + 'zoekt_' + end + + has_many :indexed_namespaces, foreign_key: :zoekt_shard_id, inverse_of: :shard + end +end diff --git a/ee/app/services/concerns/search/zoekt_searchable.rb b/ee/app/services/concerns/search/zoekt_searchable.rb new file mode 100644 index 00000000000000..fe30c171a9a8a6 --- /dev/null +++ b/ee/app/services/concerns/search/zoekt_searchable.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Search + module ZoektSearchable + def use_zoekt? + return false unless ::License.feature_available?(:zoekt_code_search) + return false unless ::Feature.enabled?(:zoekt_for_code_search) + return false if params[:basic_search] + + scope == 'blobs' && + zoekt_searchable_scope.respond_to?(:use_zoekt?) && + zoekt_searchable_scope.use_zoekt? + end + + def zoekt_searchable_scope + raise NotImplementedError + end + + def zoekt_projects + raise NotImplementedError + end + + def zoekt_search_results + ::Gitlab::Zoekt::SearchResults.new( + current_user, + params[:search], + zoekt_projects, + order_by: params[:order_by], + sort: params[:sort], + filters: { language: params[:language] } + ) + end + end +end diff --git a/ee/app/services/ee/git/branch_push_service.rb b/ee/app/services/ee/git/branch_push_service.rb index e325105929896a..3afb54dc0035a7 100644 --- a/ee/app/services/ee/git/branch_push_service.rb +++ b/ee/app/services/ee/git/branch_push_service.rb @@ -8,6 +8,7 @@ module BranchPushService override :execute def execute enqueue_elasticsearch_indexing + enqueue_zoekt_indexing enqueue_update_external_pull_requests super @@ -21,6 +22,14 @@ def enqueue_elasticsearch_indexing project.repository.index_commits_and_blobs end + def enqueue_zoekt_indexing + return false unless ::Feature.enabled?(:zoekt_for_code_search) + return false unless default_branch? + return false unless project.use_zoekt? + + project.repository.async_update_zoekt_index + end + def enqueue_update_external_pull_requests return unless project.mirror? return unless params.fetch(:create_pipelines, true) diff --git a/ee/app/services/ee/search/group_service.rb b/ee/app/services/ee/search/group_service.rb index cc3721956c8fa5..d5a66101f2b74d 100644 --- a/ee/app/services/ee/search/group_service.rb +++ b/ee/app/services/ee/search/group_service.rb @@ -4,12 +4,18 @@ module EE module Search module GroupService extend ::Gitlab::Utils::Override + include ::Search::ZoektSearchable override :elasticsearchable_scope def elasticsearchable_scope group end + override :zoekt_searchable_scope + def zoekt_searchable_scope + group + end + override :elastic_global def elastic_global false @@ -20,8 +26,14 @@ def elastic_projects @elastic_projects ||= projects.pluck_primary_key end + override :zoekt_projects + def zoekt_projects + @zoekt_projects ||= projects.pluck_primary_key + end + override :execute def execute + return zoekt_search_results if use_zoekt? return super unless use_elasticsearch? ::Gitlab::Elastic::GroupSearchResults.new( diff --git a/ee/app/services/ee/search/project_service.rb b/ee/app/services/ee/search/project_service.rb index 2054f27ccd065a..c913c9ab2a536e 100644 --- a/ee/app/services/ee/search/project_service.rb +++ b/ee/app/services/ee/search/project_service.rb @@ -5,12 +5,14 @@ module Search module ProjectService extend ::Gitlab::Utils::Override include ::Search::Elasticsearchable + include ::Search::ZoektSearchable SCOPES_THAT_SUPPORT_BRANCHES = %w(wiki_blobs commits blobs).freeze override :execute def execute return super if project.respond_to?(:archived?) && project.archived? + return zoekt_search_results if use_zoekt? && use_default_branch? return super unless use_elasticsearch? && use_default_branch? search = params[:search] @@ -53,9 +55,21 @@ def use_default_branch? project.root_ref?(repository_ref) end + override :elasticsearchable_scope def elasticsearchable_scope project end + + override :zoekt_searchable_scope + def zoekt_searchable_scope + project + end + + override :zoekt_projects + def zoekt_projects + # TODO: This was copied from above. Why can project be an array? + @zoekt_projects ||= Array(project).map(&:id) + end end end end diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index a3fe53b13ab7e7..4b704ade25ea2f 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -1587,3 +1587,12 @@ :weight: 1 :idempotent: true :tags: [] +- :name: zoekt_indexer + :worker_name: Zoekt::IndexerWorker + :feature_category: :global_search + :has_external_dependencies: false + :urgency: :throttled + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] diff --git a/ee/app/workers/zoekt/indexer_worker.rb b/ee/app/workers/zoekt/indexer_worker.rb new file mode 100644 index 00000000000000..885f9e08c2dbe7 --- /dev/null +++ b/ee/app/workers/zoekt/indexer_worker.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Zoekt + class IndexerWorker + TIMEOUT = 2.hours + + include ApplicationWorker + + data_consistency :always + include Gitlab::ExclusiveLeaseHelpers + + feature_category :global_search + urgency :throttled + idempotent! + + def perform(project_id) + return unless ::License.feature_available?(:zoekt_code_search) + return unless ::Feature.enabled?(:zoekt_for_code_search) + + project = Project.find(project_id) + return true unless project.use_zoekt? + + in_lock("#{self.class.name}/#{project_id}", ttl: (TIMEOUT + 1.minute), retries: 0) do + project.repository.update_zoekt_index! + end + end + end +end diff --git a/ee/config/feature_flags/development/zoekt_for_code_search.yml b/ee/config/feature_flags/development/zoekt_for_code_search.yml new file mode 100644 index 00000000000000..26aadac635c6dd --- /dev/null +++ b/ee/config/feature_flags/development/zoekt_for_code_search.yml @@ -0,0 +1,8 @@ +--- +name: zoekt_for_code_search +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +rollout_issue_url: +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb new file mode 100644 index 00000000000000..bbf6f49d2abebd --- /dev/null +++ b/ee/lib/gitlab/zoekt/search_results.rb @@ -0,0 +1,207 @@ +# frozen_string_literal: true + +module Gitlab + module Zoekt + class SearchResults + include ActionView::Helpers::NumberHelper + include Gitlab::Utils::StrongMemoize + + ELASTIC_COUNT_LIMIT = 10000 + DEFAULT_PER_PAGE = Gitlab::SearchResults::DEFAULT_PER_PAGE + + attr_reader :current_user, :query, :public_and_internal_projects, :order_by, :sort, :filters + + # Limit search results by passed projects + # It allows us to search only for projects user has access to + attr_reader :limit_project_ids + + def initialize(current_user, query, limit_project_ids = nil, order_by: nil, sort: nil, filters: {}) + @current_user = current_user + @query = query + @limit_project_ids = limit_project_ids + @order_by = order_by + @sort = sort + @filters = filters + end + + def objects(scope, page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil) + page = (page || 1).to_i + + case scope + when 'blobs' + blobs(page: page, per_page: per_page, preload_method: preload_method) + else + Kaminari.paginate_array([]) + end + end + + def formatted_count(scope) + case scope + when 'blobs' + limited_counter_with_delimiter(blobs_count) + end + end + + def blobs_count + @blobs_count ||= blobs.total_count + end + + # These aliases act as an adapter to the Gitlab::SearchResults + # interface, which is mostly implemented by this class. + alias_method :limited_blobs_count, :blobs_count + + def parse_zoekt_search_result(result, project) + ref = project.default_branch_or_main + path = result[:path] + basename = File.join(File.dirname(path), File.basename(path, '.*')) + content = result[:content] + project_id = project.id + + ::Gitlab::Search::FoundBlob.new( + path: path, + basename: basename, + ref: ref, + startline: [result[:line] - 1, 0].max, + highlight_line: result[:line], + data: content, + project: project, + project_id: project_id + ) + end + + def aggregations(scope) + [] + end + + def highlight_map(_) + nil + end + + private + + def base_options + { + current_user: current_user, + project_ids: limit_project_ids, + public_and_internal_projects: public_and_internal_projects, + order_by: order_by, + sort: sort + } + end + + def memoize_key(scope, count_only:) + count_only ? "#{scope}_results_count".to_sym : scope + end + + def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method: nil) + return Kaminari.paginate_array([]) if query.blank? + + strong_memoize(memoize_key(:blobs, count_only: count_only)) do + search_as_found_blob( + query, + Repository, + page: (page || 1).to_i, + per_page: per_page, + options: base_options.merge(count_only: count_only).merge(filters.slice(:language)), + preload_method: preload_method + ) + end + end + + def default_scope + 'blobs' + end + + def limited_counter_with_delimiter(count) + number_with_delimiter(count) + end + + def search_as_found_blob(query, repositories, page:, per_page:, options:, preload_method:) + zoekt_search_and_wrap(query, + page: page, + per_page: per_page, + options: options, + preload_method: preload_method) do |result, project| + parse_zoekt_search_result(result, project) + end + end + + def zoekt_search(query, num:, options:) + body = { + Q: query, + Opts: { + TotalMaxMatchCount: num, + NumContextLines: 1 + } + } + + # Safety net because Zoekt will match all projects if you provide + # an empty array. + raise "Not possible to search no projects" if options[:project_ids] == [] + + body[:RepoIds] = options[:project_ids] unless options[:project_ids] == :any + + # TODO: When we support sharding we'll need to find the ones that + # match the namespaces being searched + base_url = ::Zoekt::Shard.first.search_base_url + + conn = ::Faraday.new(url: base_url) + + response = conn.post('/api/search', body.to_json, { "Content-Type" => "application/json" }) + ::Gitlab::Json.parse(response.body, symbolize_names: true) + end + + def zoekt_search_and_wrap(query, page: 1, per_page: 20, options: {}, preload_method: nil, &blk) + search_result = zoekt_search( + query, + num: per_page + page * per_page, + options: options + ) + total_count = search_result[:Result][:MatchCount] + + response = (search_result[:Result][:Files] || []).flat_map do |r| + project_id = r[:Repository].to_i + + r[:LineMatches].map do |match| + { + project_id: project_id, + content: [match[:Before], match[:Line], match[:After]].compact.map { |l| Base64.decode64(l) }.join("\n"), + line: match[:LineNumber], + path: r[:FileName] + } + end + end + + items, total_count = yield_each_zoekt_search_result(response, preload_method, total_count, &blk) + + # Before "map" we had a paginated array so we need to recover it + offset = per_page * ((page || 1) - 1) + Kaminari.paginate_array(items, total_count: total_count, limit: per_page, offset: offset) + end + + def yield_each_zoekt_search_result(response, preload_method, total_count) + project_ids = response.pluck(:project_id).uniq # rubocop:disable CodeReuse/ActiveRecord + projects = Project.with_route.id_in(project_ids) + projects = projects.public_send(preload_method) if preload_method # rubocop:disable GitlabSecurity/PublicSend + projects = projects.index_by(&:id) + + items = response.map do |result| + project_id = result[:project_id] + project = projects[project_id] + + if project.nil? || project.pending_delete? + total_count -= 1 + next + end + + yield(result, project) + end + + # Remove results for deleted projects + items.compact! + + [items, total_count] + end + end + end +end diff --git a/ee/spec/features/search/zoekt/search_spec.rb b/ee/spec/features/search/zoekt/search_spec.rb new file mode 100644 index 00000000000000..ee81ba62fef8c1 --- /dev/null +++ b/ee/spec/features/search/zoekt/search_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Zoekt search', :zoekt, :js, :disable_rate_limiter, :elastic, feature_category: :global_search do + let_it_be(:user) { create(:user) } + let_it_be(:group) { create(:group, :public) } + let_it_be(:project1) { create(:project, :repository, :public, namespace: group) } + let_it_be(:project2) { create(:project, :repository, :public, namespace: group) } + let_it_be(:private_group) { create(:group, :private) } + let_it_be(:private_project) { create(:project, :repository, :private, namespace: private_group) } + + def choose_group(group) + find('[data-testid="group-filter"]').click + wait_for_requests + + page.within '[data-testid="group-filter"]' do + click_button group.name + end + end + + def choose_project(project) + find('[data-testid="project-filter"]').click + wait_for_requests + + page.within '[data-testid="project-filter"]' do + click_button project.name + end + end + + before do + # Necessary as group scoped code search is + # not available without Elasticsearch enabled + # even though it's using Zoekt. + stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true) + + zoekt_ensure_project_indexed!(project1) + zoekt_ensure_project_indexed!(project2) + zoekt_ensure_project_indexed!(private_project) + + project1.add_maintainer(user) + project2.add_maintainer(user) + group.add_owner(user) + + sign_in(user) + + visit(search_path) + + wait_for_requests + + choose_group(group) + end + + describe 'blob search' do + it 'finds files with a regex search and allows filtering down again by project' do + submit_search('user.*egex') + select_search_scope('Code') + + expect(page).to have_selector('.file-content .blob-content', count: 2) + expect(page).to have_button('Copy file path') + + choose_project(project1) + + expect(page).to have_selector('.file-content .blob-content', count: 1) + + allow(Ability).to receive(:allowed?).and_call_original + expect(Ability).to receive(:allowed?).with(anything, :read_blob, anything).twice.and_return(false) + + submit_search("username_regex") + select_search_scope('Code') + expect(page).not_to have_selector('.file-content .blob-content') + end + end +end diff --git a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb new file mode 100644 index 00000000000000..201f86f7afdff2 --- /dev/null +++ b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb @@ -0,0 +1,132 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Gitlab::Zoekt::SearchResults, :zoekt, feature_category: :global_search do + let_it_be(:user) { create(:user) } + + let(:query) { 'hello world' } + let_it_be(:project_1) { create(:project, :public, :repository) } + let_it_be(:project_2) { create(:project, :public, :repository) } + let(:limit_project_ids) { [project_1.id] } + + before do + zoekt_ensure_project_indexed!(project_1) + zoekt_ensure_project_indexed!(project_2) + end + + describe 'blobs', :sidekiq_inline do + before do + zoekt_ensure_project_indexed!(project_1) + end + + def search_for(term) + described_class.new(user, term, [project_1.id]).objects('blobs').map(&:path) + end + + it 'finds blobs by regex search' do + results = described_class.new(user, 'use.*egex', limit_project_ids) + blobs = results.objects('blobs') + + expect(blobs.map(&:data).join).to include("def username_regex\n default_regex") + expect(results.blobs_count).to eq 5 + end + + it 'finds blobs from searched projects only' do + project_3 = create :project, :repository, :private + zoekt_ensure_project_indexed!(project_3) + project_3.add_reporter(user) + + results = described_class.new(user, 'project_name_regex', [project_1.id]) + expect(results.blobs_count).to eq 1 + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id]) + + results = described_class.new(user, 'project_name_regex', [project_1.id, project_3.id]) + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id, project_3.id]) + expect(results.blobs_count).to eq 2 + + results = described_class.new(user, 'project_name_regex', :any) + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id, project_2.id, project_3.id]) + expect(results.blobs_count).to eq 3 + end + + it 'raises an error if there are somehow no project_id in the filter' do + expect do + described_class.new(user, 'project_name_regex', []).objects('blobs') + end.to raise_error('Not possible to search no projects') + end + + it 'returns zero when blobs are not found' do + results = described_class.new(user, 'asdfg', limit_project_ids) + + expect(results.blobs_count).to eq 0 + end + + context 'when searching with special characters', :aggregate_failures do + let(:examples) do + { + 'perlMethodCall' => '$my_perl_object->perlMethodCall', + '"absolute_with_specials.txt"' => '/a/longer/file-path/absolute_with_specials.txt', + '"components-within-slashes"' => '/file-path/components-within-slashes/', + 'bar\(x\)' => 'Foo.bar(x)', + 'someSingleColonMethodCall' => 'LanguageWithSingleColon:someSingleColonMethodCall', + 'javaLangStaticMethodCall' => 'MyJavaClass::javaLangStaticMethodCall', + 'tokenAfterParentheses' => 'ParenthesesBetweenTokens)tokenAfterParentheses', + 'ruby_call_method_123' => 'RubyClassInvoking.ruby_call_method_123(with_arg)', + 'ruby_method_call' => 'RubyClassInvoking.ruby_method_call(with_arg)', + '#ambitious-planning' => 'We [plan ambitiously](#ambitious-planning).', + 'ambitious-planning' => 'We [plan ambitiously](#ambitious-planning).', + 'tokenAfterCommaWithNoSpace' => 'WouldHappenInManyLanguages,tokenAfterCommaWithNoSpace', + 'missing_token_around_equals' => 'a.b.c=missing_token_around_equals', + 'and;colons:too\$' => 'and;colons:too$', + '"differeñt-lønguage.txt"' => 'another/file-path/differeñt-lønguage.txt', + '"relative-with-specials.txt"' => 'another/file-path/relative-with-specials.txt', + 'ruby_method_123' => 'def self.ruby_method_123(ruby_another_method_arg)', + 'ruby_method_name' => 'def self.ruby_method_name(ruby_method_arg)', + '"dots.also.neeeeed.testing"' => 'dots.also.neeeeed.testing', + '.testing' => 'dots.also.neeeeed.testing', + 'dots' => 'dots.also.neeeeed.testing', + 'also.neeeeed' => 'dots.also.neeeeed.testing', + 'neeeeed' => 'dots.also.neeeeed.testing', + 'tests-image' => 'extends: .gitlab-tests-image', + 'gitlab-tests' => 'extends: .gitlab-tests-image', + 'gitlab-tests-image' => 'extends: .gitlab-tests-image', + 'foo/bar' => 'https://s3.amazonaws.com/foo/bar/baz.png', + 'https://test.or.dev.com/repository' => 'https://test.or.dev.com/repository/maven-all', + 'test.or.dev.com/repository/maven-all' => 'https://test.or.dev.com/repository/maven-all', + 'repository/maven-all' => 'https://test.or.dev.com/repository/maven-all', + 'https://test.or.dev.com/repository/maven-all' => 'https://test.or.dev.com/repository/maven-all', + 'bar-baz-conventions' => 'id("foo.bar-baz-conventions")', + 'baz-conventions' => 'id("foo.bar-baz-conventions")', + 'baz' => 'id("foo.bar-baz-conventions")', + 'bikes-3.4' => 'include "bikes-3.4"', + 'sql_log_bin' => 'q = "SET @@session.sql_log_bin=0;"', + 'sql_log_bin=0' => 'q = "SET @@session.sql_log_bin=0;"', + 'v3/delData' => 'uri: "v3/delData"', + '"us-east-2"' => 'us-east-2' + } + end + + before do + examples.values.uniq.each do |file_content| + file_name = Digest::SHA256.hexdigest(file_content) + project_1.repository.create_file(user, file_name, file_content, message: 'Some commit message', +branch_name: 'master') + end + + zoekt_ensure_project_indexed!(project_1) + end + + it 'finds all examples' do + examples.each do |search_term, file_content| + file_name = Digest::SHA256.hexdigest(file_content) + + expect(search_for(search_term)).to include(file_name) + end + end + end + end +end diff --git a/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb new file mode 100644 index 00000000000000..163b0cdeba6e53 --- /dev/null +++ b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::SearchableRepository, :zoekt, feature_category: :global_search do + let_it_be(:user) { create(:user) } + + let_it_be(:project) { create(:project, :repository) } + let_it_be(:unindexed_project) { create(:project, :repository) } + let(:repository) { project.repository } + let(:unindexed_repository) { unindexed_project.repository } + + before do + zoekt_ensure_project_indexed!(project) + end + + describe '#use_zoekt?' do + it 'is true for indexed projects' do + expect(repository.use_zoekt?).to eq(true) + end + + it 'is false for unindexed projects' do + expect(unindexed_repository.use_zoekt?).to eq(false) + end + end + + def search_for(term) + ::Gitlab::Zoekt::SearchResults.new(user, term, :any).objects('blobs').map(&:path) + end + + describe '#update_zoekt_index!' do + it 'makes updates available' do + project.repository.create_file( + user, + 'somenewsearchablefile.txt', + 'some content', + message: 'added test file', + branch_name: project.default_branch) + + expect(search_for('somenewsearchablefile.txt')).to be_empty + + repository.update_zoekt_index! + + expect(search_for('somenewsearchablefile.txt')).to match_array(['somenewsearchablefile.txt']) + end + end + + describe '.truncate_zoekt_index!' do + it 'removes all data from the Zoekt shard' do + expect(search_for('.')).not_to be_empty + + Repository.truncate_zoekt_index!(::Zoekt::Shard.last) + + expect(search_for('.')).to be_empty + end + end + + describe '#async_update_zoekt_index', :sidekiq_inline do + it 'makes updates available via ::Zoekt::IndexerWorker' do + expect(::Zoekt::IndexerWorker).to receive(:perform_async).with(project.id).and_call_original + + project.repository.create_file( + user, + 'anothersomenewsearchablefile.txt', + 'some content', + message: 'added test file', + branch_name: project.default_branch) + + expect(search_for('anothersomenewsearchablefile.txt')).to be_empty + + repository.async_update_zoekt_index + + expect(search_for('anothersomenewsearchablefile.txt')).to match_array(['anothersomenewsearchablefile.txt']) + end + end +end diff --git a/ee/spec/models/ee/namespace_spec.rb b/ee/spec/models/ee/namespace_spec.rb index 15e62333733dc8..8f886abdfcc324 100644 --- a/ee/spec/models/ee/namespace_spec.rb +++ b/ee/spec/models/ee/namespace_spec.rb @@ -124,6 +124,14 @@ end end + describe '#use_zoekt?', feature_category: :global_search do + it 'delegates to ::Zoekt::IndexedNamespace' do + expect(::Zoekt::IndexedNamespace).to receive(:enabled_for_namespace?).with(namespace).and_return(true) + + expect(namespace.use_zoekt?).to eq(true) + end + end + describe '#invalidate_elasticsearch_indexes_cache!' do let(:namespace) { create :namespace } diff --git a/ee/spec/models/project_spec.rb b/ee/spec/models/project_spec.rb index 3229be42b8840e..8efe3195667384 100644 --- a/ee/spec/models/project_spec.rb +++ b/ee/spec/models/project_spec.rb @@ -2245,6 +2245,14 @@ end end + describe '#use_zoekt?', feature_category: :global_search do + it 'delegates to ::Zoekt::IndexedNamespace' do + expect(::Zoekt::IndexedNamespace).to receive(:enabled_for_project?).with(project).and_return(true) + + expect(project.use_zoekt?).to eq(true) + end + end + describe '#lfs_http_url_to_repo' do let(:project) { create(:project) } let(:project_path) { "#{Gitlab::Routing.url_helpers.project_path(project)}.git" } diff --git a/ee/spec/models/zoekt/indexed_namespace_spec.rb b/ee/spec/models/zoekt/indexed_namespace_spec.rb new file mode 100644 index 00000000000000..3da6a53d760628 --- /dev/null +++ b/ee/spec/models/zoekt/indexed_namespace_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::IndexedNamespace, feature_category: :global_searcch do + let_it_be(:indexed_namespace1) { create(:namespace) } + let_it_be(:indexed_namespace2) { create(:namespace) } + let_it_be(:indexed_parent_namespace) { create(:group) } + let_it_be(:indexed_child_namespace) { create(:group, parent: indexed_parent_namespace) } + let_it_be(:unindexed_namespace) { create(:namespace) } + let_it_be(:indexed_project1) { create(:project, namespace: indexed_namespace1) } + let_it_be(:unindexed_project) { create(:project, namespace: unindexed_namespace) } + let_it_be(:indexed_project_of_parent_namespace) { create(:project, namespace: indexed_parent_namespace) } + let_it_be(:indexed_project_of_child_namespace) { create(:project, namespace: indexed_child_namespace) } + let_it_be(:shard) { Zoekt::Shard.create!(index_base_url: 'http://example.com:1234/', search_base_url: 'http://example.com:4567/') } + + before :all do + described_class.create!(shard: shard, namespace: indexed_namespace1) + described_class.create!(shard: shard, namespace: indexed_namespace2) + described_class.create!(shard: shard, namespace: indexed_parent_namespace) + end + + context 'with validations' do + it 'does not allow you to mark a subgroup as indexed' do + expect do + described_class.create!(shard: shard, namespace: indexed_child_namespace) + end.to raise_error(/Only root namespaces can be indexed/) + end + end + + describe '#enabled_for_namespace?' do + it 'returns true for those indexed namespace records' do + expect(described_class.enabled_for_namespace?(indexed_namespace1)).to eq(true) + expect(described_class.enabled_for_namespace?(indexed_namespace2)).to eq(true) + end + + it 'returns false for unindexed namespace records' do + expect(described_class.enabled_for_namespace?(unindexed_namespace)).to eq(false) + end + + it 'delegates to root namespace for subgroups' do + expect(described_class.enabled_for_namespace?(indexed_child_namespace)).to eq(true) + end + end + + describe '#enabled_for_project?' do + it 'returns true for projects in indexed namespaces' do + expect(described_class.enabled_for_project?(indexed_project1)).to eq(true) + expect(described_class.enabled_for_project?(indexed_project_of_parent_namespace)).to eq(true) + end + + it 'returns false for projects in unindexed namespaces' do + expect(described_class.enabled_for_project?(unindexed_project)).to eq(false) + end + + it 'delegates to root namespace for projects in subgroups' do + expect(described_class.enabled_for_project?(indexed_project_of_child_namespace)).to eq(true) + end + end +end diff --git a/ee/spec/models/zoekt/shard_spec.rb b/ee/spec/models/zoekt/shard_spec.rb new file mode 100644 index 00000000000000..c8b2245b226b0a --- /dev/null +++ b/ee/spec/models/zoekt/shard_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::Shard, feature_category: :global_search do + let_it_be(:indexed_namespace1) { create(:namespace) } + let_it_be(:indexed_namespace2) { create(:namespace) } + let_it_be(:unindexed_namespace) { create(:namespace) } + let(:shard) { described_class.create!(index_base_url: 'http://example.com:1234/', search_base_url: 'http://example.com:4567/') } + + before do + Zoekt::IndexedNamespace.create!(shard: shard, namespace: indexed_namespace1) + Zoekt::IndexedNamespace.create!(shard: shard, namespace: indexed_namespace2) + end + + it 'has many indexed_namespaces' do + expect(shard.indexed_namespaces.count).to eq(2) + expect(shard.indexed_namespaces.map(&:namespace)).to contain_exactly(indexed_namespace1, indexed_namespace2) + end +end diff --git a/ee/spec/services/ee/git/branch_push_service_spec.rb b/ee/spec/services/ee/git/branch_push_service_spec.rb index 7f01a236dce39e..3340147a6d5db2 100644 --- a/ee/spec/services/ee/git/branch_push_service_spec.rb +++ b/ee/spec/services/ee/git/branch_push_service_spec.rb @@ -38,7 +38,7 @@ end end - context 'ElasticSearch indexing', :elastic, :clean_gitlab_redis_shared_state do + context 'ElasticSearch indexing', :elastic, :clean_gitlab_redis_shared_state, feature_category: :global_search do before do stub_ee_application_setting(elasticsearch_indexing?: true) end @@ -107,6 +107,52 @@ end end + context 'with Zoekt indexing', feature_category: :global_search do + let(:use_zoekt) { true } + + before do + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'triggers async_update_zoekt_index' do + expect(project.repository).to receive(:async_update_zoekt_index) + + subject.execute + end + + context 'when pushing to a non-default branch' do + let(:ref) { 'refs/heads/other' } + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + + context 'when zoekt_for_code_search is disabled' do + before do + stub_feature_flags(zoekt_for_code_search: false) + end + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + + context 'when zoekt is not enabled for the project' do + let(:use_zoekt) { false } + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + end + context 'External pull requests' do it 'runs UpdateExternalPullRequestsWorker' do expect(UpdateExternalPullRequestsWorker).to receive(:perform_async).with(project.id, user.id, ref) diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index 41067f67c4aa69..5cbc9e076e62d2 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -62,6 +62,72 @@ end end + context 'when searching with Zoekt' do + let(:service) { described_class.new(user, group, search: 'foobar', scope: scope, basic_search: basic_search) } + let(:use_zoekt) { true } + let(:scope) { 'blobs' } + let(:basic_search) { nil } + + before do + allow(group).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'searches with Zoekt' do + expect(service.use_zoekt?).to eq(true) + expect(service.zoekt_searchable_scope).to eq(group) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + + context 'when group does not have Zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when scope is not blobs' do + let(:scope) { 'issues' } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when basic_search is requested' do + let(:basic_search) { true } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when zoekt_for_code_search is disabled' do + before do + stub_feature_flags(zoekt_for_code_search: false) + end + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when zoekt_code_search licensed feature is disabled' do + before do + stub_licensed_features(zoekt_code_search: false) + end + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + end + context 'visibility', :elastic_delete_by_query, :sidekiq_inline do include_context 'ProjectPolicyTable context' diff --git a/ee/spec/services/search/project_service_spec.rb b/ee/spec/services/search/project_service_spec.rb index c3a6e5d52a3956..21c0222b8474de 100644 --- a/ee/spec/services/search/project_service_spec.rb +++ b/ee/spec/services/search/project_service_spec.rb @@ -43,6 +43,53 @@ end end + context 'when searching with Zoekt' do + let_it_be(:user) { create(:user) } + let_it_be(:project) { create(:project, namespace: user.namespace) } + + let(:service) { described_class.new(project, user, search: 'foobar', scope: scope, basic_search: basic_search) } + let(:use_zoekt) { true } + let(:scope) { 'blobs' } + let(:basic_search) { nil } + + before do + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'searches with Zoekt' do + expect(service.use_zoekt?).to eq(true) + expect(service.zoekt_searchable_scope).to eq(project) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + + context 'when project does not have Zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when scope is not blobs' do + let(:scope) { 'issues' } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when basic_search is requested' do + let(:basic_search) { true } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + end + context 'when a multiple projects provided' do it_behaves_like 'EE search service shared examples', ::Gitlab::ProjectSearchResults, ::Gitlab::Elastic::SearchResults do let_it_be(:group) { create(:group) } diff --git a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb index 65328ef0af6f34..344730a4aae3fe 100644 --- a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb +++ b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb @@ -47,6 +47,7 @@ expect(Gitlab::CurrentSettings) .to receive(:search_using_elasticsearch?) .with(scope: scope) + .at_least(:once) .and_return(true) is_expected.to be_a(elasticsearch_results) @@ -56,6 +57,7 @@ expect(Gitlab::CurrentSettings) .to receive(:search_using_elasticsearch?) .with(scope: scope) + .at_least(:once) .and_return(false) is_expected.to be_a(normal_results) diff --git a/ee/spec/support/zoekt.rb b/ee/spec/support/zoekt.rb new file mode 100644 index 00000000000000..2f7a346f657e88 --- /dev/null +++ b/ee/spec/support/zoekt.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Zoekt + module TestHelpers + def zoekt_shard + @zoekt_shard ||= ::Zoekt::Shard.find_or_create_by!(index_base_url: 'http://127.0.0.1:6060/', search_base_url: 'http://127.0.0.1:6070/') + end + module_function :zoekt_shard + + def zoekt_truncate_index! + Repository.truncate_zoekt_index!(zoekt_shard) + end + module_function :zoekt_truncate_index! + + def zoekt_ensure_namespace_indexed!(namespace) + ::Zoekt::IndexedNamespace.find_or_create_by!(shard: zoekt_shard, namespace: namespace.root_ancestor) + end + + def zoekt_ensure_project_indexed!(project) + zoekt_ensure_namespace_indexed!(project.namespace) + + # TODO: We shouldn't be referencing files on disk but I don't think we + # can git clone from rspec as Web/API is not running + allow(::Gitlab::GitalyClient::StorageSettings).to receive(:disk_access_denied?).and_return(false) + project.repository.update_zoekt_index!(use_local_disk_path: true) + end + end +end + +RSpec.configure do |config| + config.around(:each, :zoekt) do |example| + ::Zoekt::TestHelpers.zoekt_truncate_index! + + example.run + + ::Zoekt::TestHelpers.zoekt_truncate_index! + end + + config.before(:each, :zoekt) do + stub_licensed_features(zoekt_code_search: true) + end + + config.include ::Zoekt::TestHelpers +end diff --git a/ee/spec/workers/zoekt/indexer_worker_spec.rb b/ee/spec/workers/zoekt/indexer_worker_spec.rb new file mode 100644 index 00000000000000..c481c75a6a3ac5 --- /dev/null +++ b/ee/spec/workers/zoekt/indexer_worker_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::IndexerWorker, feature_category: :global_search do + let_it_be(:project) { create(:project, :repository) } + let(:use_zoekt) { true } + + subject { described_class.new } + + before do + # Mocking Project.find simplifies the stubs on project.use_zoekt? and + # project.repository + allow(Project).to receive(:find).with(project.id).and_return(project) + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + describe '#perform' do + it 'sends the project to Zoekt for indexing' do + expect(project.repository).to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + + context 'when zoekt_for_code_search is disabled' do + before do + stub_feature_flags(zoekt_for_code_search: false) + end + + it 'does not send the project to Zoekt for indexing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the zoekt_code_search licensed feature is disabled' do + before do + stub_licensed_features(zoekt_code_search: false) + end + + it 'does nothing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the project does not have zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not send the project to Zoekt for indexing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the indexer is locked for the given project' do + it 'does not run index' do + expect(subject).to receive(:in_lock) # Mock and don't yield + .with("Zoekt::IndexerWorker/#{project.id}", ttl: (Zoekt::IndexerWorker::TIMEOUT + 1.minute), retries: 0) + + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + end +end -- GitLab From 82f59d0ac98d8fd131326126761f86f7e25ccd79 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 19 Jan 2023 12:35:21 +1100 Subject: [PATCH 02/13] Reorder Feature/License checks Zoekt --- ee/app/services/concerns/search/zoekt_searchable.rb | 4 ++-- ee/app/workers/zoekt/indexer_worker.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ee/app/services/concerns/search/zoekt_searchable.rb b/ee/app/services/concerns/search/zoekt_searchable.rb index fe30c171a9a8a6..35236a36ee0db7 100644 --- a/ee/app/services/concerns/search/zoekt_searchable.rb +++ b/ee/app/services/concerns/search/zoekt_searchable.rb @@ -3,9 +3,9 @@ module Search module ZoektSearchable def use_zoekt? - return false unless ::License.feature_available?(:zoekt_code_search) - return false unless ::Feature.enabled?(:zoekt_for_code_search) return false if params[:basic_search] + return false unless ::Feature.enabled?(:zoekt_for_code_search) + return false unless ::License.feature_available?(:zoekt_code_search) scope == 'blobs' && zoekt_searchable_scope.respond_to?(:use_zoekt?) && diff --git a/ee/app/workers/zoekt/indexer_worker.rb b/ee/app/workers/zoekt/indexer_worker.rb index 885f9e08c2dbe7..33c6c91defd556 100644 --- a/ee/app/workers/zoekt/indexer_worker.rb +++ b/ee/app/workers/zoekt/indexer_worker.rb @@ -14,8 +14,8 @@ class IndexerWorker idempotent! def perform(project_id) - return unless ::License.feature_available?(:zoekt_code_search) return unless ::Feature.enabled?(:zoekt_for_code_search) + return unless ::License.feature_available?(:zoekt_code_search) project = Project.find(project_id) return true unless project.use_zoekt? -- GitLab From acdd154c26c536b5beaceba6c9ff89a4657ebcc4 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 19 Jan 2023 13:54:47 +1100 Subject: [PATCH 03/13] Switch to 2 feature flags for Zoekt --- doc/user/search/exact_code_search.md | 2 +- ee/app/services/concerns/search/zoekt_searchable.rb | 2 +- ee/app/services/ee/git/branch_push_service.rb | 2 +- ee/app/workers/zoekt/indexer_worker.rb | 2 +- ...oekt_for_code_search.yml => index_code_with_zoekt.yml} | 4 ++-- .../feature_flags/development/search_code_with_zoekt.yml | 8 ++++++++ ee/spec/services/ee/git/branch_push_service_spec.rb | 4 ++-- ee/spec/services/search/group_service_spec.rb | 4 ++-- ee/spec/workers/zoekt/indexer_worker_spec.rb | 4 ++-- 9 files changed, 20 insertions(+), 12 deletions(-) rename ee/config/feature_flags/development/{zoekt_for_code_search.yml => index_code_with_zoekt.yml} (63%) create mode 100644 ee/config/feature_flags/development/search_code_with_zoekt.yml diff --git a/doc/user/search/exact_code_search.md b/doc/user/search/exact_code_search.md index ca787f611e7f4d..97f58b973cb3c4 100644 --- a/doc/user/search/exact_code_search.md +++ b/doc/user/search/exact_code_search.md @@ -7,7 +7,7 @@ type: reference # Exact Code Search **(PREMIUM)** -> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049) in GitLab 15.9 [with a flag](../../administration/feature_flags.md) named `zoekt_for_code_search`. Disabled by default. +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049) in GitLab 15.9 [with a flag](../../administration/feature_flags.md) named `index_code_with_zoekt` and `search_code_with_zoekt` which enables indexing and searching respectively. Both are disabled by default. WARNING: Exact code search is in [**Alpha**](../../policy/alpha-beta-support.md#alpha-features). diff --git a/ee/app/services/concerns/search/zoekt_searchable.rb b/ee/app/services/concerns/search/zoekt_searchable.rb index 35236a36ee0db7..a7ee4098711aac 100644 --- a/ee/app/services/concerns/search/zoekt_searchable.rb +++ b/ee/app/services/concerns/search/zoekt_searchable.rb @@ -4,7 +4,7 @@ module Search module ZoektSearchable def use_zoekt? return false if params[:basic_search] - return false unless ::Feature.enabled?(:zoekt_for_code_search) + return false unless ::Feature.enabled?(:search_code_with_zoekt, current_user) return false unless ::License.feature_available?(:zoekt_code_search) scope == 'blobs' && diff --git a/ee/app/services/ee/git/branch_push_service.rb b/ee/app/services/ee/git/branch_push_service.rb index 3afb54dc0035a7..99e9316d869e2a 100644 --- a/ee/app/services/ee/git/branch_push_service.rb +++ b/ee/app/services/ee/git/branch_push_service.rb @@ -23,7 +23,7 @@ def enqueue_elasticsearch_indexing end def enqueue_zoekt_indexing - return false unless ::Feature.enabled?(:zoekt_for_code_search) + return false unless ::Feature.enabled?(:index_code_with_zoekt) return false unless default_branch? return false unless project.use_zoekt? diff --git a/ee/app/workers/zoekt/indexer_worker.rb b/ee/app/workers/zoekt/indexer_worker.rb index 33c6c91defd556..408070fded0c61 100644 --- a/ee/app/workers/zoekt/indexer_worker.rb +++ b/ee/app/workers/zoekt/indexer_worker.rb @@ -14,7 +14,7 @@ class IndexerWorker idempotent! def perform(project_id) - return unless ::Feature.enabled?(:zoekt_for_code_search) + return unless ::Feature.enabled?(:index_code_with_zoekt) return unless ::License.feature_available?(:zoekt_code_search) project = Project.find(project_id) diff --git a/ee/config/feature_flags/development/zoekt_for_code_search.yml b/ee/config/feature_flags/development/index_code_with_zoekt.yml similarity index 63% rename from ee/config/feature_flags/development/zoekt_for_code_search.yml rename to ee/config/feature_flags/development/index_code_with_zoekt.yml index 26aadac635c6dd..626252c1dfb427 100644 --- a/ee/config/feature_flags/development/zoekt_for_code_search.yml +++ b/ee/config/feature_flags/development/index_code_with_zoekt.yml @@ -1,7 +1,7 @@ --- -name: zoekt_for_code_search +name: index_code_with_zoekt introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 -rollout_issue_url: +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/388519 milestone: '15.9' type: development group: group::global search diff --git a/ee/config/feature_flags/development/search_code_with_zoekt.yml b/ee/config/feature_flags/development/search_code_with_zoekt.yml new file mode 100644 index 00000000000000..c443dc0238f015 --- /dev/null +++ b/ee/config/feature_flags/development/search_code_with_zoekt.yml @@ -0,0 +1,8 @@ +--- +name: search_code_with_zoekt +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/388519 +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/spec/services/ee/git/branch_push_service_spec.rb b/ee/spec/services/ee/git/branch_push_service_spec.rb index 3340147a6d5db2..9bf5cd340ab30d 100644 --- a/ee/spec/services/ee/git/branch_push_service_spec.rb +++ b/ee/spec/services/ee/git/branch_push_service_spec.rb @@ -130,9 +130,9 @@ end end - context 'when zoekt_for_code_search is disabled' do + context 'when index_code_with_zoekt is disabled' do before do - stub_feature_flags(zoekt_for_code_search: false) + stub_feature_flags(index_code_with_zoekt: false) end it 'does not trigger async_update_zoekt_index' do diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index 5cbc9e076e62d2..9a67be9d9fbcae 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -105,9 +105,9 @@ end end - context 'when zoekt_for_code_search is disabled' do + context 'when search_code_with_zoekt is disabled' do before do - stub_feature_flags(zoekt_for_code_search: false) + stub_feature_flags(search_code_with_zoekt: false) end it 'does not search with Zoekt' do diff --git a/ee/spec/workers/zoekt/indexer_worker_spec.rb b/ee/spec/workers/zoekt/indexer_worker_spec.rb index c481c75a6a3ac5..265a831343b789 100644 --- a/ee/spec/workers/zoekt/indexer_worker_spec.rb +++ b/ee/spec/workers/zoekt/indexer_worker_spec.rb @@ -22,9 +22,9 @@ subject.perform(project.id) end - context 'when zoekt_for_code_search is disabled' do + context 'when index_code_with_zoekt is disabled' do before do - stub_feature_flags(zoekt_for_code_search: false) + stub_feature_flags(index_code_with_zoekt: false) end it 'does not send the project to Zoekt for indexing' do -- GitLab From b6cb68e07abae8679acae195c329418c28be3456 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 19 Jan 2023 13:58:27 +1100 Subject: [PATCH 04/13] Fix test order flakiness in searchable_repository_spec --- ee/spec/models/concerns/zoekt/searchable_repository_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb index 163b0cdeba6e53..4d495dd91e1845 100644 --- a/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb +++ b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb @@ -61,16 +61,16 @@ def search_for(term) project.repository.create_file( user, - 'anothersomenewsearchablefile.txt', + 'anothernewsearchablefile.txt', 'some content', message: 'added test file', branch_name: project.default_branch) - expect(search_for('anothersomenewsearchablefile.txt')).to be_empty + expect(search_for('anothernewsearchablefile.txt')).to be_empty repository.async_update_zoekt_index - expect(search_for('anothersomenewsearchablefile.txt')).to match_array(['anothersomenewsearchablefile.txt']) + expect(search_for('anothernewsearchablefile.txt')).to match_array(['anothernewsearchablefile.txt']) end end end -- GitLab From 61145e26e6bcb71842f56d44a5cc879a24cf4743 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 19 Jan 2023 14:36:45 +1100 Subject: [PATCH 05/13] Default next page in Zoekt to ES and ignore pagination --- .../concerns/search/zoekt_searchable.rb | 1 + ee/lib/gitlab/zoekt/search_results.rb | 6 ++--- ee/spec/services/search/group_service_spec.rb | 23 +++++++++++++++++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/ee/app/services/concerns/search/zoekt_searchable.rb b/ee/app/services/concerns/search/zoekt_searchable.rb index a7ee4098711aac..9b9a8e1ab5712e 100644 --- a/ee/app/services/concerns/search/zoekt_searchable.rb +++ b/ee/app/services/concerns/search/zoekt_searchable.rb @@ -4,6 +4,7 @@ module Search module ZoektSearchable def use_zoekt? return false if params[:basic_search] + return false if params[:page] && params[:page].to_i != 1 return false unless ::Feature.enabled?(:search_code_with_zoekt, current_user) return false unless ::License.feature_available?(:zoekt_code_search) diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb index bbf6f49d2abebd..7671907faa819c 100644 --- a/ee/lib/gitlab/zoekt/search_results.rb +++ b/ee/lib/gitlab/zoekt/search_results.rb @@ -6,7 +6,6 @@ class SearchResults include ActionView::Helpers::NumberHelper include Gitlab::Utils::StrongMemoize - ELASTIC_COUNT_LIMIT = 10000 DEFAULT_PER_PAGE = Gitlab::SearchResults::DEFAULT_PER_PAGE attr_reader :current_user, :query, :public_and_internal_projects, :order_by, :sort, :filters @@ -154,7 +153,7 @@ def zoekt_search(query, num:, options:) def zoekt_search_and_wrap(query, page: 1, per_page: 20, options: {}, preload_method: nil, &blk) search_result = zoekt_search( query, - num: per_page + page * per_page, + num: per_page, options: options ) total_count = search_result[:Result][:MatchCount] @@ -174,8 +173,7 @@ def zoekt_search_and_wrap(query, page: 1, per_page: 20, options: {}, preload_met items, total_count = yield_each_zoekt_search_result(response, preload_method, total_count, &blk) - # Before "map" we had a paginated array so we need to recover it - offset = per_page * ((page || 1) - 1) + offset = 0 Kaminari.paginate_array(items, total_count: total_count, limit: per_page, offset: offset) end diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index 9a67be9d9fbcae..18d27995d2d7c8 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -63,16 +63,17 @@ end context 'when searching with Zoekt' do - let(:service) { described_class.new(user, group, search: 'foobar', scope: scope, basic_search: basic_search) } + let(:service) { described_class.new(user, group, search: 'foobar', scope: scope, basic_search: basic_search, page: page) } let(:use_zoekt) { true } let(:scope) { 'blobs' } let(:basic_search) { nil } + let(:page) { nil } before do allow(group).to receive(:use_zoekt?).and_return(use_zoekt) end - it 'searches with Zoekt' do + it 'returns a Gitlab::Zoekt::SearchResults' do expect(service.use_zoekt?).to eq(true) expect(service.zoekt_searchable_scope).to eq(group) expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) @@ -116,6 +117,24 @@ end end + context 'when requesting the first page' do + let(:page) { 1 } + + it 'searches with Zoekt' do + expect(service.use_zoekt?).to eq(true) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when requesting a page other than the first' do + let(:page) { 2 } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + context 'when zoekt_code_search licensed feature is disabled' do before do stub_licensed_features(zoekt_code_search: false) -- GitLab From 8bfe85ac695b2696d0559f72b5d42c5562042d57 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Thu, 19 Jan 2023 14:53:34 +1100 Subject: [PATCH 06/13] Remove some redundant stuff from Zoekt::SearchResults --- ee/app/services/ee/search/project_service.rb | 1 - ee/lib/gitlab/zoekt/search_results.rb | 18 ++---------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/ee/app/services/ee/search/project_service.rb b/ee/app/services/ee/search/project_service.rb index c913c9ab2a536e..4a722d9d7b607d 100644 --- a/ee/app/services/ee/search/project_service.rb +++ b/ee/app/services/ee/search/project_service.rb @@ -67,7 +67,6 @@ def zoekt_searchable_scope override :zoekt_projects def zoekt_projects - # TODO: This was copied from above. Why can project be an array? @zoekt_projects ||= Array(project).map(&:id) end end diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb index 7671907faa819c..58d4c983cc2b44 100644 --- a/ee/lib/gitlab/zoekt/search_results.rb +++ b/ee/lib/gitlab/zoekt/search_results.rb @@ -24,21 +24,11 @@ def initialize(current_user, query, limit_project_ids = nil, order_by: nil, sort end def objects(scope, page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil) - page = (page || 1).to_i - - case scope - when 'blobs' - blobs(page: page, per_page: per_page, preload_method: preload_method) - else - Kaminari.paginate_array([]) - end + blobs(page: page, per_page: per_page, preload_method: preload_method) end def formatted_count(scope) - case scope - when 'blobs' - limited_counter_with_delimiter(blobs_count) - end + limited_counter_with_delimiter(blobs_count) end def blobs_count @@ -107,10 +97,6 @@ def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method end end - def default_scope - 'blobs' - end - def limited_counter_with_delimiter(count) number_with_delimiter(count) end -- GitLab From 7d9e0674198d0ee8814f12063284a21e7918ce87 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Tue, 24 Jan 2023 08:22:02 +1100 Subject: [PATCH 07/13] Switch to RepoUrl/RepoId new Zoekt API --- ee/app/models/concerns/zoekt/searchable_repository.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/app/models/concerns/zoekt/searchable_repository.rb b/ee/app/models/concerns/zoekt/searchable_repository.rb index 6351166e94d0d5..86a3c095ed8605 100644 --- a/ee/app/models/concerns/zoekt/searchable_repository.rb +++ b/ee/app/models/concerns/zoekt/searchable_repository.rb @@ -25,7 +25,7 @@ def update_zoekt_index!(use_local_disk_path: false) project.http_url_to_repo end - payload = { RepositoryUrl: repository_url, RepositoryId: project.id.to_s } + payload = { RepoUrl: repository_url, RepoId: project.id } conn.post('/index', payload.to_json, { "Content-Type" => "application/json" }) end -- GitLab From 14806094bcbb4b790babec4488a9d80eefc6d1a0 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 25 Jan 2023 09:07:52 +1100 Subject: [PATCH 08/13] Inline search_for helper in zoekt spec --- ee/spec/lib/gitlab/zoekt/search_results_spec.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb index 201f86f7afdff2..4b4e68f6773e64 100644 --- a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb +++ b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb @@ -20,10 +20,6 @@ zoekt_ensure_project_indexed!(project_1) end - def search_for(term) - described_class.new(user, term, [project_1.id]).objects('blobs').map(&:path) - end - it 'finds blobs by regex search' do results = described_class.new(user, 'use.*egex', limit_project_ids) blobs = results.objects('blobs') @@ -124,7 +120,8 @@ def search_for(term) examples.each do |search_term, file_content| file_name = Digest::SHA256.hexdigest(file_content) - expect(search_for(search_term)).to include(file_name) + results = described_class.new(user, search_term, limit_project_ids).objects('blobs').map(&:path) + expect(results).to include(file_name) end end end -- GitLab From 42aa6ed948c90630d44bf1206e6bcddd850cb9a4 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 25 Jan 2023 09:37:52 +1100 Subject: [PATCH 09/13] Changed to CloneUrl in Zoekt API --- ee/app/models/concerns/zoekt/searchable_repository.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/app/models/concerns/zoekt/searchable_repository.rb b/ee/app/models/concerns/zoekt/searchable_repository.rb index 86a3c095ed8605..c8a40f1b420e26 100644 --- a/ee/app/models/concerns/zoekt/searchable_repository.rb +++ b/ee/app/models/concerns/zoekt/searchable_repository.rb @@ -25,7 +25,7 @@ def update_zoekt_index!(use_local_disk_path: false) project.http_url_to_repo end - payload = { RepoUrl: repository_url, RepoId: project.id } + payload = { CloneUrl: repository_url, RepoId: project.id } conn.post('/index', payload.to_json, { "Content-Type" => "application/json" }) end -- GitLab From 8328b023f6d45e6660295da828580b0e881d37bd Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 25 Jan 2023 11:28:52 +1100 Subject: [PATCH 10/13] Fix reordering args in Search::ProjectService --- ee/spec/services/search/project_service_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/spec/services/search/project_service_spec.rb b/ee/spec/services/search/project_service_spec.rb index 21c0222b8474de..f945f1ae05a596 100644 --- a/ee/spec/services/search/project_service_spec.rb +++ b/ee/spec/services/search/project_service_spec.rb @@ -47,7 +47,7 @@ let_it_be(:user) { create(:user) } let_it_be(:project) { create(:project, namespace: user.namespace) } - let(:service) { described_class.new(project, user, search: 'foobar', scope: scope, basic_search: basic_search) } + let(:service) { described_class.new(user, project, search: 'foobar', scope: scope, basic_search: basic_search) } let(:use_zoekt) { true } let(:scope) { 'blobs' } let(:basic_search) { nil } -- GitLab From 312007a67b519867e04bbaa09a951c1ae9f94c55 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 25 Jan 2023 13:48:37 +1100 Subject: [PATCH 11/13] Zoekt schema improvements --- ...04201524_add_zoekt_shards_and_indexed_namespaces.rb | 6 +++--- db/structure.sql | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb index 015d5fbeb8467c..c9d7bc51041cb0 100644 --- a/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb +++ b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb @@ -5,13 +5,13 @@ class AddZoektShardsAndIndexedNamespaces < Gitlab::Database::Migration[2.1] def change create_table :zoekt_shards do |t| - t.text :index_base_url, limit: 1024 - t.text :search_base_url, limit: 1024 + t.text :index_base_url, limit: 1024, index: { unique: true }, null: false + t.text :search_base_url, limit: 1024, index: { unique: true }, null: false t.timestamps_with_timezone end create_table :zoekt_indexed_namespaces do |t| - t.references :zoekt_shard, null: false, foreign_key: { on_delete: :cascade } + t.references :zoekt_shard, null: false, index: false, foreign_key: { on_delete: :cascade } t.bigint :namespace_id, null: false, index: true t.timestamps_with_timezone t.index [:zoekt_shard_id, :namespace_id], unique: true, name: 'index_zoekt_shard_and_namespace' diff --git a/db/structure.sql b/db/structure.sql index b7faeabce364db..6bf3fdda189104 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -23860,8 +23860,8 @@ ALTER SEQUENCE zoekt_indexed_namespaces_id_seq OWNED BY zoekt_indexed_namespaces CREATE TABLE zoekt_shards ( id bigint NOT NULL, - index_base_url text, - search_base_url text, + index_base_url text NOT NULL, + search_base_url text NOT NULL, created_at timestamp with time zone NOT NULL, updated_at timestamp with time zone NOT NULL, CONSTRAINT check_61794bac26 CHECK ((char_length(search_base_url) <= 1024)), @@ -31873,10 +31873,12 @@ CREATE INDEX index_zentao_tracker_data_on_integration_id ON zentao_tracker_data CREATE INDEX index_zoekt_indexed_namespaces_on_namespace_id ON zoekt_indexed_namespaces USING btree (namespace_id); -CREATE INDEX index_zoekt_indexed_namespaces_on_zoekt_shard_id ON zoekt_indexed_namespaces USING btree (zoekt_shard_id); - CREATE UNIQUE INDEX index_zoekt_shard_and_namespace ON zoekt_indexed_namespaces USING btree (zoekt_shard_id, namespace_id); +CREATE UNIQUE INDEX index_zoekt_shards_on_index_base_url ON zoekt_shards USING btree (index_base_url); + +CREATE UNIQUE INDEX index_zoekt_shards_on_search_base_url ON zoekt_shards USING btree (search_base_url); + CREATE INDEX index_zoom_meetings_on_issue_id ON zoom_meetings USING btree (issue_id); CREATE UNIQUE INDEX index_zoom_meetings_on_issue_id_and_issue_status ON zoom_meetings USING btree (issue_id, issue_status) WHERE (issue_status = 1); -- GitLab From 2b525766306aa02af8336925e9a96f50ffb4570b Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Wed, 25 Jan 2023 14:16:45 +1100 Subject: [PATCH 12/13] Switch Zoekt from Faraday to Gitlab::HTTP --- .../concerns/zoekt/searchable_repository.rb | 16 +++++++++++----- ee/lib/gitlab/zoekt/search_results.rb | 10 ++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/ee/app/models/concerns/zoekt/searchable_repository.rb b/ee/app/models/concerns/zoekt/searchable_repository.rb index c8a40f1b420e26..b3e1764e28b60b 100644 --- a/ee/app/models/concerns/zoekt/searchable_repository.rb +++ b/ee/app/models/concerns/zoekt/searchable_repository.rb @@ -6,8 +6,10 @@ module SearchableRepository class_methods do def truncate_zoekt_index!(shard) - conn = ::Faraday.new(url: shard.index_base_url) - conn.post('/truncate', '') + ::Gitlab::HTTP.post( + URI.join(shard.index_base_url, '/truncate'), + allow_local_requests: true + ) end end @@ -17,8 +19,6 @@ def use_zoekt? end def update_zoekt_index!(use_local_disk_path: false) - conn = ::Faraday.new(url: zoekt_index_base_url) - repository_url = if use_local_disk_path path_to_repo else @@ -26,7 +26,13 @@ def update_zoekt_index!(use_local_disk_path: false) end payload = { CloneUrl: repository_url, RepoId: project.id } - conn.post('/index', payload.to_json, { "Content-Type" => "application/json" }) + + ::Gitlab::HTTP.post( + URI.join(zoekt_index_base_url, '/index'), + headers: { "Content-Type" => "application/json" }, + body: payload.to_json, + allow_local_requests: true + ) end def async_update_zoekt_index diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb index 58d4c983cc2b44..dfbc3992830f8b 100644 --- a/ee/lib/gitlab/zoekt/search_results.rb +++ b/ee/lib/gitlab/zoekt/search_results.rb @@ -126,13 +126,15 @@ def zoekt_search(query, num:, options:) body[:RepoIds] = options[:project_ids] unless options[:project_ids] == :any - # TODO: When we support sharding we'll need to find the ones that - # match the namespaces being searched base_url = ::Zoekt::Shard.first.search_base_url - conn = ::Faraday.new(url: base_url) + response = ::Gitlab::HTTP.post( + URI.join(base_url, '/api/search'), + headers: { "Content-Type" => "application/json" }, + body: body.to_json, + allow_local_requests: true + ) - response = conn.post('/api/search', body.to_json, { "Content-Type" => "application/json" }) ::Gitlab::Json.parse(response.body, symbolize_names: true) end -- GitLab From 5037e9f9ab16f0e48bf22f2e8ca6a684644dfe7f Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Fri, 27 Jan 2023 12:42:43 +1100 Subject: [PATCH 13/13] Fix feature_category in zoekt/indexed_namespace_spec.rb --- ee/spec/models/zoekt/indexed_namespace_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/spec/models/zoekt/indexed_namespace_spec.rb b/ee/spec/models/zoekt/indexed_namespace_spec.rb index 3da6a53d760628..b31db28809f1a2 100644 --- a/ee/spec/models/zoekt/indexed_namespace_spec.rb +++ b/ee/spec/models/zoekt/indexed_namespace_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe ::Zoekt::IndexedNamespace, feature_category: :global_searcch do +RSpec.describe ::Zoekt::IndexedNamespace, feature_category: :global_search do let_it_be(:indexed_namespace1) { create(:namespace) } let_it_be(:indexed_namespace2) { create(:namespace) } let_it_be(:indexed_parent_namespace) { create(:group) } -- GitLab