diff --git a/.gitlab/ci/rails/shared.gitlab-ci.yml b/.gitlab/ci/rails/shared.gitlab-ci.yml index 4ca82f55b63df9fcdb0eb4cfde49d37cd8858e18..3f57fc35e40edde346981d414aa055d5d6191903 100644 --- a/.gitlab/ci/rails/shared.gitlab-ci.yml +++ b/.gitlab/ci/rails/shared.gitlab-ci.yml @@ -62,7 +62,7 @@ include: # spec/lib, yet background migration tests are also sitting there, # and they should run on their own jobs so we don't need to run them # in unit tests again. - - rspec_paralellized_job "--tag ~quarantine --tag ~level:background_migration" + - rspec_paralellized_job "--tag ~quarantine --tag ~zoekt --tag ~level:background_migration" allow_failure: exit_codes: !reference [.rspec-base, variables, SUCCESSFULLY_RETRIED_TEST_EXIT_CODE] diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index de6e159921c4e561ac860ff5e8e0c2e576b7649c..067c4730f0adac7852c6e7f9cfbc7d002068f18a 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -543,3 +543,5 @@ - 1 - - x509_certificate_revoke - 1 +- - zoekt_indexer + - 1 diff --git a/db/docs/zoekt_indexed_namespaces.yml b/db/docs/zoekt_indexed_namespaces.yml new file mode 100644 index 0000000000000000000000000000000000000000..1ab748ac154ef44679812f40d9b1f5d3d0cb67ed --- /dev/null +++ b/db/docs/zoekt_indexed_namespaces.yml @@ -0,0 +1,10 @@ +--- +table_name: zoekt_indexed_namespaces +classes: +- Zoekt::IndexedNamespace +feature_categories: +- global_search +description: Describes a namespace that is configured to use a specific Zoekt shard for code search +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +milestone: '15.9' +gitlab_schema: gitlab_main diff --git a/db/docs/zoekt_shards.yml b/db/docs/zoekt_shards.yml new file mode 100644 index 0000000000000000000000000000000000000000..5fe3b469b191829da93f79d52b3d11e6fc5c9c58 --- /dev/null +++ b/db/docs/zoekt_shards.yml @@ -0,0 +1,10 @@ +--- +table_name: zoekt_shards +classes: +- Zoekt::Shard +feature_categories: +- global_search 
+description: Describes a Zoekt server that will be used for indexing and search for some configured namespaces +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +milestone: '15.9' +gitlab_schema: gitlab_main diff --git a/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb new file mode 100644 index 0000000000000000000000000000000000000000..c9d7bc51041cb088d2adcc271607ee61ad6f3aee --- /dev/null +++ b/db/migrate/20230104201524_add_zoekt_shards_and_indexed_namespaces.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AddZoektShardsAndIndexedNamespaces < Gitlab::Database::Migration[2.1] + enable_lock_retries! + + def change + create_table :zoekt_shards do |t| + t.text :index_base_url, limit: 1024, index: { unique: true }, null: false + t.text :search_base_url, limit: 1024, index: { unique: true }, null: false + t.timestamps_with_timezone + end + + create_table :zoekt_indexed_namespaces do |t| + t.references :zoekt_shard, null: false, index: false, foreign_key: { on_delete: :cascade } + t.bigint :namespace_id, null: false, index: true + t.timestamps_with_timezone + t.index [:zoekt_shard_id, :namespace_id], unique: true, name: 'index_zoekt_shard_and_namespace' + end + end +end diff --git a/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb b/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb new file mode 100644 index 0000000000000000000000000000000000000000..db995d6603e4d7a62b0f0b930fe9e4bf11d35fed --- /dev/null +++ b/db/migrate/20230107125328_add_zoekt_indexed_namespaces_foreign_key.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class AddZoektIndexedNamespacesForeignKey < Gitlab::Database::Migration[2.1] + disable_ddl_transaction! 
+ + def up + add_concurrent_foreign_key :zoekt_indexed_namespaces, :namespaces, column: :namespace_id, on_delete: :cascade + end + + def down + with_lock_retries do + remove_foreign_key :zoekt_indexed_namespaces, column: :namespace_id + end + end +end diff --git a/db/schema_migrations/20230104201524 b/db/schema_migrations/20230104201524 new file mode 100644 index 0000000000000000000000000000000000000000..e98bb08fe2f49f0dcce3c78a74651acb5ca303a9 --- /dev/null +++ b/db/schema_migrations/20230104201524 @@ -0,0 +1 @@ +e27a0a61f6807352c02ddf7c0bd44a86e3c244051fa3977f597cc92e83fcb0d1 \ No newline at end of file diff --git a/db/schema_migrations/20230107125328 b/db/schema_migrations/20230107125328 new file mode 100644 index 0000000000000000000000000000000000000000..94ba5596a06a52e146978fa1ee218612483754ca --- /dev/null +++ b/db/schema_migrations/20230107125328 @@ -0,0 +1 @@ +741599316bd51b0d454e49c43a06b834d8d172f3fd1dcd28996494da8fdf5d8b \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index cbc4b291335f5aec25e7c29e485e8a4ec3bffeff..6bf3fdda189104d81e0a513d54ec71d3f86feb7f 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -23841,6 +23841,42 @@ CREATE SEQUENCE zentao_tracker_data_id_seq ALTER SEQUENCE zentao_tracker_data_id_seq OWNED BY zentao_tracker_data.id; +CREATE TABLE zoekt_indexed_namespaces ( + id bigint NOT NULL, + zoekt_shard_id bigint NOT NULL, + namespace_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL +); + +CREATE SEQUENCE zoekt_indexed_namespaces_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE zoekt_indexed_namespaces_id_seq OWNED BY zoekt_indexed_namespaces.id; + +CREATE TABLE zoekt_shards ( + id bigint NOT NULL, + index_base_url text NOT NULL, + search_base_url text NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + CONSTRAINT check_61794bac26 CHECK 
((char_length(search_base_url) <= 1024)), + CONSTRAINT check_c65bb85a32 CHECK ((char_length(index_base_url) <= 1024)) +); + +CREATE SEQUENCE zoekt_shards_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE zoekt_shards_id_seq OWNED BY zoekt_shards.id; + CREATE TABLE zoom_meetings ( id bigint NOT NULL, project_id bigint NOT NULL, @@ -24930,6 +24966,10 @@ ALTER TABLE ONLY x509_issuers ALTER COLUMN id SET DEFAULT nextval('x509_issuers_ ALTER TABLE ONLY zentao_tracker_data ALTER COLUMN id SET DEFAULT nextval('zentao_tracker_data_id_seq'::regclass); +ALTER TABLE ONLY zoekt_indexed_namespaces ALTER COLUMN id SET DEFAULT nextval('zoekt_indexed_namespaces_id_seq'::regclass); + +ALTER TABLE ONLY zoekt_shards ALTER COLUMN id SET DEFAULT nextval('zoekt_shards_id_seq'::regclass); + ALTER TABLE ONLY zoom_meetings ALTER COLUMN id SET DEFAULT nextval('zoom_meetings_id_seq'::regclass); ALTER TABLE ONLY analytics_cycle_analytics_issue_stage_events @@ -27392,6 +27432,12 @@ ALTER TABLE ONLY x509_issuers ALTER TABLE ONLY zentao_tracker_data ADD CONSTRAINT zentao_tracker_data_pkey PRIMARY KEY (id); +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT zoekt_indexed_namespaces_pkey PRIMARY KEY (id); + +ALTER TABLE ONLY zoekt_shards + ADD CONSTRAINT zoekt_shards_pkey PRIMARY KEY (id); + ALTER TABLE ONLY zoom_meetings ADD CONSTRAINT zoom_meetings_pkey PRIMARY KEY (id); @@ -31825,6 +31871,14 @@ CREATE INDEX index_x509_issuers_on_subject_key_identifier ON x509_issuers USING CREATE INDEX index_zentao_tracker_data_on_integration_id ON zentao_tracker_data USING btree (integration_id); +CREATE INDEX index_zoekt_indexed_namespaces_on_namespace_id ON zoekt_indexed_namespaces USING btree (namespace_id); + +CREATE UNIQUE INDEX index_zoekt_shard_and_namespace ON zoekt_indexed_namespaces USING btree (zoekt_shard_id, namespace_id); + +CREATE UNIQUE INDEX index_zoekt_shards_on_index_base_url ON zoekt_shards USING btree (index_base_url); + +CREATE UNIQUE 
INDEX index_zoekt_shards_on_search_base_url ON zoekt_shards USING btree (search_base_url); + CREATE INDEX index_zoom_meetings_on_issue_id ON zoom_meetings USING btree (issue_id); CREATE UNIQUE INDEX index_zoom_meetings_on_issue_id_and_issue_status ON zoom_meetings USING btree (issue_id, issue_status) WHERE (issue_status = 1); @@ -33563,6 +33617,9 @@ ALTER TABLE ONLY agent_activity_events ALTER TABLE ONLY issues ADD CONSTRAINT fk_3b8c72ea56 FOREIGN KEY (sprint_id) REFERENCES sprints(id) ON DELETE SET NULL; +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT fk_3bebdb4efc FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; + ALTER TABLE ONLY epics ADD CONSTRAINT fk_3c1fd1cccc FOREIGN KEY (due_date_sourcing_milestone_id) REFERENCES milestones(id) ON DELETE SET NULL; @@ -34805,6 +34862,9 @@ ALTER TABLE ONLY geo_repository_renamed_events ALTER TABLE ONLY aws_roles ADD CONSTRAINT fk_rails_4ed56f4720 FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE; +ALTER TABLE ONLY zoekt_indexed_namespaces + ADD CONSTRAINT fk_rails_4f6006e94c FOREIGN KEY (zoekt_shard_id) REFERENCES zoekt_shards(id) ON DELETE CASCADE; + ALTER TABLE ONLY packages_debian_publications ADD CONSTRAINT fk_rails_4fc8ebd03e FOREIGN KEY (distribution_id) REFERENCES packages_debian_project_distributions(id) ON DELETE CASCADE; diff --git a/doc/user/search/exact_code_search.md b/doc/user/search/exact_code_search.md new file mode 100644 index 0000000000000000000000000000000000000000..97f58b973cb3c46afa4c8959db00ca35d0a15a38 --- /dev/null +++ b/doc/user/search/exact_code_search.md @@ -0,0 +1,36 @@ +--- +stage: Data Stores +group: Global Search +info: "To determine the technical writer assigned to the Stage/Group associated with this page, see https://about.gitlab.com/handbook/product/ux/technical-writing/#assignments" +type: reference +--- + +# Exact Code Search **(PREMIUM)** + +> [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049) in GitLab 15.9 [with a 
flag](../../administration/feature_flags.md) named `index_code_with_zoekt` and `search_code_with_zoekt`, which enable indexing and searching respectively. Both are disabled by default. + +WARNING: +Exact code search is in [**Alpha**](../../policy/alpha-beta-support.md#alpha-features). +For the Exact code search feature roadmap, see [epic 9404](https://gitlab.com/groups/gitlab-org/-/epics/9404). + +This feature will initially only be rolled out to +specific customers on GitLab.com that request +access. + +On self-managed GitLab it should be possible to enable this, but no +documentation is provided as it requires executing commands from the Rails +console as well as advanced configuration of +[Zoekt](https://github.com/sourcegraph/zoekt) servers. + +## Usage + +When performing any Code search in GitLab it will choose to use "Exact Code +Search" powered by [Zoekt](https://github.com/sourcegraph/zoekt) if the project +is part of an enabled Group. + +The main differences between Zoekt and [Advanced Search](advanced_search.md) +are that Zoekt provides exact substring matching and allows you to +search for regular expressions. Since it allows searching for regular +expressions, certain special characters will require escaping. Backslash can +escape special characters and wrapping in double quotes can be used for phrase +searches. diff --git a/ee/app/models/concerns/zoekt/searchable_repository.rb b/ee/app/models/concerns/zoekt/searchable_repository.rb new file mode 100644 index 0000000000000000000000000000000000000000..b3e1764e28b60b94301a4c2cf971997e5a3416ee --- /dev/null +++ b/ee/app/models/concerns/zoekt/searchable_repository.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Zoekt + module SearchableRepository + extend ActiveSupport::Concern + + class_methods do + def truncate_zoekt_index!(shard) + ::Gitlab::HTTP.post( + URI.join(shard.index_base_url, '/truncate'), + allow_local_requests: true + ) + end + end + + included do + def use_zoekt? 
+ project&.use_zoekt? + end + + def update_zoekt_index!(use_local_disk_path: false) + repository_url = if use_local_disk_path + path_to_repo + else + project.http_url_to_repo + end + + payload = { CloneUrl: repository_url, RepoId: project.id } + + ::Gitlab::HTTP.post( + URI.join(zoekt_index_base_url, '/index'), + headers: { "Content-Type" => "application/json" }, + body: payload.to_json, + allow_local_requests: true + ) + end + + def async_update_zoekt_index + ::Zoekt::IndexerWorker.perform_async(project.id) + end + + private + + def zoekt_index_base_url + Zoekt::IndexedNamespace.where(namespace: project.root_namespace).first&.shard&.index_base_url + end + + def zoekt_search_base_url + Zoekt::IndexedNamespace.where(namespace: project.root_namespace).first&.shard&.search_base_url + end + end + end +end diff --git a/ee/app/models/ee/namespace.rb b/ee/app/models/ee/namespace.rb index 2027e4083e7ca32c3f13405ed73232ff7b6b0e23..a762b90b9618bff76608a281c6dcd96157e1d031 100644 --- a/ee/app/models/ee/namespace.rb +++ b/ee/app/models/ee/namespace.rb @@ -423,6 +423,10 @@ def use_elasticsearch? ::Gitlab::CurrentSettings.elasticsearch_indexes_namespace?(self) end + def use_zoekt? + ::Zoekt::IndexedNamespace.enabled_for_namespace?(self) + end + def invalidate_elasticsearch_indexes_cache! ::Gitlab::CurrentSettings.invalidate_elasticsearch_indexes_cache_for_namespace!(self.id) end diff --git a/ee/app/models/ee/project.rb b/ee/app/models/ee/project.rb index 142fccf08f11f46543b953e9df5d9d69c3af2dea..c550d14f70639c3a41947a8282e679b37cb47543 100644 --- a/ee/app/models/ee/project.rb +++ b/ee/app/models/ee/project.rb @@ -828,6 +828,10 @@ def after_import ElasticCommitIndexerWorker.perform_async(id, true) if use_elasticsearch? && !forked? end + def use_zoekt? 
+ ::Zoekt::IndexedNamespace.enabled_for_project?(self) + end + def elastic_namespace_ancestry namespace.elastic_namespace_ancestry + "p#{id}-" end diff --git a/ee/app/models/ee/repository.rb b/ee/app/models/ee/repository.rb index 24f3d7f099883bd42c62c09dce8c8a70fdd25349..10418b92c3701b2beedc85c834da29643d8fb8a2 100644 --- a/ee/app/models/ee/repository.rb +++ b/ee/app/models/ee/repository.rb @@ -13,6 +13,7 @@ module Repository prepended do include Elastic::RepositoriesSearch + include ::Zoekt::SearchableRepository delegate :checksum, :find_remote_root_ref, to: :raw_repository end diff --git a/ee/app/models/gitlab_subscriptions/features.rb b/ee/app/models/gitlab_subscriptions/features.rb index 424729d3b0552f50357c839b47c18bc61130c12a..ac2840c897f783fd605090a452b31dd317063473 100644 --- a/ee/app/models/gitlab_subscriptions/features.rb +++ b/ee/app/models/gitlab_subscriptions/features.rb @@ -38,6 +38,7 @@ class Features runner_jobs_statistics seat_link usage_quotas + zoekt_code_search ].freeze STARTER_FEATURES = %i[ @@ -74,6 +75,7 @@ class Features usage_quotas visual_review_app wip_limits + zoekt_code_search ].freeze PREMIUM_FEATURES = %i[ diff --git a/ee/app/models/zoekt/indexed_namespace.rb b/ee/app/models/zoekt/indexed_namespace.rb new file mode 100644 index 0000000000000000000000000000000000000000..c8498784a43cdc1f5cf0894d0a3acd08a650d642 --- /dev/null +++ b/ee/app/models/zoekt/indexed_namespace.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Zoekt + class IndexedNamespace < ApplicationRecord + def self.table_name_prefix + 'zoekt_' + end + + belongs_to :shard, foreign_key: :zoekt_shard_id, inverse_of: :indexed_namespaces + belongs_to :namespace + + validate :only_root_namespaces_can_be_indexed + + def self.enabled_for_project?(project) + where(namespace: project.root_namespace).exists? + end + + def self.enabled_for_namespace?(namespace) + where(namespace: namespace.root_ancestor).exists? 
+ end + + def only_root_namespaces_can_be_indexed + return unless namespace.parent_id.present? + + errors.add(:base, 'Only root namespaces can be indexed') + end + end +end diff --git a/ee/app/models/zoekt/shard.rb b/ee/app/models/zoekt/shard.rb new file mode 100644 index 0000000000000000000000000000000000000000..39109a3cae9b500641cbe09d699e3243b5c0cb18 --- /dev/null +++ b/ee/app/models/zoekt/shard.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Zoekt + class Shard < ApplicationRecord + def self.table_name_prefix + 'zoekt_' + end + + has_many :indexed_namespaces, foreign_key: :zoekt_shard_id, inverse_of: :shard + end +end diff --git a/ee/app/services/concerns/search/zoekt_searchable.rb b/ee/app/services/concerns/search/zoekt_searchable.rb new file mode 100644 index 0000000000000000000000000000000000000000..9b9a8e1ab5712e9efaa45c4b3621ab27fda3b015 --- /dev/null +++ b/ee/app/services/concerns/search/zoekt_searchable.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +module Search + module ZoektSearchable + def use_zoekt? + return false if params[:basic_search] + return false if params[:page] && params[:page].to_i != 1 + return false unless ::Feature.enabled?(:search_code_with_zoekt, current_user) + return false unless ::License.feature_available?(:zoekt_code_search) + + scope == 'blobs' && + zoekt_searchable_scope.respond_to?(:use_zoekt?) && + zoekt_searchable_scope.use_zoekt? 
+ end + + def zoekt_searchable_scope + raise NotImplementedError + end + + def zoekt_projects + raise NotImplementedError + end + + def zoekt_search_results + ::Gitlab::Zoekt::SearchResults.new( + current_user, + params[:search], + zoekt_projects, + order_by: params[:order_by], + sort: params[:sort], + filters: { language: params[:language] } + ) + end + end +end diff --git a/ee/app/services/ee/git/branch_push_service.rb b/ee/app/services/ee/git/branch_push_service.rb index e325105929896ab9dc53fa04d9b48f7c9fe59ee8..99e9316d869e2af686661a3ba50e8bdd39120ea5 100644 --- a/ee/app/services/ee/git/branch_push_service.rb +++ b/ee/app/services/ee/git/branch_push_service.rb @@ -8,6 +8,7 @@ module BranchPushService override :execute def execute enqueue_elasticsearch_indexing + enqueue_zoekt_indexing enqueue_update_external_pull_requests super @@ -21,6 +22,14 @@ def enqueue_elasticsearch_indexing project.repository.index_commits_and_blobs end + def enqueue_zoekt_indexing + return false unless ::Feature.enabled?(:index_code_with_zoekt) + return false unless default_branch? + return false unless project.use_zoekt? + + project.repository.async_update_zoekt_index + end + def enqueue_update_external_pull_requests return unless project.mirror? 
return unless params.fetch(:create_pipelines, true) diff --git a/ee/app/services/ee/search/group_service.rb b/ee/app/services/ee/search/group_service.rb index cc3721956c8fa59c14574854b67ed41970dd9d45..d5a66101f2b74d9b3c3e54d45435c022ba34c1d2 100644 --- a/ee/app/services/ee/search/group_service.rb +++ b/ee/app/services/ee/search/group_service.rb @@ -4,12 +4,18 @@ module EE module Search module GroupService extend ::Gitlab::Utils::Override + include ::Search::ZoektSearchable override :elasticsearchable_scope def elasticsearchable_scope group end + override :zoekt_searchable_scope + def zoekt_searchable_scope + group + end + override :elastic_global def elastic_global false @@ -20,8 +26,14 @@ def elastic_projects @elastic_projects ||= projects.pluck_primary_key end + override :zoekt_projects + def zoekt_projects + @zoekt_projects ||= projects.pluck_primary_key + end + override :execute def execute + return zoekt_search_results if use_zoekt? return super unless use_elasticsearch? ::Gitlab::Elastic::GroupSearchResults.new( diff --git a/ee/app/services/ee/search/project_service.rb b/ee/app/services/ee/search/project_service.rb index 2054f27ccd065a4e28e4d8a457e75ad0cefe8f64..4a722d9d7b607d537329912e6febc3270c4169c7 100644 --- a/ee/app/services/ee/search/project_service.rb +++ b/ee/app/services/ee/search/project_service.rb @@ -5,12 +5,14 @@ module Search module ProjectService extend ::Gitlab::Utils::Override include ::Search::Elasticsearchable + include ::Search::ZoektSearchable SCOPES_THAT_SUPPORT_BRANCHES = %w(wiki_blobs commits blobs).freeze override :execute def execute return super if project.respond_to?(:archived?) && project.archived? + return zoekt_search_results if use_zoekt? && use_default_branch? return super unless use_elasticsearch? && use_default_branch? search = params[:search] @@ -53,9 +55,20 @@ def use_default_branch? 
project.root_ref?(repository_ref) end + override :elasticsearchable_scope def elasticsearchable_scope project end + + override :zoekt_searchable_scope + def zoekt_searchable_scope + project + end + + override :zoekt_projects + def zoekt_projects + @zoekt_projects ||= Array(project).map(&:id) + end end end end diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index a3fe53b13ab7e713628528b5387dbed5057b1244..4b704ade25ea2f700445286b7a7677de1637c42e 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -1587,3 +1587,12 @@ :weight: 1 :idempotent: true :tags: [] +- :name: zoekt_indexer + :worker_name: Zoekt::IndexerWorker + :feature_category: :global_search + :has_external_dependencies: false + :urgency: :throttled + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] diff --git a/ee/app/workers/zoekt/indexer_worker.rb b/ee/app/workers/zoekt/indexer_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..408070fded0c61f34e9a71fb9f99e1b5528f6354 --- /dev/null +++ b/ee/app/workers/zoekt/indexer_worker.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Zoekt + class IndexerWorker + TIMEOUT = 2.hours + + include ApplicationWorker + + data_consistency :always + include Gitlab::ExclusiveLeaseHelpers + + feature_category :global_search + urgency :throttled + idempotent! + + def perform(project_id) + return unless ::Feature.enabled?(:index_code_with_zoekt) + return unless ::License.feature_available?(:zoekt_code_search) + + project = Project.find(project_id) + return true unless project.use_zoekt? + + in_lock("#{self.class.name}/#{project_id}", ttl: (TIMEOUT + 1.minute), retries: 0) do + project.repository.update_zoekt_index! 
+ end + end + end +end diff --git a/ee/config/feature_flags/development/index_code_with_zoekt.yml b/ee/config/feature_flags/development/index_code_with_zoekt.yml new file mode 100644 index 0000000000000000000000000000000000000000..626252c1dfb42724f55c8c4622dd2d12cbc7657f --- /dev/null +++ b/ee/config/feature_flags/development/index_code_with_zoekt.yml @@ -0,0 +1,8 @@ +--- +name: index_code_with_zoekt +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/388519 +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/config/feature_flags/development/search_code_with_zoekt.yml b/ee/config/feature_flags/development/search_code_with_zoekt.yml new file mode 100644 index 0000000000000000000000000000000000000000..c443dc0238f01548632c0fa4a896170ca2237866 --- /dev/null +++ b/ee/config/feature_flags/development/search_code_with_zoekt.yml @@ -0,0 +1,8 @@ +--- +name: search_code_with_zoekt +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/105049 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/388519 +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/lib/gitlab/zoekt/search_results.rb b/ee/lib/gitlab/zoekt/search_results.rb new file mode 100644 index 0000000000000000000000000000000000000000..dfbc3992830f8bd0fe0b1825edaf9f1d39b2f36a --- /dev/null +++ b/ee/lib/gitlab/zoekt/search_results.rb @@ -0,0 +1,193 @@ +# frozen_string_literal: true + +module Gitlab + module Zoekt + class SearchResults + include ActionView::Helpers::NumberHelper + include Gitlab::Utils::StrongMemoize + + DEFAULT_PER_PAGE = Gitlab::SearchResults::DEFAULT_PER_PAGE + + attr_reader :current_user, :query, :public_and_internal_projects, :order_by, :sort, :filters + + # Limit search results by passed projects + # It allows us to search only for projects user has access to + 
attr_reader :limit_project_ids + + def initialize(current_user, query, limit_project_ids = nil, order_by: nil, sort: nil, filters: {}) + @current_user = current_user + @query = query + @limit_project_ids = limit_project_ids + @order_by = order_by + @sort = sort + @filters = filters + end + + def objects(scope, page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil) + blobs(page: page, per_page: per_page, preload_method: preload_method) + end + + def formatted_count(scope) + limited_counter_with_delimiter(blobs_count) + end + + def blobs_count + @blobs_count ||= blobs.total_count + end + + # These aliases act as an adapter to the Gitlab::SearchResults + # interface, which is mostly implemented by this class. + alias_method :limited_blobs_count, :blobs_count + + def parse_zoekt_search_result(result, project) + ref = project.default_branch_or_main + path = result[:path] + basename = File.join(File.dirname(path), File.basename(path, '.*')) + content = result[:content] + project_id = project.id + + ::Gitlab::Search::FoundBlob.new( + path: path, + basename: basename, + ref: ref, + startline: [result[:line] - 1, 0].max, + highlight_line: result[:line], + data: content, + project: project, + project_id: project_id + ) + end + + def aggregations(scope) + [] + end + + def highlight_map(_) + nil + end + + private + + def base_options + { + current_user: current_user, + project_ids: limit_project_ids, + public_and_internal_projects: public_and_internal_projects, + order_by: order_by, + sort: sort + } + end + + def memoize_key(scope, count_only:) + count_only ? "#{scope}_results_count".to_sym : scope + end + + def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method: nil) + return Kaminari.paginate_array([]) if query.blank? 
+ + strong_memoize(memoize_key(:blobs, count_only: count_only)) do + search_as_found_blob( + query, + Repository, + page: (page || 1).to_i, + per_page: per_page, + options: base_options.merge(count_only: count_only).merge(filters.slice(:language)), + preload_method: preload_method + ) + end + end + + def limited_counter_with_delimiter(count) + number_with_delimiter(count) + end + + def search_as_found_blob(query, repositories, page:, per_page:, options:, preload_method:) + zoekt_search_and_wrap(query, + page: page, + per_page: per_page, + options: options, + preload_method: preload_method) do |result, project| + parse_zoekt_search_result(result, project) + end + end + + def zoekt_search(query, num:, options:) + body = { + Q: query, + Opts: { + TotalMaxMatchCount: num, + NumContextLines: 1 + } + } + + # Safety net because Zoekt will match all projects if you provide + # an empty array. + raise "Not possible to search no projects" if options[:project_ids] == [] + + body[:RepoIds] = options[:project_ids] unless options[:project_ids] == :any + + base_url = ::Zoekt::Shard.first.search_base_url + + response = ::Gitlab::HTTP.post( + URI.join(base_url, '/api/search'), + headers: { "Content-Type" => "application/json" }, + body: body.to_json, + allow_local_requests: true + ) + + ::Gitlab::Json.parse(response.body, symbolize_names: true) + end + + def zoekt_search_and_wrap(query, page: 1, per_page: 20, options: {}, preload_method: nil, &blk) + search_result = zoekt_search( + query, + num: per_page, + options: options + ) + total_count = search_result[:Result][:MatchCount] + + response = (search_result[:Result][:Files] || []).flat_map do |r| + project_id = r[:Repository].to_i + + r[:LineMatches].map do |match| + { + project_id: project_id, + content: [match[:Before], match[:Line], match[:After]].compact.map { |l| Base64.decode64(l) }.join("\n"), + line: match[:LineNumber], + path: r[:FileName] + } + end + end + + items, total_count = yield_each_zoekt_search_result(response, 
preload_method, total_count, &blk) + + offset = 0 + Kaminari.paginate_array(items, total_count: total_count, limit: per_page, offset: offset) + end + + def yield_each_zoekt_search_result(response, preload_method, total_count) + project_ids = response.pluck(:project_id).uniq # rubocop:disable CodeReuse/ActiveRecord + projects = Project.with_route.id_in(project_ids) + projects = projects.public_send(preload_method) if preload_method # rubocop:disable GitlabSecurity/PublicSend + projects = projects.index_by(&:id) + + items = response.map do |result| + project_id = result[:project_id] + project = projects[project_id] + + if project.nil? || project.pending_delete? + total_count -= 1 + next + end + + yield(result, project) + end + + # Remove results for deleted projects + items.compact! + + [items, total_count] + end + end + end +end diff --git a/ee/spec/features/search/zoekt/search_spec.rb b/ee/spec/features/search/zoekt/search_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..ee81ba62fef8c137bb15316a17f3ac12d3be5e2a --- /dev/null +++ b/ee/spec/features/search/zoekt/search_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 'Zoekt search', :zoekt, :js, :disable_rate_limiter, :elastic, feature_category: :global_search do + let_it_be(:user) { create(:user) } + let_it_be(:group) { create(:group, :public) } + let_it_be(:project1) { create(:project, :repository, :public, namespace: group) } + let_it_be(:project2) { create(:project, :repository, :public, namespace: group) } + let_it_be(:private_group) { create(:group, :private) } + let_it_be(:private_project) { create(:project, :repository, :private, namespace: private_group) } + + def choose_group(group) + find('[data-testid="group-filter"]').click + wait_for_requests + + page.within '[data-testid="group-filter"]' do + click_button group.name + end + end + + def choose_project(project) + find('[data-testid="project-filter"]').click + wait_for_requests 
+ + page.within '[data-testid="project-filter"]' do + click_button project.name + end + end + + before do + # Necessary as group scoped code search is + # not available without Elasticsearch enabled + # even though it's using Zoekt. + stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true) + + zoekt_ensure_project_indexed!(project1) + zoekt_ensure_project_indexed!(project2) + zoekt_ensure_project_indexed!(private_project) + + project1.add_maintainer(user) + project2.add_maintainer(user) + group.add_owner(user) + + sign_in(user) + + visit(search_path) + + wait_for_requests + + choose_group(group) + end + + describe 'blob search' do + it 'finds files with a regex search and allows filtering down again by project' do + submit_search('user.*egex') + select_search_scope('Code') + + expect(page).to have_selector('.file-content .blob-content', count: 2) + expect(page).to have_button('Copy file path') + + choose_project(project1) + + expect(page).to have_selector('.file-content .blob-content', count: 1) + + allow(Ability).to receive(:allowed?).and_call_original + expect(Ability).to receive(:allowed?).with(anything, :read_blob, anything).twice.and_return(false) + + submit_search("username_regex") + select_search_scope('Code') + expect(page).not_to have_selector('.file-content .blob-content') + end + end +end diff --git a/ee/spec/lib/gitlab/zoekt/search_results_spec.rb b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..4b4e68f6773e6464474b1ee3ef2e58caea33a2ee --- /dev/null +++ b/ee/spec/lib/gitlab/zoekt/search_results_spec.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Gitlab::Zoekt::SearchResults, :zoekt, feature_category: :global_search do + let_it_be(:user) { create(:user) } + + let(:query) { 'hello world' } + let_it_be(:project_1) { create(:project, :public, :repository) } + let_it_be(:project_2) { create(:project, :public, 
:repository) } + let(:limit_project_ids) { [project_1.id] } + + before do + zoekt_ensure_project_indexed!(project_1) + zoekt_ensure_project_indexed!(project_2) + end + + describe 'blobs', :sidekiq_inline do + before do + zoekt_ensure_project_indexed!(project_1) + end + + it 'finds blobs by regex search' do + results = described_class.new(user, 'use.*egex', limit_project_ids) + blobs = results.objects('blobs') + + expect(blobs.map(&:data).join).to include("def username_regex\n default_regex") + expect(results.blobs_count).to eq 5 + end + + it 'finds blobs from searched projects only' do + project_3 = create :project, :repository, :private + zoekt_ensure_project_indexed!(project_3) + project_3.add_reporter(user) + + results = described_class.new(user, 'project_name_regex', [project_1.id]) + expect(results.blobs_count).to eq 1 + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id]) + + results = described_class.new(user, 'project_name_regex', [project_1.id, project_3.id]) + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id, project_3.id]) + expect(results.blobs_count).to eq 2 + + results = described_class.new(user, 'project_name_regex', :any) + result_project_ids = results.objects('blobs').map(&:project_id) + expect(result_project_ids.uniq).to match_array([project_1.id, project_2.id, project_3.id]) + expect(results.blobs_count).to eq 3 + end + + it 'raises an error if there are somehow no project_id in the filter' do + expect do + described_class.new(user, 'project_name_regex', []).objects('blobs') + end.to raise_error('Not possible to search no projects') + end + + it 'returns zero when blobs are not found' do + results = described_class.new(user, 'asdfg', limit_project_ids) + + expect(results.blobs_count).to eq 0 + end + + context 'when searching with special characters', :aggregate_failures do + let(:examples) do + 
{ + 'perlMethodCall' => '$my_perl_object->perlMethodCall', + '"absolute_with_specials.txt"' => '/a/longer/file-path/absolute_with_specials.txt', + '"components-within-slashes"' => '/file-path/components-within-slashes/', + 'bar\(x\)' => 'Foo.bar(x)', + 'someSingleColonMethodCall' => 'LanguageWithSingleColon:someSingleColonMethodCall', + 'javaLangStaticMethodCall' => 'MyJavaClass::javaLangStaticMethodCall', + 'tokenAfterParentheses' => 'ParenthesesBetweenTokens)tokenAfterParentheses', + 'ruby_call_method_123' => 'RubyClassInvoking.ruby_call_method_123(with_arg)', + 'ruby_method_call' => 'RubyClassInvoking.ruby_method_call(with_arg)', + '#ambitious-planning' => 'We [plan ambitiously](#ambitious-planning).', + 'ambitious-planning' => 'We [plan ambitiously](#ambitious-planning).', + 'tokenAfterCommaWithNoSpace' => 'WouldHappenInManyLanguages,tokenAfterCommaWithNoSpace', + 'missing_token_around_equals' => 'a.b.c=missing_token_around_equals', + 'and;colons:too\$' => 'and;colons:too$', + '"differeñt-lønguage.txt"' => 'another/file-path/differeñt-lønguage.txt', + '"relative-with-specials.txt"' => 'another/file-path/relative-with-specials.txt', + 'ruby_method_123' => 'def self.ruby_method_123(ruby_another_method_arg)', + 'ruby_method_name' => 'def self.ruby_method_name(ruby_method_arg)', + '"dots.also.neeeeed.testing"' => 'dots.also.neeeeed.testing', + '.testing' => 'dots.also.neeeeed.testing', + 'dots' => 'dots.also.neeeeed.testing', + 'also.neeeeed' => 'dots.also.neeeeed.testing', + 'neeeeed' => 'dots.also.neeeeed.testing', + 'tests-image' => 'extends: .gitlab-tests-image', + 'gitlab-tests' => 'extends: .gitlab-tests-image', + 'gitlab-tests-image' => 'extends: .gitlab-tests-image', + 'foo/bar' => 'https://s3.amazonaws.com/foo/bar/baz.png', + 'https://test.or.dev.com/repository' => 'https://test.or.dev.com/repository/maven-all', + 'test.or.dev.com/repository/maven-all' => 'https://test.or.dev.com/repository/maven-all', + 'repository/maven-all' => 
'https://test.or.dev.com/repository/maven-all', + 'https://test.or.dev.com/repository/maven-all' => 'https://test.or.dev.com/repository/maven-all', + 'bar-baz-conventions' => 'id("foo.bar-baz-conventions")', + 'baz-conventions' => 'id("foo.bar-baz-conventions")', + 'baz' => 'id("foo.bar-baz-conventions")', + 'bikes-3.4' => 'include "bikes-3.4"', + 'sql_log_bin' => 'q = "SET @@session.sql_log_bin=0;"', + 'sql_log_bin=0' => 'q = "SET @@session.sql_log_bin=0;"', + 'v3/delData' => 'uri: "v3/delData"', + '"us-east-2"' => 'us-east-2' + } + end + + before do + examples.values.uniq.each do |file_content| + file_name = Digest::SHA256.hexdigest(file_content) + project_1.repository.create_file(user, file_name, file_content, message: 'Some commit message', +branch_name: 'master') + end + + zoekt_ensure_project_indexed!(project_1) + end + + it 'finds all examples' do + examples.each do |search_term, file_content| + file_name = Digest::SHA256.hexdigest(file_content) + + results = described_class.new(user, search_term, limit_project_ids).objects('blobs').map(&:path) + expect(results).to include(file_name) + end + end + end + end +end diff --git a/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..4d495dd91e184557c1ae039dc6b7e744fa33e1b1 --- /dev/null +++ b/ee/spec/models/concerns/zoekt/searchable_repository_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::SearchableRepository, :zoekt, feature_category: :global_search do + let_it_be(:user) { create(:user) } + + let_it_be(:project) { create(:project, :repository) } + let_it_be(:unindexed_project) { create(:project, :repository) } + let(:repository) { project.repository } + let(:unindexed_repository) { unindexed_project.repository } + + before do + zoekt_ensure_project_indexed!(project) + end + + describe '#use_zoekt?' 
do + it 'is true for indexed projects' do + expect(repository.use_zoekt?).to eq(true) + end + + it 'is false for unindexed projects' do + expect(unindexed_repository.use_zoekt?).to eq(false) + end + end + + def search_for(term) + ::Gitlab::Zoekt::SearchResults.new(user, term, :any).objects('blobs').map(&:path) + end + + describe '#update_zoekt_index!' do + it 'makes updates available' do + project.repository.create_file( + user, + 'somenewsearchablefile.txt', + 'some content', + message: 'added test file', + branch_name: project.default_branch) + + expect(search_for('somenewsearchablefile.txt')).to be_empty + + repository.update_zoekt_index! + + expect(search_for('somenewsearchablefile.txt')).to match_array(['somenewsearchablefile.txt']) + end + end + + describe '.truncate_zoekt_index!' do + it 'removes all data from the Zoekt shard' do + expect(search_for('.')).not_to be_empty + + Repository.truncate_zoekt_index!(::Zoekt::Shard.last) + + expect(search_for('.')).to be_empty + end + end + + describe '#async_update_zoekt_index', :sidekiq_inline do + it 'makes updates available via ::Zoekt::IndexerWorker' do + expect(::Zoekt::IndexerWorker).to receive(:perform_async).with(project.id).and_call_original + + project.repository.create_file( + user, + 'anothernewsearchablefile.txt', + 'some content', + message: 'added test file', + branch_name: project.default_branch) + + expect(search_for('anothernewsearchablefile.txt')).to be_empty + + repository.async_update_zoekt_index + + expect(search_for('anothernewsearchablefile.txt')).to match_array(['anothernewsearchablefile.txt']) + end + end +end diff --git a/ee/spec/models/ee/namespace_spec.rb b/ee/spec/models/ee/namespace_spec.rb index 15e62333733dc8d6e0520178666919097c1663d7..8f886abdfcc32496add41bc23cae5c613d37872c 100644 --- a/ee/spec/models/ee/namespace_spec.rb +++ b/ee/spec/models/ee/namespace_spec.rb @@ -124,6 +124,14 @@ end end + describe '#use_zoekt?', feature_category: :global_search do + it 'delegates to 
::Zoekt::IndexedNamespace' do + expect(::Zoekt::IndexedNamespace).to receive(:enabled_for_namespace?).with(namespace).and_return(true) + + expect(namespace.use_zoekt?).to eq(true) + end + end + describe '#invalidate_elasticsearch_indexes_cache!' do let(:namespace) { create :namespace } diff --git a/ee/spec/models/project_spec.rb b/ee/spec/models/project_spec.rb index 3229be42b8840e7f6eeb2f4d33af4e60a0052794..8efe3195667384a200c05d0fc02dea3519d818f9 100644 --- a/ee/spec/models/project_spec.rb +++ b/ee/spec/models/project_spec.rb @@ -2245,6 +2245,14 @@ end end + describe '#use_zoekt?', feature_category: :global_search do + it 'delegates to ::Zoekt::IndexedNamespace' do + expect(::Zoekt::IndexedNamespace).to receive(:enabled_for_project?).with(project).and_return(true) + + expect(project.use_zoekt?).to eq(true) + end + end + describe '#lfs_http_url_to_repo' do let(:project) { create(:project) } let(:project_path) { "#{Gitlab::Routing.url_helpers.project_path(project)}.git" } diff --git a/ee/spec/models/zoekt/indexed_namespace_spec.rb b/ee/spec/models/zoekt/indexed_namespace_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..b31db28809f1a2e343d1fa927e763a13804b0422 --- /dev/null +++ b/ee/spec/models/zoekt/indexed_namespace_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::IndexedNamespace, feature_category: :global_search do + let_it_be(:indexed_namespace1) { create(:namespace) } + let_it_be(:indexed_namespace2) { create(:namespace) } + let_it_be(:indexed_parent_namespace) { create(:group) } + let_it_be(:indexed_child_namespace) { create(:group, parent: indexed_parent_namespace) } + let_it_be(:unindexed_namespace) { create(:namespace) } + let_it_be(:indexed_project1) { create(:project, namespace: indexed_namespace1) } + let_it_be(:unindexed_project) { create(:project, namespace: unindexed_namespace) } + let_it_be(:indexed_project_of_parent_namespace) { create(:project, namespace: 
indexed_parent_namespace) } + let_it_be(:indexed_project_of_child_namespace) { create(:project, namespace: indexed_child_namespace) } + let_it_be(:shard) { Zoekt::Shard.create!(index_base_url: 'http://example.com:1234/', search_base_url: 'http://example.com:4567/') } + + before :all do + described_class.create!(shard: shard, namespace: indexed_namespace1) + described_class.create!(shard: shard, namespace: indexed_namespace2) + described_class.create!(shard: shard, namespace: indexed_parent_namespace) + end + + context 'with validations' do + it 'does not allow you to mark a subgroup as indexed' do + expect do + described_class.create!(shard: shard, namespace: indexed_child_namespace) + end.to raise_error(/Only root namespaces can be indexed/) + end + end + + describe '#enabled_for_namespace?' do + it 'returns true for those indexed namespace records' do + expect(described_class.enabled_for_namespace?(indexed_namespace1)).to eq(true) + expect(described_class.enabled_for_namespace?(indexed_namespace2)).to eq(true) + end + + it 'returns false for unindexed namespace records' do + expect(described_class.enabled_for_namespace?(unindexed_namespace)).to eq(false) + end + + it 'delegates to root namespace for subgroups' do + expect(described_class.enabled_for_namespace?(indexed_child_namespace)).to eq(true) + end + end + + describe '#enabled_for_project?' 
do + it 'returns true for projects in indexed namespaces' do + expect(described_class.enabled_for_project?(indexed_project1)).to eq(true) + expect(described_class.enabled_for_project?(indexed_project_of_parent_namespace)).to eq(true) + end + + it 'returns false for projects in unindexed namespaces' do + expect(described_class.enabled_for_project?(unindexed_project)).to eq(false) + end + + it 'delegates to root namespace for projects in subgroups' do + expect(described_class.enabled_for_project?(indexed_project_of_child_namespace)).to eq(true) + end + end +end diff --git a/ee/spec/models/zoekt/shard_spec.rb b/ee/spec/models/zoekt/shard_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..c8b2245b226b0a8d1f0b1640d9947e1186070c97 --- /dev/null +++ b/ee/spec/models/zoekt/shard_spec.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::Shard, feature_category: :global_search do + let_it_be(:indexed_namespace1) { create(:namespace) } + let_it_be(:indexed_namespace2) { create(:namespace) } + let_it_be(:unindexed_namespace) { create(:namespace) } + let(:shard) { described_class.create!(index_base_url: 'http://example.com:1234/', search_base_url: 'http://example.com:4567/') } + + before do + Zoekt::IndexedNamespace.create!(shard: shard, namespace: indexed_namespace1) + Zoekt::IndexedNamespace.create!(shard: shard, namespace: indexed_namespace2) + end + + it 'has many indexed_namespaces' do + expect(shard.indexed_namespaces.count).to eq(2) + expect(shard.indexed_namespaces.map(&:namespace)).to contain_exactly(indexed_namespace1, indexed_namespace2) + end +end diff --git a/ee/spec/services/ee/git/branch_push_service_spec.rb b/ee/spec/services/ee/git/branch_push_service_spec.rb index 7f01a236dce39e01c3e8dd6d0d4217dd1ad4400f..9bf5cd340ab30d361a7eda2a28ebb3f4576fdd7a 100644 --- a/ee/spec/services/ee/git/branch_push_service_spec.rb +++ b/ee/spec/services/ee/git/branch_push_service_spec.rb @@ -38,7 +38,7 @@ 
end end - context 'ElasticSearch indexing', :elastic, :clean_gitlab_redis_shared_state do + context 'ElasticSearch indexing', :elastic, :clean_gitlab_redis_shared_state, feature_category: :global_search do before do stub_ee_application_setting(elasticsearch_indexing?: true) end @@ -107,6 +107,52 @@ end end + context 'with Zoekt indexing', feature_category: :global_search do + let(:use_zoekt) { true } + + before do + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'triggers async_update_zoekt_index' do + expect(project.repository).to receive(:async_update_zoekt_index) + + subject.execute + end + + context 'when pushing to a non-default branch' do + let(:ref) { 'refs/heads/other' } + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + + context 'when index_code_with_zoekt is disabled' do + before do + stub_feature_flags(index_code_with_zoekt: false) + end + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + + context 'when zoekt is not enabled for the project' do + let(:use_zoekt) { false } + + it 'does not trigger async_update_zoekt_index' do + expect(project.repository).not_to receive(:async_update_zoekt_index) + + subject.execute + end + end + end + context 'External pull requests' do it 'runs UpdateExternalPullRequestsWorker' do expect(UpdateExternalPullRequestsWorker).to receive(:perform_async).with(project.id, user.id, ref) diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index 41067f67c4aa694ed324c7f0f12d79df96b33d2a..18d27995d2d7c8e7d6b8dfd3ef2879007bd0e5b5 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -62,6 +62,91 @@ end end + context 'when searching with Zoekt' do + let(:service) { 
described_class.new(user, group, search: 'foobar', scope: scope, basic_search: basic_search, page: page) } + let(:use_zoekt) { true } + let(:scope) { 'blobs' } + let(:basic_search) { nil } + let(:page) { nil } + + before do + allow(group).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'returns a Gitlab::Zoekt::SearchResults' do + expect(service.use_zoekt?).to eq(true) + expect(service.zoekt_searchable_scope).to eq(group) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + + context 'when group does not have Zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when scope is not blobs' do + let(:scope) { 'issues' } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when basic_search is requested' do + let(:basic_search) { true } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when search_code_with_zoekt is disabled' do + before do + stub_feature_flags(search_code_with_zoekt: false) + end + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when requesting the first page' do + let(:page) { 1 } + + it 'searches with Zoekt' do + expect(service.use_zoekt?).to eq(true) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when requesting a page other than the first' do + let(:page) { 2 } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to 
be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when zoekt_code_search licensed feature is disabled' do + before do + stub_licensed_features(zoekt_code_search: false) + end + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + end + context 'visibility', :elastic_delete_by_query, :sidekiq_inline do include_context 'ProjectPolicyTable context' diff --git a/ee/spec/services/search/project_service_spec.rb b/ee/spec/services/search/project_service_spec.rb index c3a6e5d52a3956019dc257d0f6dd245025f5c5a9..f945f1ae05a596bdcdb8a17323314ca6c88ae904 100644 --- a/ee/spec/services/search/project_service_spec.rb +++ b/ee/spec/services/search/project_service_spec.rb @@ -43,6 +43,53 @@ end end + context 'when searching with Zoekt' do + let_it_be(:user) { create(:user) } + let_it_be(:project) { create(:project, namespace: user.namespace) } + + let(:service) { described_class.new(user, project, search: 'foobar', scope: scope, basic_search: basic_search) } + let(:use_zoekt) { true } + let(:scope) { 'blobs' } + let(:basic_search) { nil } + + before do + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + it 'searches with Zoekt' do + expect(service.use_zoekt?).to eq(true) + expect(service.zoekt_searchable_scope).to eq(project) + expect(service.execute).to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + + context 'when project does not have Zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when scope is not blobs' do + let(:scope) { 'issues' } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + + context 'when basic_search is requested' 
do + let(:basic_search) { true } + + it 'does not search with Zoekt' do + expect(service.use_zoekt?).to eq(false) + expect(service.execute).not_to be_kind_of(::Gitlab::Zoekt::SearchResults) + end + end + end + context 'when a multiple projects provided' do it_behaves_like 'EE search service shared examples', ::Gitlab::ProjectSearchResults, ::Gitlab::Elastic::SearchResults do let_it_be(:group) { create(:group) } diff --git a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb index 65328ef0af6f348229cce4a63ef5103529ca9c56..344730a4aae3fe8b7eb150643a82ebef3953c5de 100644 --- a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb +++ b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb @@ -47,6 +47,7 @@ expect(Gitlab::CurrentSettings) .to receive(:search_using_elasticsearch?) .with(scope: scope) + .at_least(:once) .and_return(true) is_expected.to be_a(elasticsearch_results) @@ -56,6 +57,7 @@ expect(Gitlab::CurrentSettings) .to receive(:search_using_elasticsearch?) .with(scope: scope) + .at_least(:once) .and_return(false) is_expected.to be_a(normal_results) diff --git a/ee/spec/support/zoekt.rb b/ee/spec/support/zoekt.rb new file mode 100644 index 0000000000000000000000000000000000000000..2f7a346f657e88ffabed6ae6b01245567b84b021 --- /dev/null +++ b/ee/spec/support/zoekt.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Zoekt + module TestHelpers + def zoekt_shard + @zoekt_shard ||= ::Zoekt::Shard.find_or_create_by!(index_base_url: 'http://127.0.0.1:6060/', search_base_url: 'http://127.0.0.1:6070/') + end + module_function :zoekt_shard + + def zoekt_truncate_index! + Repository.truncate_zoekt_index!(zoekt_shard) + end + module_function :zoekt_truncate_index! 
+ + def zoekt_ensure_namespace_indexed!(namespace) + ::Zoekt::IndexedNamespace.find_or_create_by!(shard: zoekt_shard, namespace: namespace.root_ancestor) + end + + def zoekt_ensure_project_indexed!(project) + zoekt_ensure_namespace_indexed!(project.namespace) + + # TODO: We shouldn't be referencing files on disk but I don't think we + # can git clone from rspec as Web/API is not running + allow(::Gitlab::GitalyClient::StorageSettings).to receive(:disk_access_denied?).and_return(false) + project.repository.update_zoekt_index!(use_local_disk_path: true) + end + end +end + +RSpec.configure do |config| + config.around(:each, :zoekt) do |example| + ::Zoekt::TestHelpers.zoekt_truncate_index! + + example.run + + ::Zoekt::TestHelpers.zoekt_truncate_index! + end + + config.before(:each, :zoekt) do + stub_licensed_features(zoekt_code_search: true) + end + + config.include ::Zoekt::TestHelpers +end diff --git a/ee/spec/workers/zoekt/indexer_worker_spec.rb b/ee/spec/workers/zoekt/indexer_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..265a831343b789f923506793b34ad0d41fbe1120 --- /dev/null +++ b/ee/spec/workers/zoekt/indexer_worker_spec.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::Zoekt::IndexerWorker, feature_category: :global_search do + let_it_be(:project) { create(:project, :repository) } + let(:use_zoekt) { true } + + subject { described_class.new } + + before do + # Mocking Project.find simplifies the stubs on project.use_zoekt? and + # project.repository + allow(Project).to receive(:find).with(project.id).and_return(project) + allow(project).to receive(:use_zoekt?).and_return(use_zoekt) + end + + describe '#perform' do + it 'sends the project to Zoekt for indexing' do + expect(project.repository).to receive(:update_zoekt_index!) 
+ + subject.perform(project.id) + end + + context 'when index_code_with_zoekt is disabled' do + before do + stub_feature_flags(index_code_with_zoekt: false) + end + + it 'does not send the project to Zoekt for indexing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the zoekt_code_search licensed feature is disabled' do + before do + stub_licensed_features(zoekt_code_search: false) + end + + it 'does nothing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the project does not have zoekt enabled' do + let(:use_zoekt) { false } + + it 'does not send the project to Zoekt for indexing' do + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + + context 'when the indexer is locked for the given project' do + it 'does not run index' do + expect(subject).to receive(:in_lock) # Mock and don't yield + .with("Zoekt::IndexerWorker/#{project.id}", ttl: (Zoekt::IndexerWorker::TIMEOUT + 1.minute), retries: 0) + + expect(project.repository).not_to receive(:update_zoekt_index!) + + subject.perform(project.id) + end + end + end +end