# frozen_string_literal: true

require 'benchmark'

module Search
  module Zoekt
    # Simple benchmark wrapper for Gitlab::Search::Zoekt::Client.
    #
    # Posts the same payload `runs` times through the client's private
    # `post_request` method, measures every request with the monotonic clock
    # and reports the individual and average durations in milliseconds.
    #
    # Usage:
    #   BenchmarkService.new(json_payload:, url:, options: {}, logger: nil, runs: 5).execute
    #
    # NOTE(review): `json_payload:` and `url:` are required keywords; callers
    # that only pass `logger:` and `options:` will raise ArgumentError.
    class BenchmarkService
      RESULT_KEY = 'Result'
      FILES_KEY = 'Files'

      # Convenience entry point: builds the service and runs it.
      def self.execute(*args, **kwargs)
        new(*args, **kwargs).execute
      end

      # @param json_payload [String, Hash] request body, as JSON text or as a Hash
      # @param url [Object] passed untouched as the first argument of `post_request`
      # @param options [Hash] forwarded as keyword arguments to `post_request`;
      #   `options[:endpoint]` additionally overrides the `forward_to` endpoints
      # @param logger [#info, #error, nil] falls back to a Logger on $stdout when nil
      # @param runs [Integer] number of requests to send
      def initialize(json_payload:, url:, options: {}, logger: nil, runs: 5)
        @json_payload = json_payload
        @url = url
        @options = options
        @logger = logger || Logger.new($stdout)
        @runs = runs
      end

      # Runs the benchmark.
      #
      # @return [Hash] `:times` (one duration in ms per run), `:average`
      #   (Float, or the String 'N/A' when nothing ran) and `:results`
      #   (one Hash per run with `:success`, `:duration` and either
      #   `:response` or `:error`)
      # @raise [ArgumentError] when the payload is neither a String nor a Hash
      def execute
        payload = with_overridden_forward_to_endpoint(parse_json(@json_payload))
        client = ::Gitlab::Search::Zoekt::Client.instance

        results = @runs.times.map { |index| run_once(client, payload, index + 1) }
        # Derive the timings from the results so both always have one entry per run.
        times = results.map { |result| result[:duration] }

        average = times.empty? ? 'N/A' : (times.sum / times.size.to_f).round(2)
        # Only append the unit when there is an actual number to report.
        average_label = times.empty? ? average : "#{average}ms"
        @logger.info("[Benchmark] Average duration: #{average_label} over #{@runs} runs")

        { times: times, average: average, results: results }
      end

      # If options[:endpoint] is present, override all forward_to entry endpoints in a new hash.
      # The payload is returned unchanged when no endpoint is configured or
      # when payload[:forward_to] is not an Array. The input is never mutated.
      def with_overridden_forward_to_endpoint(payload)
        endpoint = @options[:endpoint]
        return payload unless endpoint && payload[:forward_to].is_a?(Array)

        overridden = payload[:forward_to].map { |entry| entry.merge(endpoint: endpoint) }
        payload.merge(forward_to: overridden)
      end

      private

      # Sends one request and returns its result Hash. Any StandardError is
      # logged and reported as a failed run instead of aborting the benchmark.
      def run_once(client, payload, run_number)
        @logger.info("[Benchmark] Run ##{run_number}...")
        started_at = monotonic_now
        duration = nil

        begin
          response = client.send(:post_request, @url, payload, **@options) # rubocop:disable GitlabSecurity/PublicSend -- N/A
          duration = elapsed_ms(started_at)
          @logger.info("[Benchmark] Duration: #{duration}ms")

          { success: true, response: filter_files_from_response(response), duration: duration }
        rescue StandardError => e
          # Keep the request timing if the failure happened after the request returned.
          duration ||= elapsed_ms(started_at)
          @logger.error("[Benchmark] Error: #{e.message} (#{duration}ms)")

          { success: false, error: e.message, duration: duration }
        end
      end

      def monotonic_now
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      end

      # Milliseconds elapsed since `started_at`, rounded to two decimals.
      def elapsed_ms(started_at)
        ((monotonic_now - started_at) * 1000).round(2)
      end

      # Normalises the payload into a Hash with symbol keys.
      def parse_json(json)
        case json
        when String
          Gitlab::Json.parse(json, symbolize_names: true)
        when Hash
          json.deep_symbolize_keys
        else
          raise ArgumentError, "Invalid payload: must be JSON string or Hash"
        end
      end

      # Returns a copy of the response without response["Result"]["Files"].
      # Responses that have no "Result" hash, or no "Files" entry in it, are
      # returned untouched; the original response is never modified.
      def filter_files_from_response(response)
        result = response[RESULT_KEY]
        return response unless result.is_a?(Hash) && result.key?(FILES_KEY)

        response.with_indifferent_access.tap { |filtered| filtered[RESULT_KEY].delete(FILES_KEY) }
      end
    end
  end
end
b660724d83bc9cea9b9af3dba7c19d35b4544b80..976e33ee74d9700fe5b7d61f822cf453cf53af84 100644 --- a/ee/lib/gitlab/search/zoekt/client.rb +++ b/ee/lib/gitlab/search/zoekt/client.rb @@ -43,9 +43,12 @@ def search(query, num:, project_ids:, node_id:, search_mode:, source: nil) def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) start = Time.current + request_stats = { db_start: start } + targets = options[:targets] if search_level(options) != :project && !Ability.allowed?(current_user, :read_cross_project) - log_debug('User does not have permission to search across projects, returning empty response') if debug? + # log_debug('User does not have permission to search across projects, returning empty response') if debug? + puts('User does not have permission to search across projects, returning empty response') return Gitlab::Search::Zoekt::Response.empty end @@ -53,7 +56,8 @@ def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) group_id = options[:group_id] if !use_traversal_id_query?(current_user, project_id: project_id, group_id: group_id) && targets.blank? - log_debug('No targets provided, returning empty response') if debug? + puts('No targets provided, returning empty response') + # log_debug('No targets provided, returning empty response') if debug? return Gitlab::Search::Zoekt::Response.empty end @@ -70,11 +74,17 @@ def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) search_mode: search_mode, **options ).as_json + request_stats[:db_end] = Time.current proxy_node = fetch_proxy_node(**options) raise 'Node can not be found' unless proxy_node + request_stats[:zoekt_start] = Time.current response = post_request(join_url(proxy_node.search_base_url, PROXY_SEARCH_PATH), payload) + request_stats[:zoekt_end] = Time.current + + yield request_stats if block_given? + log_error('Zoekt search failed', status: response.code, response: response.body) unless response.success? 
log_debug('Zoekt AST request', payload: payload) if debug? Gitlab::Search::Zoekt::Response.new parse_response(response), current_user: current_user diff --git a/ee/lib/gitlab/search/zoekt/response.rb b/ee/lib/gitlab/search/zoekt/response.rb index d3eb14cde3be0c335d52f1b3e18924059d9b927e..08e36c7999eb9906c9d53b4b389917d2ce7d5e57 100644 --- a/ee/lib/gitlab/search/zoekt/response.rb +++ b/ee/lib/gitlab/search/zoekt/response.rb @@ -49,9 +49,11 @@ def match_count @match_count ||= (result['Files']&.sum { |x| x['LineMatches']&.count }).to_i end - def each_file - files = result[:Files] || [] + def files + @files ||= result['Files'] || [] + end + def each_file files.each do |file| yield file end diff --git a/ee/lib/search/rake_task/zoekt.rb b/ee/lib/search/rake_task/zoekt.rb index e069e3f49cc80503e9b9cd7b4ba9292aa1e719b0..de252ed467111c8c4819e501555aaa561f8a1d9a 100644 --- a/ee/lib/search/rake_task/zoekt.rb +++ b/ee/lib/search/rake_task/zoekt.rb @@ -18,6 +18,16 @@ def info(name:, extended: nil, watch_interval: nil) end end + def benchmark(user_id: nil, group_id: nil, project_id: nil) + options = { + user_id: user_id, + group_id: group_id, + project_id: project_id + }.compact + + task_executor_service(options: options).execute(:benchmark) + end + private def task_executor_service(options: {}) diff --git a/ee/lib/search/zoekt/cache.rb b/ee/lib/search/zoekt/cache.rb index 0a687df875dbe8229707f5b6a0d2821fdc000290..1ce3aaa2ffd60f16b83b1efc254f6545ed87eaa3 100644 --- a/ee/lib/search/zoekt/cache.rb +++ b/ee/lib/search/zoekt/cache.rb @@ -31,6 +31,8 @@ def initialize(query, **options) end def enabled? + return false + return false unless Gitlab::CurrentSettings.zoekt_cache_response? (project_id.present? || group_id.present?) 
&& per_page <= max_per_page diff --git a/ee/lib/tasks/gitlab/zoekt.rake b/ee/lib/tasks/gitlab/zoekt.rake index 9a6d46edf558e647d5e9103deeb51dc38d0f1233..2a47cc67cccd9243faa5f8b27c69462cba06dc43 100644 --- a/ee/lib/tasks/gitlab/zoekt.rake +++ b/ee/lib/tasks/gitlab/zoekt.rake @@ -11,6 +11,24 @@ namespace :gitlab do ) end + desc 'GitLab | Zoekt | Benchmark with options: --user-id, --group-id, --project-id' + task :benchmark, [:options] => :environment do |_t, args| + require 'optparse' + + options = {} + OptionParser.new do |opts| + opts.on('--user-id USER_ID', 'User ID') { |v| options[:user_id] = v } + opts.on('--group-id GROUP_ID', 'Group ID') { |v| options[:group_id] = v } + opts.on('--project-id PROJECT_ID', 'Project ID') { |v| options[:project_id] = v } + end.parse!(Array(args[:options]).flat_map { |s| s.to_s.split }) + + Search::RakeTask::Zoekt.benchmark( + user_id: options[:user_id], + group_id: options[:group_id], + project_id: options[:project_id] + ) + end + desc "GitLab | Zoekt Indexer | Install or upgrade gitlab-zoekt" task :install, [:dir, :repo] => :gitlab_environment do |_, args| unless args.dir.present?