From 68b6b9f343b3f8f6d796481fff71497c674b68b7 Mon Sep 17 00:00:00 2001 From: John Mason Date: Tue, 2 Sep 2025 16:28:10 -0400 Subject: [PATCH 1/6] Add zoekt benchmarking script --- .../search/zoekt/benchmark_service.rb | 293 ++++++++++++++++++ .../zoekt/rake_task_executor_service.rb | 5 + ee/lib/gitlab/search/zoekt/client.rb | 14 +- ee/lib/gitlab/search/zoekt/response.rb | 6 +- ee/lib/search/rake_task/zoekt.rb | 10 + ee/lib/tasks/gitlab/zoekt.rake | 18 ++ 6 files changed, 342 insertions(+), 4 deletions(-) create mode 100644 ee/app/services/search/zoekt/benchmark_service.rb diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb new file mode 100644 index 00000000000000..d494f280068210 --- /dev/null +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -0,0 +1,293 @@ +# frozen_string_literal: true + +require 'benchmark' + +module Search + module Zoekt + class BenchmarkService + include ActionView::Helpers::NumberHelper + include ActionView::Helpers::DateHelper + + DEFAULT_QUERIES = %w[ + test + TODO + fix + update + refactor + bug + error + function + class + import + zoekt + ].freeze + + SEARCH_MODES = [:regex, :exact].freeze + RESULT_LIMITS = [5, 10, 25, 50, 100].freeze + CONCURRENT_COUNTS = [1, 3, 5].freeze + + PATTERN_TESTS = { + "Simple word" => "test", + "Multiple words" => "test case", + "Regex pattern" => "test.*case", + "Complex regex" => "(TODO|FIXME|HACK).*urgent", + "Case sensitive" => "Test", + "Special chars" => "user@example.com" + }.freeze + + def self.execute(...) + new(...).execute + end + + def initialize(logger:, options: {}) + @logger = logger + @options = options.with_indifferent_access + @results = [] + @entries = [] + end + + def execute + validate_setup! + display_benchmark_header + benchmark_simple_searches + display_summary + end + + private + + attr_reader :logger, :options, :results, :entries + + def validate_setup! + raise ArgumentError, "No search scope provided (group_id or project_id required)" unless group || project + end + + def current_user + @current_user ||= User.find_by_id(options[:user_id]) + end + + def group + @group ||= Group.find_by_id(options[:group_id]) if options[:group_id] + end + + def project + @project ||= Project.find_by_id(options[:project_id]) if options[:project_id] + end + + def search_queries + @search_queries ||= options[:queries].presence || DEFAULT_QUERIES + end + + def search_options + @search_options ||= {}.tap do |opts| + opts[:group_id] = group.id if group + opts[:project_id] = project.id if project + end + end + + def display_benchmark_header + log_header("Search Performance Benchmarks") + log("User", value: current_user.username) + log("Group", value: group&.full_path || "N/A") + log("Project", value: project&.full_path || "N/A") + log("Timestamp", value: Time.current) + log("Query count", value: search_queries.size) + log("Traversal ID queries enabled", value: use_traversal_id_queries?) + display_entries + end + + def use_traversal_id_queries? + Feature.enabled?(:zoekt_traversal_id_queries, current_user) + end + + def benchmark_simple_searches # rubocop:disable Metrics/AbcSize -- N/A + log_header("Search Term Performance") + table = [] + @search_count = 0 + search_queries.each do |term| + db_times = [] + zoekt_times = [] + total_times = [] + 5.times do + result = measure_search( + query: term, + num: 10, + search_mode: :regex, + request_stats_collector: ->(request_stats) { + db = request_stats[:db_end].to_f - request_stats[:db_start].to_f + zoekt = request_stats[:zoekt_end].to_f - request_stats[:zoekt_start].to_f + db_times << (db * 1000) + zoekt_times << (zoekt * 1000) + total_times << ((db + zoekt) * 1000) + } + ) + @search_count += 1 if result[:success] + end + + next if db_times.empty? || zoekt_times.empty? || total_times.empty? + + table << { + term: term, + db: "#{(db_times.sum / db_times.size).round(2)}ms", + zoekt: "#{(zoekt_times.sum / zoekt_times.size).round(2)}ms", + total: "#{(total_times.sum / total_times.size).round(2)}ms" + } + end + # Print as table + logger.info("| Search Term | DB | Zoekt | Total |") + logger.info("|-------------|---------:|---------:|---------:|") + table.each do |row| + logger.info(format("| %-11s | %8s | %8s | %8s |", row[:term], row[:db], row[:zoekt], row[:total])) + end + + # Grand total average row + all_db = table.map { |row| row[:db].to_f } + all_zoekt = table.map { |row| row[:zoekt].to_f } + all_total = table.map { |row| row[:total].to_f } + + return unless all_db.any? && all_zoekt.any? && all_total.any? + + logger.info("|==============================================|") + logger.info(format("| %-11s | %8s | %8s | %8s |", 'TOTAL', "#{(all_db.sum / all_db.size).round(2)}ms", + "#{(all_zoekt.sum / all_zoekt.size).round(2)}ms", "#{(all_total.sum / all_total.size).round(2)}ms")) + end + + def measure_search(query:, num:, search_mode:, description: nil, request_stats_collector: nil) + result = { + description: description || "Search: #{query}", + query: query, + num: num, + search_mode: search_mode, + timestamp: Time.current + } + + targets = nil + if Feature.disabled?(:zoekt_traversal_id_queries, current_user) + projects = project ? [project] : group.projects + targets = ::Search::Zoekt::RoutingService.execute(projects) + end + + benchmark_result = Benchmark.measure do + response = ::Gitlab::Search::Zoekt::Client.search_zoekt_proxy( + query, + num: num, + search_mode: search_mode, + current_user: current_user, + targets: targets, + **search_options + ) do |request_stats| + request_stats_collector.call(request_stats) if request_stats_collector + end + + result[:response_size] = response.to_s.bytesize + result[:file_matches] = response.files&.size || 0 + result[:success] = true + rescue StandardError => e + result[:error] = e.message + result[:success] = false + end + + result[:duration] = benchmark_result.real + result[:cpu_time] = benchmark_result.total + result[:user_cpu] = benchmark_result.utime + result[:system_cpu] = benchmark_result.stime + + result + end + + def display_result(result) + status = result[:success] ? "✓" : "✗" + time_str = result[:duration] ? "#{(result[:duration] * 1000).round(2)}ms" : "N/A" + matches_str = result[:file_matches] ? " (#{result[:file_matches]} files)" : "" + + log((result[:description]).to_s, value: "#{status} #{time_str}#{matches_str}") + log(" Error: #{result[:error]}", value: "") if result[:error] + end + + def display_summary + log_header("BENCHMARK SUMMARY") + total = @search_count || 0 + log("Total searches", value: total) + log("Successful", value: total) + log("Failed", value: 0) + display_entries + end + + def format_value(value) + case value + when TrueClass + Rainbow('yes').green + when FalseClass + 'no' + when ActiveSupport::TimeWithZone, Time + utc_time = value.utc + relative_time = time_ago_in_words(utc_time) + "#{utc_time} (#{relative_time} ago)" + when NilClass + Rainbow('(never)').yellow + else + value.to_s + end + end + + def log_header(message) + display_entries # Display any collected entries before the new header + logger.info("\n#{Rainbow(message).bright.yellow.underline}") + @entries = [] # Start a new section + end + + def log(key, value: nil, nested: nil) + entries << { + key: key, + value: value, + nested: nested + } + end + + def display_entries + return if entries.empty? + + # Calculate padding based only on current section's entries + max_length = entries.map { |entry| entry[:key].length }.max + padding = max_length + 2 # Add 2 for the colon and space + + entries.each do |entry| + key_with_padding = "#{entry[:key]}:#{' ' * (padding - entry[:key].length)}" + + if entry[:nested] + if entry[:nested].empty? + logger.info("#{key_with_padding}#{Rainbow('(none)').yellow}") + else + logger.info("#{key_with_padding}#{entry[:value]}") + display_nested_value(entry[:nested]) + end + else + formatted_value = format_value(entry[:value]) + logger.info("#{key_with_padding}#{formatted_value}") + end + end + @entries = [] + end + + def display_nested_value(value, indent = 2) + # Sort by key to ensure consistent order + value.sort.each do |k, v| + # For watermark levels and states, apply special coloring + colored_value = case k.to_sym + when :critical, :failed, :critical_watermark_exceeded + Rainbow(v).red.bright + when :high, :evicted, :high_watermark_exceeded + Rainbow(v).red + when :low, :overprovisioned, :low_watermark_exceeded + Rainbow(v).yellow + when :normal, :done, :ready, :healthy + Rainbow(v).green + else + v + end + + logger.info("#{' ' * indent}- #{k}: #{colored_value}") + end + end + end + end +end diff --git a/ee/app/services/search/zoekt/rake_task_executor_service.rb b/ee/app/services/search/zoekt/rake_task_executor_service.rb index 7b3ab3952c42b0..a2d42266dfd956 100644 --- a/ee/app/services/search/zoekt/rake_task_executor_service.rb +++ b/ee/app/services/search/zoekt/rake_task_executor_service.rb @@ -5,6 +5,7 @@ module Zoekt class RakeTaskExecutorService TASKS = %i[ info + benchmark ].freeze def initialize(logger:, options:) @@ -26,6 +27,10 @@ def execute(task) def info InfoService.execute(logger: logger, options: options) end + + def benchmark + BenchmarkService.execute(logger: logger, options: options) + end end end end diff --git a/ee/lib/gitlab/search/zoekt/client.rb b/ee/lib/gitlab/search/zoekt/client.rb index b660724d83bc9c..976e33ee74d970 100644 --- a/ee/lib/gitlab/search/zoekt/client.rb +++ b/ee/lib/gitlab/search/zoekt/client.rb @@ -43,9 +43,12 @@ def search(query, num:, project_ids:, node_id:, search_mode:, source: nil) def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) start = Time.current + request_stats = { db_start: start } + targets = options[:targets] if search_level(options) != :project && !Ability.allowed?(current_user, :read_cross_project) - log_debug('User does not have permission to search across projects, returning empty response') if debug? + # log_debug('User does not have permission to search across projects, returning empty response') if debug? + puts('User does not have permission to search across projects, returning empty response') return Gitlab::Search::Zoekt::Response.empty end @@ -53,7 +56,8 @@ def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) group_id = options[:group_id] if !use_traversal_id_query?(current_user, project_id: project_id, group_id: group_id) && targets.blank? - log_debug('No targets provided, returning empty response') if debug? + puts('No targets provided, returning empty response') + # log_debug('No targets provided, returning empty response') if debug? return Gitlab::Search::Zoekt::Response.empty end @@ -70,11 +74,17 @@ def search_zoekt_proxy(query, num:, search_mode:, current_user: nil, **options) search_mode: search_mode, **options ).as_json + request_stats[:db_end] = Time.current proxy_node = fetch_proxy_node(**options) raise 'Node can not be found' unless proxy_node + request_stats[:zoekt_start] = Time.current response = post_request(join_url(proxy_node.search_base_url, PROXY_SEARCH_PATH), payload) + request_stats[:zoekt_end] = Time.current + + yield request_stats if block_given? + log_error('Zoekt search failed', status: response.code, response: response.body) unless response.success? log_debug('Zoekt AST request', payload: payload) if debug? Gitlab::Search::Zoekt::Response.new parse_response(response), current_user: current_user diff --git a/ee/lib/gitlab/search/zoekt/response.rb b/ee/lib/gitlab/search/zoekt/response.rb index d3eb14cde3be0c..08e36c7999eb99 100644 --- a/ee/lib/gitlab/search/zoekt/response.rb +++ b/ee/lib/gitlab/search/zoekt/response.rb @@ -49,9 +49,11 @@ def match_count @match_count ||= (result['Files']&.sum { |x| x['LineMatches']&.count }).to_i end - def each_file - files = result[:Files] || [] + def files + @files ||= result['Files'] || [] + end + def each_file files.each do |file| yield file end diff --git a/ee/lib/search/rake_task/zoekt.rb b/ee/lib/search/rake_task/zoekt.rb index e069e3f49cc805..de252ed467111c 100644 --- a/ee/lib/search/rake_task/zoekt.rb +++ b/ee/lib/search/rake_task/zoekt.rb @@ -18,6 +18,16 @@ def info(name:, extended: nil, watch_interval: nil) end end + def benchmark(user_id: nil, group_id: nil, project_id: nil) + options = { + user_id: user_id, + group_id: group_id, + project_id: project_id + }.compact + + task_executor_service(options: options).execute(:benchmark) + end + private def task_executor_service(options: {}) diff --git a/ee/lib/tasks/gitlab/zoekt.rake b/ee/lib/tasks/gitlab/zoekt.rake index 9a6d46edf558e6..2a47cc67cccd92 100644 --- a/ee/lib/tasks/gitlab/zoekt.rake +++ b/ee/lib/tasks/gitlab/zoekt.rake @@ -11,6 +11,24 @@ namespace :gitlab do ) end + desc 'GitLab | Zoekt | Benchmark with options: --user-id, --group-id, --project-id' + task :benchmark, [:options] => :environment do |_t, args| + require 'optparse' + + options = {} + OptionParser.new do |opts| + opts.on('--user-id USER_ID', 'User ID') { |v| options[:user_id] = v } + opts.on('--group-id GROUP_ID', 'Group ID') { |v| options[:group_id] = v } + opts.on('--project-id PROJECT_ID', 'Project ID') { |v| options[:project_id] = v } + end.parse!(Array(args[:options]).flat_map { |s| s.to_s.split }) + + Search::RakeTask::Zoekt.benchmark( + user_id: options[:user_id], + group_id: options[:group_id], + project_id: options[:project_id] + ) + end + desc "GitLab | Zoekt Indexer | Install or upgrade gitlab-zoekt" task :install, [:dir, :repo] => :gitlab_environment do |_, args| unless args.dir.present? -- GitLab From 47646f23485b51cb24bbd69e1eceb5440f709129 Mon Sep 17 00:00:00 2001 From: John Mason Date: Wed, 3 Sep 2025 09:32:27 -0400 Subject: [PATCH 2/6] Make copy and pasteable --- ee/app/services/search/zoekt/benchmark_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb index d494f280068210..d4ffba5e04e776 100644 --- a/ee/app/services/search/zoekt/benchmark_service.rb +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -179,7 +179,7 @@ def measure_search(query:, num:, search_mode:, description: nil, request_stats_c end result[:response_size] = response.to_s.bytesize - result[:file_matches] = response.files&.size || 0 + result[:file_matches] = response[:Files]&.size || 0 result[:success] = true rescue StandardError => e result[:error] = e.message -- GitLab From 620bc6e671cee5ae9f14964a4ad5518042451389 Mon Sep 17 00:00:00 2001 From: John Mason Date: Wed, 3 Sep 2025 22:37:47 -0400 Subject: [PATCH 3/6] Refactor for readability --- .../search/zoekt/benchmark_service.rb | 255 ++++++------------ 1 file changed, 81 insertions(+), 174 deletions(-) diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb index d4ffba5e04e776..f3728b0df523e7 100644 --- a/ee/app/services/search/zoekt/benchmark_service.rb +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -9,32 +9,10 @@ class BenchmarkService include ActionView::Helpers::DateHelper DEFAULT_QUERIES = %w[ - test - TODO - fix - update - refactor - bug - error - function - class - import - zoekt + test TODO fix update refactor bug error function class import zoekt ].freeze - SEARCH_MODES = [:regex, :exact].freeze - RESULT_LIMITS = [5, 10, 25, 50, 100].freeze - CONCURRENT_COUNTS = [1, 3, 5].freeze - - PATTERN_TESTS = { - "Simple word" => "test", - "Multiple words" => "test case", - "Regex pattern" => "test.*case", - "Complex regex" => "(TODO|FIXME|HACK).*urgent", - "Case sensitive" => "Test", - "Special chars" => "user@example.com" - }.freeze - + # Entry point for running the benchmark def self.execute(...) new(...).execute end @@ -42,14 +20,13 @@ def self.execute(...) def initialize(logger:, options: {}) @logger = logger @options = options.with_indifferent_access - @results = [] @entries = [] end def execute validate_setup! display_benchmark_header - benchmark_simple_searches + benchmark_search_terms display_summary end @@ -57,6 +34,7 @@ def execute attr_reader :logger, :options, :results, :entries + # --- Setup & Validation --- def validate_setup! raise ArgumentError, "No search scope provided (group_id or project_id required)" unless group || project end @@ -78,216 +56,145 @@ def search_queries end def search_options - @search_options ||= {}.tap do |opts| - opts[:group_id] = group.id if group - opts[:project_id] = project.id if project - end + opts = {} + opts[:group_id] = group.id if group + opts[:project_id] = project.id if project + opts end + # --- Logging & Display --- def display_benchmark_header log_header("Search Performance Benchmarks") - log("User", value: current_user.username) + log("User", value: current_user&.username) log("Group", value: group&.full_path || "N/A") log("Project", value: project&.full_path || "N/A") log("Timestamp", value: Time.current) log("Query count", value: search_queries.size) - log("Traversal ID queries enabled", value: use_traversal_id_queries?) display_entries end - def use_traversal_id_queries? - Feature.enabled?(:zoekt_traversal_id_queries, current_user) + # --- Benchmarking --- + def benchmark_search_terms + log_header("Search Term Performance") + table = search_queries.filter_map { |term| benchmark_term(term) } + print_benchmark_table(table) + end + + def benchmark_term(term) + db_times = [] + zoekt_times = [] + total_times = [] + 5.times do + measure_search( + query: term, + num: 10, + search_mode: :regex, + request_stats_collector: ->(stats) { + db = stats[:db_end].to_f - stats[:db_start].to_f + zoekt = stats[:zoekt_end].to_f - stats[:zoekt_start].to_f + db_times << (db * 1000) + zoekt_times << (zoekt * 1000) + total_times << ((db + zoekt) * 1000) + } + ) + end + return if db_times.empty? || zoekt_times.empty? || total_times.empty? + + { + term: term, + db: avg_ms(db_times), + zoekt: avg_ms(zoekt_times), + total: avg_ms(total_times) + } end - def benchmark_simple_searches # rubocop:disable Metrics/AbcSize -- N/A - log_header("Search Term Performance") - table = [] - @search_count = 0 - search_queries.each do |term| - db_times = [] - zoekt_times = [] - total_times = [] - 5.times do - result = measure_search( - query: term, - num: 10, - search_mode: :regex, - request_stats_collector: ->(request_stats) { - db = request_stats[:db_end].to_f - request_stats[:db_start].to_f - zoekt = request_stats[:zoekt_end].to_f - request_stats[:zoekt_start].to_f - db_times << (db * 1000) - zoekt_times << (zoekt * 1000) - total_times << ((db + zoekt) * 1000) - } - ) - @search_count += 1 if result[:success] - end + def avg_ms(arr) + return "N/A" if arr.empty? - next if db_times.empty? || zoekt_times.empty? || total_times.empty? + "#{(arr.sum / arr.size).round(2)}ms" + end - table << { - term: term, - db: "#{(db_times.sum / db_times.size).round(2)}ms", - zoekt: "#{(zoekt_times.sum / zoekt_times.size).round(2)}ms", - total: "#{(total_times.sum / total_times.size).round(2)}ms" - } - end - # Print as table + def print_benchmark_table(table) logger.info("| Search Term | DB | Zoekt | Total |") logger.info("|-------------|---------:|---------:|---------:|") table.each do |row| logger.info(format("| %-11s | %8s | %8s | %8s |", row[:term], row[:db], row[:zoekt], row[:total])) end + return if table.empty? - # Grand total average row all_db = table.map { |row| row[:db].to_f } all_zoekt = table.map { |row| row[:zoekt].to_f } all_total = table.map { |row| row[:total].to_f } - - return unless all_db.any? && all_zoekt.any? && all_total.any? - logger.info("|==============================================|") - logger.info(format("| %-11s | %8s | %8s | %8s |", 'TOTAL', "#{(all_db.sum / all_db.size).round(2)}ms", - "#{(all_zoekt.sum / all_zoekt.size).round(2)}ms", "#{(all_total.sum / all_total.size).round(2)}ms")) - end - - def measure_search(query:, num:, search_mode:, description: nil, request_stats_collector: nil) - result = { - description: description || "Search: #{query}", - query: query, - num: num, - search_mode: search_mode, - timestamp: Time.current - } - - targets = nil - if Feature.disabled?(:zoekt_traversal_id_queries, current_user) - projects = project ? [project] : group.projects - targets = ::Search::Zoekt::RoutingService.execute(projects) - end - - benchmark_result = Benchmark.measure do - response = ::Gitlab::Search::Zoekt::Client.search_zoekt_proxy( - query, - num: num, - search_mode: search_mode, - current_user: current_user, - targets: targets, - **search_options - ) do |request_stats| - request_stats_collector.call(request_stats) if request_stats_collector + logger.info(format("| %-11s | %8s | %8s | %8s |", 'TOTAL', avg_ms(all_db), avg_ms(all_zoekt), + avg_ms(all_total))) + end + + def measure_search(query:, num:, search_mode:, request_stats_collector: nil) + result = {} + begin + benchmark_result = Benchmark.measure do + response = ::Gitlab::Search::Zoekt::Client.search_zoekt_proxy( + query, + num: num, + search_mode: search_mode, + current_user: current_user, + **search_options + ) do |stats| + request_stats_collector&.call(stats) + end + result[:file_matches] = response[:Files]&.size || 0 + result[:success] = true end - - result[:response_size] = response.to_s.bytesize - result[:file_matches] = response[:Files]&.size || 0 - result[:success] = true + result[:duration] = benchmark_result.real rescue StandardError => e result[:error] = e.message result[:success] = false end - - result[:duration] = benchmark_result.real - result[:cpu_time] = benchmark_result.total - result[:user_cpu] = benchmark_result.utime - result[:system_cpu] = benchmark_result.stime - result end - def display_result(result) - status = result[:success] ? "✓" : "✗" - time_str = result[:duration] ? "#{(result[:duration] * 1000).round(2)}ms" : "N/A" - matches_str = result[:file_matches] ? " (#{result[:file_matches]} files)" : "" - - log((result[:description]).to_s, value: "#{status} #{time_str}#{matches_str}") - log(" Error: #{result[:error]}", value: "") if result[:error] - end - def display_summary log_header("BENCHMARK SUMMARY") - total = @search_count || 0 - log("Total searches", value: total) - log("Successful", value: total) - log("Failed", value: 0) + log("Total queries", value: search_queries.size) display_entries end def format_value(value) case value - when TrueClass - Rainbow('yes').green - when FalseClass - 'no' + when TrueClass then 'yes' + when FalseClass then 'no' when ActiveSupport::TimeWithZone, Time utc_time = value.utc relative_time = time_ago_in_words(utc_time) "#{utc_time} (#{relative_time} ago)" - when NilClass - Rainbow('(never)').yellow - else - value.to_s + when NilClass then '(never)' + else value.to_s end end def log_header(message) - display_entries # Display any collected entries before the new header - logger.info("\n#{Rainbow(message).bright.yellow.underline}") - @entries = [] # Start a new section + display_entries + logger.info("\n#{message}") + @entries = [] end def log(key, value: nil, nested: nil) - entries << { - key: key, - value: value, - nested: nested - } + entries << { key: key, value: value, nested: nested } end def display_entries return if entries.empty? - # Calculate padding based only on current section's entries max_length = entries.map { |entry| entry[:key].length }.max - padding = max_length + 2 # Add 2 for the colon and space - + padding = max_length + 2 entries.each do |entry| key_with_padding = "#{entry[:key]}:#{' ' * (padding - entry[:key].length)}" - - if entry[:nested] - if entry[:nested].empty? - logger.info("#{key_with_padding}#{Rainbow('(none)').yellow}") - else - logger.info("#{key_with_padding}#{entry[:value]}") - display_nested_value(entry[:nested]) - end - else - formatted_value = format_value(entry[:value]) - logger.info("#{key_with_padding}#{formatted_value}") - end + formatted_value = format_value(entry[:value]) + logger.info("#{key_with_padding}#{formatted_value}") end @entries = [] end - - def display_nested_value(value, indent = 2) - # Sort by key to ensure consistent order - value.sort.each do |k, v| - # For watermark levels and states, apply special coloring - colored_value = case k.to_sym - when :critical, :failed, :critical_watermark_exceeded - Rainbow(v).red.bright - when :high, :evicted, :high_watermark_exceeded - Rainbow(v).red - when :low, :overprovisioned, :low_watermark_exceeded - Rainbow(v).yellow - when :normal, :done, :ready, :healthy - Rainbow(v).green - else - v - end - - logger.info("#{' ' * indent}- #{k}: #{colored_value}") - end - end end end end -- GitLab From b17fe9650b2afa29109e25eaac0b8dd9f2193829 Mon Sep 17 00:00:00 2001 From: John Mason Date: Mon, 8 Sep 2025 18:24:31 -0400 Subject: [PATCH 4/6] Push up latest script --- .../search/zoekt/benchmark_service.rb | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb index f3728b0df523e7..a1a89d13939b5d 100644 --- a/ee/app/services/search/zoekt/benchmark_service.rb +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -59,6 +59,7 @@ def search_options opts = {} opts[:group_id] = group.id if group opts[:project_id] = project.id if project + opts[:targets] = options[:targets] if options[:targets].present? opts end @@ -84,8 +85,12 @@ def benchmark_term(term) db_times = [] zoekt_times = [] total_times = [] - 5.times do - measure_search( + results = [] + stats_collected = [] + 5.times do |i| + logger.debug("[Benchmark] Running search ##{i+1} for term: '#{term}' with options: #{search_options.inspect}") if logger.debug? + stats_for_run = nil + res = measure_search( query: term, num: 10, search_mode: :regex, @@ -95,10 +100,16 @@ def benchmark_term(term) db_times << (db * 1000) zoekt_times << (zoekt * 1000) total_times << ((db + zoekt) * 1000) + stats_for_run = stats } ) + results << res.merge(stats: stats_for_run) + stats_collected << stats_for_run + end + if db_times.empty? || zoekt_times.empty? || total_times.empty? + logger.warn("[Benchmark] No timing stats for term: '#{term}'. Results: #{results.inspect} Stats: #{stats_collected.inspect}") + return end - return if db_times.empty? || zoekt_times.empty? || total_times.empty? { term: term, @@ -132,6 +143,7 @@ def print_benchmark_table(table) def measure_search(query:, num:, search_mode:, request_stats_collector: nil) result = {} + logger.debug("[Benchmark] measure_search: query='#{query}', num=#{num}, search_mode=#{search_mode}, options=#{search_options.inspect}") if logger.debug? begin benchmark_result = Benchmark.measure do response = ::Gitlab::Search::Zoekt::Client.search_zoekt_proxy( @@ -143,7 +155,7 @@ def measure_search(query:, num:, search_mode:, request_stats_collector: nil) ) do |stats| request_stats_collector&.call(stats) end - result[:file_matches] = response[:Files]&.size || 0 + result[:file_matches] = response.result[:Files]&.size || 0 result[:success] = true end result[:duration] = benchmark_result.real -- GitLab From 124688be7c6845f27a25b83689ed715a4af5dae9 Mon Sep 17 00:00:00 2001 From: John Mason Date: Tue, 9 Sep 2025 18:22:52 -0400 Subject: [PATCH 5/6] Use search service in benchmark script --- .../search/zoekt/benchmark_service.rb | 64 +++++++------------ ee/lib/search/zoekt/cache.rb | 2 + 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb index a1a89d13939b5d..2f768d7f05f328 100644 --- a/ee/app/services/search/zoekt/benchmark_service.rb +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -82,40 +82,22 @@ def benchmark_search_terms end def benchmark_term(term) - db_times = [] zoekt_times = [] - total_times = [] results = [] - stats_collected = [] 5.times do |i| logger.debug("[Benchmark] Running search ##{i+1} for term: '#{term}' with options: #{search_options.inspect}") if logger.debug? - stats_for_run = nil res = measure_search( query: term, num: 10, search_mode: :regex, - request_stats_collector: ->(stats) { - db = stats[:db_end].to_f - stats[:db_start].to_f - zoekt = stats[:zoekt_end].to_f - stats[:zoekt_start].to_f - db_times << (db * 1000) - zoekt_times << (zoekt * 1000) - total_times << ((db + zoekt) * 1000) - stats_for_run = stats - } ) - results << res.merge(stats: stats_for_run) - stats_collected << stats_for_run - end - if db_times.empty? || zoekt_times.empty? || total_times.empty? - logger.warn("[Benchmark] No timing stats for term: '#{term}'. Results: #{results.inspect} Stats: #{stats_collected.inspect}") - return + zoekt_times << (res[:duration].to_f * 1000) if res[:duration] + results << res end { term: term, - db: avg_ms(db_times), - zoekt: avg_ms(zoekt_times), - total: avg_ms(total_times) + zoekt: avg_ms(zoekt_times) } end @@ -126,36 +108,38 @@ def avg_ms(arr) end def print_benchmark_table(table) - logger.info("| Search Term | DB | Zoekt | Total |") - logger.info("|-------------|---------:|---------:|---------:|") + logger.info("| Search Term | Duration |") + logger.info("|-------------|---------:|") table.each do |row| - logger.info(format("| %-11s | %8s | %8s | %8s |", row[:term], row[:db], row[:zoekt], row[:total])) + logger.info(format("| %-11s | %8s |", row[:term], row[:zoekt])) end return if table.empty? - all_db = table.map { |row| row[:db].to_f } all_zoekt = table.map { |row| row[:zoekt].to_f } - all_total = table.map { |row| row[:total].to_f } - logger.info("|==============================================|") - logger.info(format("| %-11s | %8s | %8s | %8s |", 'TOTAL', avg_ms(all_db), avg_ms(all_zoekt), - avg_ms(all_total))) + logger.info("|========================|") + logger.info(format("| %-11s | %8s |", 'TOTAL', avg_ms(all_zoekt))) end - def measure_search(query:, num:, search_mode:, request_stats_collector: nil) + def measure_search(query:, num:, search_mode:) result = {} logger.debug("[Benchmark] measure_search: query='#{query}', num=#{num}, search_mode=#{search_mode}, options=#{search_options.inspect}") if logger.debug? begin + # Build params for SearchService as in BlobSearchResolver + params = search_options.merge( + search: query, + page: 1, + per_page: num, + multi_match_enabled: true, + chunk_count: nil, + scope: 'blobs', + regex: (search_mode == :regex), + include_archived: false, + exclude_forks: true + ) + search_service = SearchService.new(current_user, params) benchmark_result = Benchmark.measure do - response = ::Gitlab::Search::Zoekt::Client.search_zoekt_proxy( - query, - num: num, - search_mode: search_mode, - current_user: current_user, - **search_options - ) do |stats| - request_stats_collector&.call(stats) - end - result[:file_matches] = response.result[:Files]&.size || 0 + search_results = search_service.search_objects + result[:file_matches] = search_results.respond_to?(:size) ? search_results.size : 0 result[:success] = true end result[:duration] = benchmark_result.real diff --git a/ee/lib/search/zoekt/cache.rb b/ee/lib/search/zoekt/cache.rb index 0a687df875dbe8..1ce3aaa2ffd60f 100644 --- a/ee/lib/search/zoekt/cache.rb +++ b/ee/lib/search/zoekt/cache.rb @@ -31,6 +31,8 @@ def initialize(query, **options) end def enabled? + return false + return false unless Gitlab::CurrentSettings.zoekt_cache_response? (project_id.present? || group_id.present?) && per_page <= max_per_page -- GitLab From bb95360afb8f6bd09cf72769b938f1a4fd39731c Mon Sep 17 00:00:00 2001 From: John Mason Date: Sat, 13 Sep 2025 13:52:44 -0400 Subject: [PATCH 6/6] Update benchmark service --- .../search/zoekt/benchmark_service.rb | 224 +++++------------- 1 file changed, 59 insertions(+), 165 deletions(-) diff --git a/ee/app/services/search/zoekt/benchmark_service.rb b/ee/app/services/search/zoekt/benchmark_service.rb index 2f768d7f05f328..375def28aa5b0a 100644 --- a/ee/app/services/search/zoekt/benchmark_service.rb +++ b/ee/app/services/search/zoekt/benchmark_service.rb @@ -2,194 +2,88 @@ require 'benchmark' + module Search module Zoekt class BenchmarkService - include ActionView::Helpers::NumberHelper - include ActionView::Helpers::DateHelper - - DEFAULT_QUERIES = %w[ - test TODO fix update refactor bug error function class import zoekt - ].freeze + # Simple benchmark wrapper for Gitlab::Search::Zoekt::Client + # Usage: + # BenchmarkService.new(json_payload:, url:, options: {}, logger: nil, runs: 5).execute - # Entry point for running the benchmark - def self.execute(...) - new(...).execute + def self.execute(*args, **kwargs) + new(*args, **kwargs).execute end - def initialize(logger:, options: {}) - @logger = logger - @options = options.with_indifferent_access - @entries = [] + def initialize(json_payload:, url:, options: {}, logger: Logger.new($stdout), runs: 5) + @json_payload = json_payload + @url = url + @options = options + @logger = logger || Logger.new($stdout) + @runs = runs end def execute - validate_setup! - display_benchmark_header - benchmark_search_terms - display_summary - end - - private - - attr_reader :logger, :options, :results, :entries - - # --- Setup & Validation --- - def validate_setup! - raise ArgumentError, "No search scope provided (group_id or project_id required)" unless group || project - end - - def current_user - @current_user ||= User.find_by_id(options[:user_id]) - end - - def group - @group ||= Group.find_by_id(options[:group_id]) if options[:group_id] - end - - def project - @project ||= Project.find_by_id(options[:project_id]) if options[:project_id] - end - - def search_queries - @search_queries ||= options[:queries].presence || DEFAULT_QUERIES - end - - def search_options - opts = {} - opts[:group_id] = group.id if group - opts[:project_id] = project.id if project - opts[:targets] = options[:targets] if options[:targets].present? - opts - end - - # --- Logging & Display --- - def display_benchmark_header - log_header("Search Performance Benchmarks") - log("User", value: current_user&.username) - log("Group", value: group&.full_path || "N/A") - log("Project", value: project&.full_path || "N/A") - log("Timestamp", value: Time.current) - log("Query count", value: search_queries.size) - display_entries - end - - # --- Benchmarking --- - def benchmark_search_terms - log_header("Search Term Performance") - table = search_queries.filter_map { |term| benchmark_term(term) } - print_benchmark_table(table) - end - - def benchmark_term(term) - zoekt_times = [] + payload = parse_json(@json_payload) + payload = with_overridden_forward_to_endpoint(payload) + client = ::Gitlab::Search::Zoekt::Client.instance + times = [] results = [] - 5.times do |i| - logger.debug("[Benchmark] Running search ##{i+1} for term: '#{term}' with options: #{search_options.inspect}") if logger.debug? - res = measure_search( - query: term, - num: 10, - search_mode: :regex, - ) - zoekt_times << (res[:duration].to_f * 1000) if res[:duration] - results << res + @runs.times do |i| + @logger.info("[Benchmark] Run ##{i+1}...") + start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + begin + response = client.send(:post_request, @url, payload, **@options) # rubocop:disable GitlabSecurity/PublicSend -- N/A + duration = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round(2) + @logger.info("[Benchmark] Duration: #{duration}ms") + times << duration + filtered_response = filter_files_from_response(response) + results << { success: true, response: filtered_response, duration: duration } + rescue StandardError => e + duration = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start) * 1000).round(2) + @logger.error("[Benchmark] Error: #{e.message} (#{duration}ms)") + times << duration + results << { success: false, error: e.message, duration: duration } + end end - - { - term: term, - zoekt: avg_ms(zoekt_times) - } + avg = times.empty? ? 'N/A' : (times.sum / times.size.to_f).round(2) + @logger.info("[Benchmark] Average duration: #{avg}ms over #{@runs} runs") + { times: times, average: avg, results: results } end - def avg_ms(arr) - return "N/A" if arr.empty? - - "#{(arr.sum / arr.size).round(2)}ms" - end - - def print_benchmark_table(table) - logger.info("| Search Term | Duration |") - logger.info("|-------------|---------:|") - table.each do |row| - logger.info(format("| %-11s | %8s |", row[:term], row[:zoekt])) - end - return if table.empty? - - all_zoekt = table.map { |row| row[:zoekt].to_f } - logger.info("|========================|") - logger.info(format("| %-11s | %8s |", 'TOTAL', avg_ms(all_zoekt))) - end + # If options[:endpoint] is present, override all forward_to entry endpoints in a new hash + def with_overridden_forward_to_endpoint(payload) + endpoint = @options[:endpoint] + return payload unless endpoint && payload[:forward_to].is_a?(Array) - def measure_search(query:, num:, search_mode:) - result = {} - logger.debug("[Benchmark] measure_search: query='#{query}', num=#{num}, search_mode=#{search_mode}, options=#{search_options.inspect}") if logger.debug? - begin - # Build params for SearchService as in BlobSearchResolver - params = search_options.merge( - search: query, - page: 1, - per_page: num, - multi_match_enabled: true, - chunk_count: nil, - scope: 'blobs', - regex: (search_mode == :regex), - include_archived: false, - exclude_forks: true - ) - search_service = SearchService.new(current_user, params) - benchmark_result = Benchmark.measure do - search_results = search_service.search_objects - result[:file_matches] = search_results.respond_to?(:size) ? search_results.size : 0 - result[:success] = true - end - result[:duration] = benchmark_result.real - rescue StandardError => e - result[:error] = e.message - result[:success] = false + new_payload = payload.dup + new_payload[:forward_to] = payload[:forward_to].map do |entry| + entry.dup.tap { |e| e[:endpoint] = endpoint } end - result + new_payload end - def display_summary - log_header("BENCHMARK SUMMARY") - log("Total queries", value: search_queries.size) - display_entries - end + private - def format_value(value) - case value - when TrueClass then 'yes' - when FalseClass then 'no' - when ActiveSupport::TimeWithZone, Time - utc_time = value.utc - relative_time = time_ago_in_words(utc_time) - "#{utc_time} (#{relative_time} ago)" - when NilClass then '(never)' - else value.to_s + def parse_json(json) + case json + when String + Gitlab::Json.parse(json, symbolize_names: true) + when Hash + json.deep_symbolize_keys + else + raise ArgumentError, "Invalid payload: must be JSON string or Hash" end end - def log_header(message) - display_entries - logger.info("\n#{message}") - @entries = [] - end - - def log(key, value: nil, nested: nil) - entries << { key: key, value: value, nested: nested } - end + # Remove the 'Files' property from response["Result"] or response[:Result] + def filter_files_from_response(response) + result_key = "Result" + return response unless response[result_key].has_key?("Files") - def display_entries - return if entries.empty? + result = response.with_indifferent_access - max_length = entries.map { |entry| entry[:key].length }.max - padding = max_length + 2 - entries.each do |entry| - key_with_padding = "#{entry[:key]}:#{' ' * (padding - entry[:key].length)}" - formatted_value = format_value(entry[:value]) - logger.info("#{key_with_padding}#{formatted_value}") - end - @entries = [] + result[result_key].delete("Files") + result end end end -- GitLab