From 91d98d831d1f26cea785bde80e775606a9d59898 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Date: Wed, 10 Dec 2025 18:32:40 +0100 Subject: [PATCH 1/5] Add SWE Bench seeder for DAP evaluations - Add rake task gitlab:duo:swe_bench_seeder --- .../duo/developments/swe_bench_seeder.rb | 556 ++++++++++++++++++ ee/lib/tasks/gitlab/duo.rake | 6 + 2 files changed, 562 insertions(+) create mode 100644 ee/lib/gitlab/duo/developments/swe_bench_seeder.rb diff --git a/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb new file mode 100644 index 00000000000000..1847b4e56d85cb --- /dev/null +++ b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb @@ -0,0 +1,556 @@ +# frozen_string_literal: true + +module Gitlab + module Duo + module Developments + class SweBenchSeeder + GROUP_PATH = 'gitlab-duo' + SUBGROUP_PATH = 'swe-bench-seeded-data' + + # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity -- Main orchestration method with multiple responsibilities + def self.seed(project_filter: nil) + puts "Seeding SWE Bench data structure..." + puts "Filtering to projects: #{project_filter.join(', ')}" if project_filter + + user = User.find_by_username('root') + parent_group = find_or_create_parent_group(user) + subgroup = find_or_create_subgroup(parent_group, user) + + puts "Subgroup URL: http://gdk.test:3000/#{subgroup.full_path}" + + # Delete all existing issues in the subgroup + delete_all_issues_in_subgroup(subgroup, user) + + # Fetch and process examples from LangSmith dataset + dataset, dataset_name, split_name = fetch_dataset_from_langsmith + return if dataset.empty? 
+ + puts "\n=== Processing examples from SWE Bench Dataset ===" + puts "==========================================\n" + + # Statistics tracking + projects_created = 0 + issues_per_project = {} + created_issue_urls = [] + + # Group examples by repository + examples_by_repo = {} + dataset.each do |example| + repo = example['inputs']&.[]('repo') + if repo + examples_by_repo[repo] ||= [] + examples_by_repo[repo] << example + else + puts "Warning: Skipping example with no repo field" + end + end + + # Filter by project if specified + if project_filter && project_filter.any? + filtered_repos = {} + project_filter.each do |filter| + # Match full repo path (e.g., "pallets/flask") or just project name (e.g., "flask") + matching_repos = examples_by_repo.keys.select do |repo| + repo == filter || repo.end_with?("/#{filter}") + end + matching_repos.each do |repo| + filtered_repos[repo] = examples_by_repo[repo] + end + end + examples_by_repo = filtered_repos + puts "Filtered to #{examples_by_repo.keys.size} repositories matching filter" + end + + total_examples = examples_by_repo.values.sum(&:size) + puts "Found #{examples_by_repo.keys.size} unique repositories with #{total_examples} total examples\n" + + # Process each repository + examples_by_repo.each_with_index do |(repo, examples), repo_index| + repo_msg = "#{repo_index + 1}/#{examples_by_repo.keys.size}: #{repo} (#{examples.size} issue(s))" + puts "\n--- Processing repository #{repo_msg} ---" + + github_url = "https://github.com/#{repo}.git" + puts "GitHub URL: #{github_url}" + + # Create or recreate the project once for all issues + project = clone_repository(github_url, repo, subgroup, user) + + next unless project&.persisted? 
+ + # Track project creation (count if it was newly created or recreated) + projects_created += 1 + + # Initialize issue count for this project + issues_per_project[project.full_path] = 0 + + # Create all issues for this project + examples.each_with_index do |example, example_index| + puts "\n Creating issue #{example_index + 1}/#{examples.size} for #{project.full_path}..." + + next unless example['inputs']['problem_statement'] + + issue = create_issue_from_problem_statement(project, user, example['inputs']['problem_statement']) + next unless issue + + issues_per_project[project.full_path] += 1 + issue_url = Rails.application.routes.url_helpers.project_issue_url(project, issue) + created_issue_urls << issue_url + end + end + + # Print statistics + total_issues = issues_per_project.values.sum + puts "\n#{'=' * 60}" + puts "SEEDING STATISTICS" + puts "=" * 60 + puts "Total projects created: #{projects_created}" + puts "Total projects processed: #{issues_per_project.keys.size} (#{total_issues} issue(s) total)" + puts "\nIssues per project:" + issues_per_project.each do |project_path, issue_count| + puts " #{project_path}: #{issue_count} issue(s)" + end + puts "=" * 60 + + # Save issue URLs to LangSmith dataset + save_issue_urls_to_langsmith(created_issue_urls, dataset_name, split_name) if created_issue_urls.any? + rescue StandardError => e + puts "Error seeding SWE Bench structure: #{e.message}" + puts e.backtrace.first(5).join("\n") + raise + end + # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity + + def self.find_or_create_parent_group(user) + group = Group.find_by_full_path(GROUP_PATH) + + if group + puts "Found existing parent group: #{GROUP_PATH}" + return group + end + + puts "Parent group '#{GROUP_PATH}' not found. Creating a new one..." 
+ + # Find or create organization for the group + org = find_or_create_organization(user, GROUP_PATH) + + group_params = { + name: GROUP_PATH, + path: GROUP_PATH, + organization: org, + visibility_level: org.visibility_level + } + + response = Groups::CreateService.new(user, group_params).execute + + raise "Failed to create parent group: #{response.errors.full_messages.join(', ')}" if response.error? + + puts "Created parent group: #{GROUP_PATH}" + response[:group] + end + + def self.find_or_create_subgroup(parent_group, user) + subgroup_full_path = "#{GROUP_PATH}/#{SUBGROUP_PATH}" + subgroup = Group.find_by_full_path(subgroup_full_path) + + if subgroup + puts "Found existing subgroup: #{subgroup_full_path}" + return subgroup + end + + puts "Subgroup '#{subgroup_full_path}' not found. Creating a new one..." + + subgroup_params = { + name: SUBGROUP_PATH, + path: SUBGROUP_PATH, + parent_id: parent_group.id, + organization: parent_group.organization, + visibility_level: parent_group.visibility_level + } + + response = Groups::CreateService.new(user, subgroup_params).execute + + raise "Failed to create subgroup: #{response.errors.full_messages.join(', ')}" if response.error? + + puts "Created subgroup: #{subgroup_full_path}" + response[:group] + end + + def self.find_or_create_organization(user, namespace) + # Try to find organization by path + org = ::Organizations::Organization.find_by_path(namespace) + + if org + puts "Found existing organization: #{namespace}" + return org + end + + puts "Organization '#{namespace}' not found. Creating a new one..." + + response = ::Organizations::CreateService.new( + current_user: user, + params: { + name: namespace, + path: namespace, + visibility_level: ::Gitlab::VisibilityLevel::PUBLIC + } + ).execute + + raise "Failed to create organization: #{response.errors.full_messages.join(', ')}" if response.error? 
+ + puts "Created organization: #{namespace}" + response[:organization] + end + + def self.fetch_dataset_from_langsmith + # Optional overrides: + # - LANGSMITH_DATASET_NAME + # - LANGSMITH_DATASET_ID + # - LANGSMITH_SPLIT_NAME + default_dataset_name = 'duo_workflow.swe-bench-verified-test.1' + default_dataset_id = '6cd898d8-3b3c-49d4-bfd5-944f83bea1f2' + default_split_name = 'validation_stratified_b06f4db4_p20' + + dataset_name = ENV.fetch('LANGSMITH_DATASET_NAME', default_dataset_name) + dataset_id = ENV.fetch('LANGSMITH_DATASET_ID', default_dataset_id) + split_name = ENV.fetch('LANGSMITH_SPLIT_NAME', default_split_name) + + langchain_api_key = langsmith_api_key!(missing_message: "Missing LANGCHAIN_API_KEY environment variable!") + return [[], dataset_name, split_name] unless langchain_api_key + + response = langsmith_request( + method: :get, + path: '/api/v1/examples', + query: { dataset: dataset_id, splits: split_name }, + api_key: langchain_api_key + ) + + unless response.is_a?(Net::HTTPSuccess) + puts "Failed to fetch dataset: #{response.code} #{response.message}" + puts "Response body: #{response.body}" + return [[], dataset_name, split_name] + end + + # Parse JSON response (not JSONL) + response_data = ::Gitlab::Json.parse(response.body) + + # Extract examples from response (structure may vary, but typically it's an array or has an 'examples' key) + examples = if response_data.is_a?(Array) + response_data + elsif response_data['examples'] + response_data['examples'] + elsif response_data['data'] + response_data['data'] + end || response_data + + # Process all examples from the split + dataset = examples.is_a?(Array) ? 
examples : [examples] + [dataset, dataset_name, split_name] + rescue StandardError => e + puts "Error fetching dataset from LangSmith: #{e.message}" + puts e.backtrace.first(5).join("\n") + [[], dataset_name, split_name] + end + + def self.clone_repository(github_url, repo_path, subgroup, user) + puts "\nCloning repository from #{github_url}..." + + # Extract project name from repo path (e.g., "matplotlib/matplotlib" -> "matplotlib") + project_name = repo_path.split('/').last + + # Check if project already exists and delete it + project_full_path = "#{subgroup.full_path}/#{project_name}" + existing_project = Project.find_by_full_path(project_full_path) + + if existing_project + puts "Project #{project_full_path} already exists. Deleting it..." + # Delete all issues first + delete_all_issues(existing_project, user) + # Mark project for deletion + ::Projects::MarkForDeletionService.new(existing_project, user).execute + puts "Deleted existing project." + end + + puts "Creating project #{project_name} in #{subgroup.full_path}..." + + # Create project using Projects::CreateService + project_params = { + name: project_name, + path: project_name, + namespace_id: subgroup.id, + import_url: github_url, + visibility_level: subgroup.visibility_level + } + + project = ::Projects::CreateService.new(user, project_params).execute + + if project.errors.any? + puts "Failed to create project: #{project.errors.full_messages.join(', ')}" + return + end + + unless project.persisted? + puts "Failed to create project: Project was not saved" + return + end + + puts "Created project: #{project.full_path}" + puts "Project URL: http://gdk.test:3000/#{project.full_path}" + + project + rescue StandardError => e + puts "Error cloning repository: #{e.message}" + puts e.backtrace.first(5).join("\n") + nil + end + + def self.create_issue_from_problem_statement(project, user, problem_statement) + return if problem_statement.blank? 
+ + # Extract first line as title and remove it from description + lines = problem_statement.split("\n") + title = lines.first.strip + description = lines[1..].join("\n").strip + + # Delete existing issue with the same title if it exists + existing_issue = project.issues.find_by_title(title) + if existing_issue + puts "Issue '#{title}' already exists. Deleting it..." + ::Issues::DestroyService.new(container: project, current_user: user).execute(existing_issue) + puts "Deleted existing issue." + end + + puts "\nCreating issue from problem statement..." + puts "Title: #{title}" + + Sidekiq.strict_args!(false) + result = ::Issues::CreateService.new( + container: project, + current_user: user, + params: { + title: title, + description: description + } + ).execute + + unless result.success? + puts "Failed to create issue '#{title}': #{result.errors.join(', ')}" + return + end + + issue = result.payload[:issue] + issue_url = Rails.application.routes.url_helpers.project_issue_url(project, issue) + puts "Created issue: #{issue_url}" + issue + rescue StandardError => e + puts "Error creating issue: #{e.message}" + puts e.backtrace.first(5).join("\n") + nil + end + + def self.delete_all_issues_in_subgroup(subgroup, user) + puts "\nDeleting all existing issues in subgroup #{subgroup.full_path}..." 
+ total_deleted = 0 + + # Iterate through all projects in the subgroup + subgroup.all_projects.find_each do |project| + project_deleted = 0 + project.issues.find_each do |issue| + ::Issues::DestroyService.new(container: project, current_user: user).execute(issue) + project_deleted += 1 + total_deleted += 1 + rescue StandardError => e + puts "Failed to delete issue '#{issue.title}' in #{project.full_path}: #{e.message}" + end + puts "Deleted #{project_deleted} issue(s) from #{project.full_path}" if project_deleted > 0 + end + + puts "Deleted #{total_deleted} total issue(s) from subgroup.\n" if total_deleted > 0 + end + + def self.delete_all_issues(project, user) + puts "Deleting all existing issues..." + deleted_count = 0 + + project.issues.find_each do |issue| + ::Issues::DestroyService.new(container: project, current_user: user).execute(issue) + deleted_count += 1 + puts "Deleted issue: '#{issue.title}'" + rescue StandardError => e + puts "Failed to delete issue '#{issue.title}': #{e.message}" + end + + puts "Deleted #{deleted_count} issues.\n" if deleted_count > 0 + end + + def self.save_issue_urls_to_langsmith(issue_urls, dataset_name, split_name) + langchain_api_key = langsmith_api_key!( + missing_message: "Warning: Missing LANGCHAIN_API_KEY. Cannot save issue URLs to LangSmith." 
+ ) + return unless langchain_api_key + + # Create dataset name: swe-bench-issue-to-mr-<dataset_name>-<split_name> + new_dataset_name = "swe-bench-issue-to-mr-#{dataset_name}-#{split_name}" + + puts "\nSaving #{issue_urls.size} issue URLs to LangSmith dataset: #{new_dataset_name}" + + # Delete existing dataset if it exists, then create a new one + dataset_id = delete_and_create_dataset(langchain_api_key, new_dataset_name) + return unless dataset_id + + # Add examples to the dataset + add_examples_to_dataset(langchain_api_key, dataset_id, issue_urls) + + puts "Successfully saved #{issue_urls.size} issue URLs to LangSmith dataset: #{new_dataset_name}" + rescue StandardError => e + puts "Error saving issue URLs to LangSmith: #{e.message}" + puts e.backtrace.first(5).join("\n") + end + + def self.delete_and_create_dataset(api_key, dataset_name) + # Try to find and delete existing dataset first + existing_dataset_id = find_dataset(api_key, dataset_name) + if existing_dataset_id + puts "Found existing dataset: #{dataset_name} (ID: #{existing_dataset_id})" + delete_dataset(api_key, existing_dataset_id) + end + + # Create new dataset (using 'name' field as per API requirements) + response = langsmith_request( + method: :post, + path: '/api/v1/datasets', + body: { + name: dataset_name, + description: "Issue URLs created by SWE Bench seeder" + }, + api_key: api_key + ) + + unless response.is_a?(Net::HTTPSuccess) + puts "Failed to create dataset: #{response.code} #{response.message}" + puts "Response body: #{response.body}" + return + end + + dataset_data = ::Gitlab::Json.parse(response.body) + dataset_id = dataset_data['id'] || dataset_data['dataset_id'] + puts "Created new dataset: #{dataset_name} (ID: #{dataset_id})" + dataset_id + rescue StandardError => e + puts "Error deleting/creating dataset: #{e.message}" + nil + end + + def self.find_dataset(api_key, dataset_name) + response = langsmith_request( + method: :get, + path: '/api/v1/datasets', + query: { name: dataset_name }, + api_key: api_key +
) + + return unless response.is_a?(Net::HTTPSuccess) + + datasets = ::Gitlab::Json.parse(response.body) + return unless datasets.is_a?(Array) && datasets.any? + + dataset = datasets.find { |d| d['dataset_name'] == dataset_name || d['name'] == dataset_name } + return unless dataset + + dataset['id'] || dataset['dataset_id'] + rescue StandardError + nil + end + + def self.delete_dataset(api_key, dataset_id) + response = langsmith_request( + method: :delete, + path: "/api/v1/datasets/#{dataset_id}", + api_key: api_key + ) + + if response.is_a?(Net::HTTPSuccess) + puts "Deleted existing dataset (ID: #{dataset_id})" + else + puts "Warning: Failed to delete existing dataset: #{response.code} #{response.message}" + end + rescue StandardError => e + puts "Warning: Error deleting dataset: #{e.message}" + end + + def self.add_examples_to_dataset(api_key, dataset_id, issue_urls) + # Create examples one at a time to match API expectations + success_count = 0 + failure_count = 0 + + issue_urls.each_with_index do |issue_url, index| + # Only log progress every 10 examples or on the last one + if (index + 1) % 10 == 0 || index + 1 == issue_urls.size + puts " Adding examples: #{index + 1}/#{issue_urls.size}..." + end + + # Use /api/v1/examples endpoint + # Create example with dataset_id, inputs, and outputs + response = langsmith_request( + method: :post, + path: '/api/v1/examples', + body: { + dataset_id: dataset_id, + inputs: { issue_url: issue_url }, + outputs: {} + }, + api_key: api_key + ) + + if response.is_a?(Net::HTTPSuccess) + success_count += 1 + else + failure_count += 1 + puts "Failed to add example #{index + 1}: #{response.code} #{response.message}" + puts "Response body: #{response.body}" + end + end + + puts " Added #{success_count} examples successfully#{failure_count > 0 ? 
", #{failure_count} failed" : ''}" + rescue StandardError => e + puts "Error adding examples to dataset: #{e.message}" + puts e.backtrace.first(5).join("\n") + raise + end + + def self.langsmith_endpoint + ENV['LANGCHAIN_ENDPOINT'] || 'https://api.smith.langchain.com' + end + + def self.langsmith_api_key!(missing_message:) + api_key = ENV['LANGCHAIN_API_KEY'] + return api_key if api_key.present? + + puts missing_message + nil + end + + def self.langsmith_request(method:, path:, api_key:, query: nil, body: nil) + uri = URI("#{langsmith_endpoint}#{path}") + uri.query = URI.encode_www_form(query) if query&.any? + + request_class = case method.to_sym + when :get then Net::HTTP::Get + when :post then Net::HTTP::Post + when :delete then Net::HTTP::Delete + else + raise ArgumentError, "Unsupported HTTP method: #{method.inspect}" + end + + request = request_class.new(uri) + request['x-api-key'] = api_key + request['Content-Type'] = 'application/json' + request.body = body.to_json if body + + Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') { |http| http.request(request) } + end + + private_class_method :langsmith_endpoint, :langsmith_api_key!, :langsmith_request + end + end + end +end diff --git a/ee/lib/tasks/gitlab/duo.rake b/ee/lib/tasks/gitlab/duo.rake index 5405291de31c7f..afb2b5af44d4a2 100644 --- a/ee/lib/tasks/gitlab/duo.rake +++ b/ee/lib/tasks/gitlab/duo.rake @@ -43,6 +43,12 @@ namespace :gitlab do Gitlab::Duo::Developments::DapEvalsSeeder.seed_issues(output_file: output_file) end + desc 'GitLab | Duo | Seed projects and issues for SWE Bench evaluations' + task :swe_bench_seeder, [:projects] => :environment do |_, args| + project_filter = args[:projects]&.split(',')&.map(&:strip) + Gitlab::Duo::Developments::SweBenchSeeder.seed(project_filter: project_filter) + end + desc 'GitLab | Duo | Onboard Duo Agent Platform' task onboard_dap: :gitlab_environment do Gitlab::Duo::Developments::DapOnboarding.execute -- GitLab From 
9a6c98508c063151f1102b58396cd426bb19414a Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Date: Wed, 17 Dec 2025 04:50:00 -0300 Subject: [PATCH 2/5] Filter out empty projects param --- ee/lib/tasks/gitlab/duo.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/lib/tasks/gitlab/duo.rake b/ee/lib/tasks/gitlab/duo.rake index afb2b5af44d4a2..9ed63ef4b0aed6 100644 --- a/ee/lib/tasks/gitlab/duo.rake +++ b/ee/lib/tasks/gitlab/duo.rake @@ -45,7 +45,7 @@ namespace :gitlab do desc 'GitLab | Duo | Seed projects and issues for SWE Bench evaluations' task :swe_bench_seeder, [:projects] => :environment do |_, args| - project_filter = args[:projects]&.split(',')&.map(&:strip) + project_filter = args[:projects]&.split(',')&.map(&:strip)&.reject(&:empty?)&.presence Gitlab::Duo::Developments::SweBenchSeeder.seed(project_filter: project_filter) end -- GitLab From 9611f4552b18fc9d4ad45018b4274b14d583a253 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Date: Wed, 17 Dec 2025 08:56:58 +0100 Subject: [PATCH 3/5] Use Gitlab::HTTP for LangSmith requests --- .../duo/developments/swe_bench_seeder.rb | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb index 1847b4e56d85cb..3fa667e71f05f0 100644 --- a/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb +++ b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb @@ -224,7 +224,7 @@ def self.fetch_dataset_from_langsmith api_key: langchain_api_key ) - unless response.is_a?(Net::HTTPSuccess) + unless response.success? puts "Failed to fetch dataset: #{response.code} #{response.message}" puts "Response body: #{response.body}" return [[], dataset_name, split_name] @@ -425,7 +425,7 @@ def self.delete_and_create_dataset(api_key, dataset_name) api_key: api_key ) - unless response.is_a?(Net::HTTPSuccess) + unless response.success? 
puts "Failed to create dataset: #{response.code} #{response.message}" puts "Response body: #{response.body}" return @@ -448,7 +448,7 @@ def self.find_dataset(api_key, dataset_name) api_key: api_key ) - return unless response.is_a?(Net::HTTPSuccess) + return unless response.success? datasets = ::Gitlab::Json.parse(response.body) return unless datasets.is_a?(Array) && datasets.any? @@ -468,7 +468,7 @@ def self.delete_dataset(api_key, dataset_id) api_key: api_key ) - if response.is_a?(Net::HTTPSuccess) + if response.success? puts "Deleted existing dataset (ID: #{dataset_id})" else puts "Warning: Failed to delete existing dataset: #{response.code} #{response.message}" @@ -501,7 +501,7 @@ def self.add_examples_to_dataset(api_key, dataset_id, issue_urls) api_key: api_key ) - if response.is_a?(Net::HTTPSuccess) + if response.success? success_count += 1 else failure_count += 1 @@ -530,23 +530,22 @@ def self.langsmith_api_key!(missing_message:) end def self.langsmith_request(method:, path:, api_key:, query: nil, body: nil) - uri = URI("#{langsmith_endpoint}#{path}") - uri.query = URI.encode_www_form(query) if query&.any? - - request_class = case method.to_sym - when :get then Net::HTTP::Get - when :post then Net::HTTP::Post - when :delete then Net::HTTP::Delete - else - raise ArgumentError, "Unsupported HTTP method: #{method.inspect}" - end - - request = request_class.new(uri) - request['x-api-key'] = api_key - request['Content-Type'] = 'application/json' - request.body = body.to_json if body - - Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') { |http| http.request(request) } + url = "#{langsmith_endpoint}#{path}" + headers = { + 'x-api-key' => api_key, + 'Content-Type' => 'application/json' + } + + case method.to_sym + when :get + Gitlab::HTTP.get(url, headers: headers, query: query) + when :post + Gitlab::HTTP.post(url, headers: headers, query: query, body: (body ? 
body.to_json : nil)) + when :delete + Gitlab::HTTP.delete(url, headers: headers, query: query) + else + raise ArgumentError, "Unsupported HTTP method: #{method.inspect}" + end end private_class_method :langsmith_endpoint, :langsmith_api_key!, :langsmith_request -- GitLab From 53717733e520dc075016edc907e060abe76d1046 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Date: Wed, 17 Dec 2025 11:33:29 +0100 Subject: [PATCH 4/5] Also delete projects --- ee/lib/gitlab/duo/developments/swe_bench_seeder.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb index 3fa667e71f05f0..b0647731a68a8b 100644 --- a/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb +++ b/ee/lib/gitlab/duo/developments/swe_bench_seeder.rb @@ -350,6 +350,7 @@ def self.create_issue_from_problem_statement(project, user, problem_statement) def self.delete_all_issues_in_subgroup(subgroup, user) puts "\nDeleting all existing issues in subgroup #{subgroup.full_path}..." total_deleted = 0 + total_projects_deleted = 0 # Iterate through all projects in the subgroup subgroup.all_projects.find_each do |project| @@ -362,9 +363,20 @@ def self.delete_all_issues_in_subgroup(subgroup, user) puts "Failed to delete issue '#{issue.title}' in #{project.full_path}: #{e.message}" end puts "Deleted #{project_deleted} issue(s) from #{project.full_path}" if project_deleted > 0 + + # Also delete the project itself so subsequent seeding starts from a clean slate. 
+ if ::Projects::MarkForDeletionService.new(project, user, {}).execute + total_projects_deleted += 1 + puts "Deleted project: #{project.full_path}" + else + puts "Failed to delete project #{project.full_path} (insufficient permissions or admin-mode required)" + end + rescue StandardError => e + puts "Failed to delete project #{project.full_path}: #{e.message}" end puts "Deleted #{total_deleted} total issue(s) from subgroup.\n" if total_deleted > 0 + puts "Deleted #{total_projects_deleted} total project(s) from subgroup.\n" if total_projects_deleted > 0 end def self.delete_all_issues(project, user) -- GitLab From 89d74185273c4685b123fffe3ea92701a6194e2f Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Date: Thu, 18 Dec 2025 12:45:06 +0100 Subject: [PATCH 5/5] Add unit and integration tests for SweBenchSeeder --- .../duo/developments/swe_bench_seeder_spec.rb | 97 ++++++++++++++ .../tasks/gitlab/duo/swe_bench_seeder_spec.rb | 118 ++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 ee/spec/lib/gitlab/duo/developments/swe_bench_seeder_spec.rb create mode 100644 ee/spec/tasks/gitlab/duo/swe_bench_seeder_spec.rb diff --git a/ee/spec/lib/gitlab/duo/developments/swe_bench_seeder_spec.rb b/ee/spec/lib/gitlab/duo/developments/swe_bench_seeder_spec.rb new file mode 100644 index 00000000000000..38a286b231260b --- /dev/null +++ b/ee/spec/lib/gitlab/duo/developments/swe_bench_seeder_spec.rb @@ -0,0 +1,97 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Duo::Developments::SweBenchSeeder, feature_category: :duo_chat do + describe '.langsmith_request' do + let(:endpoint) { 'https://example.langsmith.test' } + let(:api_key) { 'test-api-key' } + let(:path) { '/api/v1/examples' } + let(:query) { { dataset: '123', splits: 'abc' } } + + subject(:langsmith_request) do + described_class.__send__( + :langsmith_request, + method: method, + path: path, + api_key: api_key, + query: query, + body: body + ) + end + + before do + 
stub_env('LANGCHAIN_ENDPOINT', endpoint) + end + + context 'when method is GET' do + let(:method) { :get } + let(:body) { nil } + + it 'delegates to Gitlab::HTTP.get with expected url, headers, and query' do + expected_headers = { + 'x-api-key' => api_key, + 'Content-Type' => 'application/json' + } + + expect(Gitlab::HTTP).to receive(:get).with( + "#{endpoint}#{path}", + headers: expected_headers, + query: query + ) + + langsmith_request + end + end + + context 'when method is POST' do + let(:method) { :post } + let(:body) { { dataset_id: '123', inputs: { issue_url: 'http://example.test' }, outputs: {} } } + + it 'delegates to Gitlab::HTTP.post with expected url, headers, query, and json body' do + expected_headers = { + 'x-api-key' => api_key, + 'Content-Type' => 'application/json' + } + + expect(Gitlab::HTTP).to receive(:post).with( + "#{endpoint}#{path}", + headers: expected_headers, + query: query, + body: body.to_json + ) + + langsmith_request + end + end + + context 'when method is DELETE' do + let(:method) { :delete } + let(:body) { nil } + + it 'delegates to Gitlab::HTTP.delete with expected url, headers, and query' do + expected_headers = { + 'x-api-key' => api_key, + 'Content-Type' => 'application/json' + } + + expect(Gitlab::HTTP).to receive(:delete).with( + "#{endpoint}#{path}", + headers: expected_headers, + query: query + ) + + langsmith_request + end + end + + context 'when method is unsupported' do + let(:method) { :patch } + let(:body) { nil } + + it 'raises an ArgumentError' do + expect { langsmith_request }.to raise_error(ArgumentError, /Unsupported HTTP method/) + end + end + end +end diff --git a/ee/spec/tasks/gitlab/duo/swe_bench_seeder_spec.rb b/ee/spec/tasks/gitlab/duo/swe_bench_seeder_spec.rb new file mode 100644 index 00000000000000..b46faf349a18d8 --- /dev/null +++ b/ee/spec/tasks/gitlab/duo/swe_bench_seeder_spec.rb @@ -0,0 +1,118 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe 
'gitlab:duo_chat:seed:swe_bench_seeder', :silence_stdout, feature_category: :duo_chat do + let(:langchain_endpoint) { 'https://api.smith.langchain.com' } + let(:langchain_api_key) { 'test_api_key' } + let(:dataset_id) { '6cd898d8-3b3c-49d4-bfd5-944f83bea1f2' } + let(:split_name) { 'validation_stratified_b06f4db4_p20' } + let(:organization) { create(:organization) } + let!(:user) { create(:user, organizations: [organization], username: 'root') } + + let(:run) { run_rake_task('gitlab:duo_chat:seed:swe_bench_seeder') } + + before do + Rake.application.rake_require 'tasks/gitlab/duo' + stub_env('LANGCHAIN_ENDPOINT', langchain_endpoint) + stub_env('LANGCHAIN_API_KEY', langchain_api_key) + stub_env('LANGSMITH_DATASET_ID', dataset_id) + stub_env('LANGSMITH_SPLIT_NAME', split_name) + end + + context 'when LANGCHAIN_API_KEY is missing' do + before do + stub_env('LANGCHAIN_API_KEY', nil) + end + + it 'prints an error message and does not proceed' do + expect { run }.to output(/Missing LANGCHAIN_API_KEY environment variable!/).to_stdout + end + end + + context 'when the API request fails' do + before do + stub_request(:get, "#{langchain_endpoint}/api/v1/examples") + .with( + headers: { 'x-api-key' => langchain_api_key, 'Content-Type' => 'application/json' }, + query: { dataset: dataset_id, splits: split_name } + ) + .to_return(status: 401, body: 'Unauthorized') + end + + it 'prints an error and does not proceed' do + expect { run }.to output(/Failed to fetch dataset: 401/).to_stdout + end + end + + context 'when an unexpected error occurs' do + before do + allow(Gitlab::HTTP).to receive(:get).and_raise(StandardError.new("Network failure")) + end + + it 'prints an error message and handles gracefully' do + expect { run }.to output(/Error fetching dataset from LangSmith: Network failure/).to_stdout + end + end + + context 'when the dataset is empty' do + before do + stub_request(:get, "#{langchain_endpoint}/api/v1/examples") + .with( + headers: { 'x-api-key' => 
langchain_api_key, 'Content-Type' => 'application/json' }, + query: { dataset: dataset_id, splits: split_name } + ) + .to_return(status: 200, body: [].to_json) + end + + it 'prints a message indicating no data to seed' do + expect { run }.to output(/Found 0 unique repositories with 0 total examples/).to_stdout + end + end + + context 'when the dataset contains examples' do + let(:example_response) do + [ + { + 'inputs' => { + 'repo' => 'pallets/flask', + 'problem_statement' => "Fix bug in Flask\n\nDescription of the bug to fix." + } + }, + { + 'inputs' => { + 'repo' => 'pallets/flask', + 'problem_statement' => "Add feature to Flask\n\nDescription of the feature to add." + } + } + ] + end + + before do + stub_request(:get, "#{langchain_endpoint}/api/v1/examples") + .with( + headers: { 'x-api-key' => langchain_api_key, 'Content-Type' => 'application/json' }, + query: { dataset: dataset_id, splits: split_name } + ) + .to_return(status: 200, body: example_response.to_json) + + # Stub project creation to avoid actual cloning + allow_next_instance_of(Projects::CreateService) do |service| + allow(service).to receive(:execute).and_return( + create(:project, :repository, namespace: create(:group)) + ) + end + + # Stub issue creation + allow_next_instance_of(Issues::CreateService) do |service| + allow(service).to receive(:execute).and_return( + ServiceResponse.success(payload: { issue: create(:issue) }) + ) + end + end + + it 'processes the dataset and creates issues' do + expect { run }.to output(/Processing examples from SWE Bench Dataset/).to_stdout + end + end +end -- GitLab