From 166dbc5c7d3c81d386fdaa0a11ebb8db39967441 Mon Sep 17 00:00:00 2001 From: michaelangeloio Date: Fri, 5 Dec 2025 19:56:44 -0500 Subject: [PATCH 1/3] redact design --- db/structure.sql | 171 +++-- redaction_design_document.md | 1327 ++++++++++++++++++++++++++++++++++ 2 files changed, 1445 insertions(+), 53 deletions(-) create mode 100644 redaction_design_document.md diff --git a/db/structure.sql b/db/structure.sql index df4c50822b7291..484afb84877286 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -5253,6 +5253,36 @@ RETURN NULL; END $$; +CREATE TABLE ai_code_suggestion_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + organization_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + CONSTRAINT check_ba9ae3f258 CHECK ((char_length(namespace_path) <= 255)) +) +PARTITION BY RANGE ("timestamp"); + +CREATE TABLE ai_duo_chat_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + personal_namespace_id bigint, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + organization_id bigint, + CONSTRAINT check_628cdfbf3f CHECK ((char_length(namespace_path) <= 255)), + CONSTRAINT check_f759f45177 CHECK ((organization_id IS NOT NULL)) +) +PARTITION BY RANGE ("timestamp"); + CREATE TABLE ai_events_counts ( id bigint NOT NULL, events_date date NOT NULL, @@ -5264,6 +5294,21 @@ CREATE TABLE ai_events_counts ( ) PARTITION BY RANGE (events_date); +CREATE TABLE ai_troubleshoot_job_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + job_id bigint NOT NULL, + project_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone 
NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + CONSTRAINT check_29d6dbc329 CHECK ((char_length(namespace_path) <= 255)) +) +PARTITION BY RANGE ("timestamp"); + CREATE TABLE ai_usage_events ( id bigint NOT NULL, "timestamp" timestamp with time zone NOT NULL, @@ -5995,7 +6040,7 @@ PARTITION BY RANGE (created_at); CREATE TABLE loose_foreign_keys_deleted_records ( id bigint NOT NULL, - partition bigint DEFAULT 1 NOT NULL, + partition bigint DEFAULT 3 NOT NULL, primary_key_value bigint NOT NULL, status smallint DEFAULT 1 NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL, @@ -6110,7 +6155,7 @@ PARTITION BY LIST (partition_id); CREATE TABLE p_ci_finished_build_ch_sync_events ( build_id bigint NOT NULL, - partition bigint DEFAULT 1 NOT NULL, + partition bigint DEFAULT 12 NOT NULL, build_finished_at timestamp without time zone NOT NULL, processed boolean DEFAULT false NOT NULL, project_id bigint NOT NULL @@ -6120,7 +6165,7 @@ PARTITION BY LIST (partition); CREATE TABLE p_ci_finished_pipeline_ch_sync_events ( pipeline_id bigint NOT NULL, project_namespace_id bigint NOT NULL, - partition bigint DEFAULT 1 NOT NULL, + partition bigint DEFAULT 13 NOT NULL, pipeline_finished_at timestamp without time zone NOT NULL, processed boolean DEFAULT false NOT NULL ) @@ -6154,6 +6199,48 @@ CREATE TABLE p_generated_ref_commits ( ) PARTITION BY RANGE (project_id); +CREATE TABLE p_knowledge_graph_enabled_namespaces ( + id bigint NOT NULL, + namespace_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + state smallint DEFAULT 0 NOT NULL +) +PARTITION BY RANGE (namespace_id); + +CREATE TABLE p_knowledge_graph_replicas ( + id bigint NOT NULL, + namespace_id bigint NOT NULL, + knowledge_graph_enabled_namespace_id bigint, + zoekt_node_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + state smallint DEFAULT 0 NOT 
NULL, + retries_left smallint NOT NULL, + reserved_storage_bytes bigint DEFAULT 10485760 NOT NULL, + indexed_at timestamp with time zone, + schema_version smallint DEFAULT 0 NOT NULL, + CONSTRAINT c_p_knowledge_graph_replicas_retries_status CHECK (((retries_left > 0) OR ((retries_left = 0) AND (state >= 200)))) +) +PARTITION BY RANGE (namespace_id); + +CREATE TABLE p_knowledge_graph_tasks ( + id bigint NOT NULL, + partition_id bigint DEFAULT 1 NOT NULL, + zoekt_node_id bigint NOT NULL, + namespace_id bigint NOT NULL, + knowledge_graph_replica_id bigint NOT NULL, + perform_at timestamp with time zone DEFAULT now() NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + state smallint DEFAULT 0 NOT NULL, + task_type smallint NOT NULL, + retries_left smallint NOT NULL, + metadata jsonb DEFAULT '"{}"'::jsonb NOT NULL, + CONSTRAINT c_p_knowledge_graph_tasks_on_retries_left CHECK (((retries_left > 0) OR ((retries_left = 0) AND (state = 255)))) +) +PARTITION BY LIST (partition_id); + CREATE SEQUENCE sent_notifications_id_seq START WITH 1 INCREMENT BY 1 @@ -11063,20 +11150,6 @@ CREATE SEQUENCE ai_catalog_items_id_seq ALTER SEQUENCE ai_catalog_items_id_seq OWNED BY ai_catalog_items.id; -CREATE TABLE ai_code_suggestion_events ( - id bigint NOT NULL, - "timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - organization_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, - namespace_path text, - payload jsonb, - CONSTRAINT check_ba9ae3f258 CHECK ((char_length(namespace_path) <= 255)) -) -PARTITION BY RANGE ("timestamp"); - CREATE SEQUENCE ai_code_suggestion_events_id_seq START WITH 1 INCREMENT BY 1 @@ -11135,22 +11208,6 @@ CREATE SEQUENCE ai_conversation_threads_id_seq ALTER SEQUENCE ai_conversation_threads_id_seq OWNED BY ai_conversation_threads.id; -CREATE TABLE ai_duo_chat_events ( - id bigint NOT NULL, - 
"timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - personal_namespace_id bigint, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, - namespace_path text, - payload jsonb, - organization_id bigint, - CONSTRAINT check_628cdfbf3f CHECK ((char_length(namespace_path) <= 255)), - CONSTRAINT check_f759f45177 CHECK ((organization_id IS NOT NULL)) -) -PARTITION BY RANGE ("timestamp"); - CREATE SEQUENCE ai_duo_chat_events_id_seq START WITH 1 INCREMENT BY 1 @@ -11271,8 +11328,8 @@ CREATE TABLE ai_settings ( duo_core_features_enabled boolean, duo_agent_platform_service_url text, duo_agent_platform_request_count integer DEFAULT 0 NOT NULL, - foundational_agents_default_enabled boolean DEFAULT true, ai_gateway_timeout_seconds integer DEFAULT 60, + foundational_agents_default_enabled boolean DEFAULT true, minimum_access_level_execute smallint, minimum_access_level_manage smallint, minimum_access_level_enable_on_projects smallint, @@ -11303,21 +11360,6 @@ CREATE TABLE ai_testing_terms_acceptances ( CONSTRAINT check_5efe98894e CHECK ((char_length(user_email) <= 255)) ); -CREATE TABLE ai_troubleshoot_job_events ( - id bigint NOT NULL, - "timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - job_id bigint NOT NULL, - project_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, - namespace_path text, - payload jsonb, - CONSTRAINT check_29d6dbc329 CHECK ((char_length(namespace_path) <= 255)) -) -PARTITION BY RANGE ("timestamp"); - CREATE SEQUENCE ai_troubleshoot_job_events_id_seq START WITH 1 INCREMENT BY 1 @@ -27084,8 +27126,8 @@ CREATE TABLE security_policy_dismissals ( dismissal_types smallint[] DEFAULT '{}'::smallint[] NOT NULL, comment text, status smallint DEFAULT 0 NOT NULL, - licenses jsonb DEFAULT '{}'::jsonb NOT NULL, license_occurrence_uuids text[] DEFAULT '{}'::text[] 
NOT NULL, + licenses jsonb DEFAULT '{}'::jsonb NOT NULL, CONSTRAINT check_654ff06528 CHECK ((char_length(comment) <= 255)), CONSTRAINT check_88beed9dc9 CHECK ((security_findings_uuids IS NOT NULL)) ); @@ -28890,8 +28932,8 @@ CREATE TABLE user_preferences ( work_items_display_settings jsonb DEFAULT '{}'::jsonb NOT NULL, default_duo_add_on_assignment_id bigint, markdown_maintain_indentation boolean DEFAULT false NOT NULL, - project_studio_enabled boolean DEFAULT false NOT NULL, merge_request_dashboard_show_drafts boolean DEFAULT true NOT NULL, + project_studio_enabled boolean DEFAULT false NOT NULL, duo_default_namespace_id bigint, policy_advanced_editor boolean DEFAULT false NOT NULL, early_access_studio_participant boolean DEFAULT false NOT NULL, @@ -29981,9 +30023,8 @@ CREATE TABLE vulnerability_occurrences ( initial_pipeline_id bigint, latest_pipeline_id bigint, security_project_tracked_context_id bigint, - detected_at timestamp with time zone DEFAULT now(), new_uuid uuid, - partition_id bigint DEFAULT 1, + detected_at timestamp with time zone DEFAULT now(), CONSTRAINT check_4a3a60f2ba CHECK ((char_length(solution) <= 7000)), CONSTRAINT check_ade261da6b CHECK ((char_length(description) <= 15000)), CONSTRAINT check_f602da68dd CHECK ((char_length(cve) <= 48400)) @@ -54712,3 +54753,27 @@ ALTER TABLE ONLY user_follow_users ALTER TABLE ONLY user_follow_users ADD CONSTRAINT user_follow_users_follower_id_fkey FOREIGN KEY (follower_id) REFERENCES users(id) ON DELETE CASCADE; + +CREATE PUBLICATION siphon_publication_ci_1 WITH (publish = 'insert, update, delete, truncate'); + +CREATE PUBLICATION siphon_publication_main_1 WITH (publish = 'insert, update, delete, truncate'); + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_pipeline_metadata; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_runner_namespaces; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_runner_projects; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY 
ci_running_builds; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY group_type_ci_runner_machines; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY group_type_ci_runners; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY instance_type_ci_runner_machines; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY instance_type_ci_runners; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY project_type_ci_runner_machines; + +ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY project_type_ci_runners; diff --git a/redaction_design_document.md b/redaction_design_document.md new file mode 100644 index 00000000000000..85980cf25f1829 --- /dev/null +++ b/redaction_design_document.md @@ -0,0 +1,1327 @@ +# Knowledge Graph Redaction Layer: Internal API Endpoint Design Document + +## Overview + +This document provides a detailed design for exposing a new internal API endpoint that enables the GitLab Knowledge Graph (GKG) service to perform batch authorization checks against GitLab Rails. This endpoint is the implementation of **Layer 3: Final Redaction Layer** as described in the [Knowledge Graph Security Architecture](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_knowledge_graph/security/). + +### Problem Statement + +The Knowledge Graph service queries ClickHouse for SDLC data (issues, merge requests, vulnerabilities, etc.) that has been pre-filtered using traversal IDs. However, traversal ID filtering provides only coarse-grained authorization at the group/project level. It cannot account for: + +- **Confidential issues** - Only visible to project members and issue participants +- **Runtime checks** - SAML group links, IP restrictions +- **Custom roles** - Fine-grained permissions beyond Reporter+ access +- **Resource-specific visibility** - Feature access levels, banned/blocked users + +Rails is the authoritative source for all authorization decisions via `Ability.allowed?`. 
The GKG service must call back to Rails to perform final redaction before returning results to users. + +### Solution + +Expose a new internal API endpoint at `POST /api/v4/internal/knowledge_graph/authorize` that: + +1. Accepts an array of resource identifiers (type + ID) and a user ID +2. Performs batch authorization checks using the existing `Ability.allowed?` infrastructure +3. Returns a list of authorization results indicating which resources the user can read +4. Logs all redacted (denied) resources for security audit purposes + +--- + +## Architecture + +### Request Flow + +``` +┌─────────────────────┐ ┌──────────────────────────────────────┐ ┌─────────────────┐ +│ Knowledge Graph │ │ GitLab Rails │ │ ClickHouse │ +│ Service │ │ │ │ │ +└─────────────────────┘ └──────────────────────────────────────┘ └─────────────────┘ + │ │ │ + │ 1. Query with traversal_id │ │ + │ filters │ │ + │────────────────────────────────────┼──────────────────────────────────▶│ + │ │ │ + │ 2. Pre-filtered results │ │ + │◀───────────────────────────────────┼───────────────────────────────────│ + │ │ │ + │ 3. POST /internal/knowledge_graph/authorize │ + │ { user_id, resources: { issues: [ids], merge_requests: [ids] } } │ + │───────────────────────────────────▶│ │ + │ │ │ + │ │ 4. For each type, batch load │ + │ │ then Ability.allowed?(user, │ + │ │ :read_, resource) │ + │ │ │ + │ 5. { authorizations: { issues: {id: bool}, merge_requests: {...} } } │ + │◀───────────────────────────────────│ │ + │ │ │ + │ 6. Filter out denied resources │ │ + │ │ │ + │ 7. Return sanitized results │ │ + │ to client │ │ +``` + +### Sequence Diagram + +```mermaid +sequenceDiagram + participant GKG as Knowledge Graph Service + participant Rails as GitLab Rails + participant Auth as Ability.allowed? + participant Logger as RedactedSearchResultsLogger + + GKG->>GKG: Execute ClickHouse query
(pre-filtered by traversal_ids)
+    GKG->>GKG: Group results by type<br/>{issues: [ids], merge_requests: [ids], ...}
+
+    GKG->>Rails: POST /api/v4/internal/knowledge_graph/authorize<br/>Authorization: Bearer JWT<br/>{user_id: 123, resources: {issues: [456, 789], merge_requests: [101]}}
+
+    Rails->>Rails: Verify JWT (shell token)
+    Rails->>Rails: Load User by user_id
+
+    loop For each resource type
+        Rails->>Rails: Batch load resources by type
Model.where(id: ids) + loop For each resource + Rails->>Auth: Ability.allowed?(user, :read_type, resource) + Auth-->>Rails: true/false + end + end + + alt Any resources denied + Rails->>Logger: Log redacted resources + end + + Rails-->>GKG: {authorizations: {issues: {456: true, 789: false}, merge_requests: {101: true}}} + + GKG->>GKG: Filter out denied resources + GKG-->>GKG: Return sanitized results +``` + +--- + +## Detailed Design + +### API Endpoint Specification + +#### Endpoint + +``` +POST /api/v4/internal/knowledge_graph/authorize +``` + +#### Authentication + +Uses GitLab Shell token authentication, following the pattern established by: +- `ee/lib/api/internal/search/zoekt.rb:7` +- `lib/api/helpers.rb:369-371` + +```ruby +before { authenticate_by_gitlab_shell_token! } +``` + +The GKG service will generate JWTs using the same mechanism as Zoekt: +- **Reference**: `ee/lib/search/zoekt/jwt_auth.rb:6-27` +- **Secret**: `Gitlab::Shell.secret_token` (shared secret from `.gitlab_shell_secret`) +- **Algorithm**: HS256 +- **Expiry**: 5 minutes +- **Issuer**: `gitlab` +- **Audience**: `gitlab-knowledge-graph` + +#### Request Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `user_id` | Integer | Yes | The ID of the user to check permissions for | +| `resources` | Object | Yes | Resources grouped by type | +| `resources.` | Array[Integer] | No | Array of resource IDs for each type | + +**Supported Resource Types (keys in `resources` object):** + +| Type Key | Model Class | Ability | Reference | +|----------|-------------|---------|-----------| +| `issues` | `Issue` | `:read_issue` | `app/models/concerns/issuable.rb:495-497` | +| `merge_requests` | `MergeRequest` | `:read_merge_request` | `app/models/concerns/issuable.rb:495-497` | +| `epics` | `Epic` | `:read_epic` | `ee/app/models/epic.rb` (via Issuable) | +| `vulnerabilities` | `Vulnerability` | `:read_vulnerability` | 
`ee/app/models/ee/vulnerability.rb:465-467` | +| `projects` | `Project` | `:read_project` | `app/models/project.rb` | +| `milestones` | `Milestone` | `:read_milestone` | `app/models/milestone.rb` | +| `snippets` | `Snippet` | `:read_snippet` | `app/models/snippet.rb:297-299` | + +#### Request Example + +```json +{ + "user_id": 123, + "resources": { + "issues": [456, 789, 1011], + "merge_requests": [101, 102], + "vulnerabilities": [202], + "epics": [303, 304] + } +} +``` + +#### Response + +| Field | Type | Description | +|-------|------|-------------| +| `authorizations` | Object | Authorization results grouped by type | +| `authorizations.` | Object | Map of resource ID to authorization result | +| `authorizations..` | Boolean | Whether user can read this resource | + +#### Response Example + +```json +{ + "authorizations": { + "issues": { + "456": true, + "789": false, + "1011": true + }, + "merge_requests": { + "101": true, + "102": true + }, + "vulnerabilities": { + "202": true + }, + "epics": { + "303": false, + "304": true + } + } +} +``` + +This grouped structure: +- Eliminates redundant type information in each resource entry +- Allows the service to batch load all resources of the same type in a single query +- Mirrors how the Knowledge Graph service likely structures its ClickHouse query results +- Provides O(1) lookup for authorization results by type and ID + +#### HTTP Status Codes + +| Status | Condition | +|--------|-----------| +| `200 OK` | Authorization check completed successfully | +| `400 Bad Request` | Invalid parameters (missing required fields, invalid types) | +| `401 Unauthorized` | Invalid or missing JWT token | +| `404 Not Found` | User not found | +| `422 Unprocessable Entity` | Request exceeds batch size limit | + +--- + +## Implementation Details + +### File Structure + +``` +ee/ +├── lib/ +│ ├── api/ +│ │ └── internal/ +│ │ └── knowledge_graph.rb # API endpoint definition +│ └── search/ +│ └── knowledge_graph/ +│ └── jwt_auth.rb # 
JWT authentication module +├── app/ +│ └── services/ +│ └── search/ +│ └── knowledge_graph/ +│ └── batch_authorization_service.rb # Core authorization logic +└── spec/ + ├── requests/ + │ └── api/ + │ └── internal/ + │ └── knowledge_graph_spec.rb # Request specs + └── services/ + └── search/ + └── knowledge_graph/ + └── batch_authorization_service_spec.rb # Service specs +``` + +### 1. JWT Authentication Module + +**File**: `ee/lib/search/knowledge_graph/jwt_auth.rb` + +This module follows the exact pattern established by Zoekt at `ee/lib/search/zoekt/jwt_auth.rb:6-40`. + +```ruby +# frozen_string_literal: true + +module Search + module KnowledgeGraph + module JwtAuth + ISSUER = 'gitlab' + AUDIENCE = 'gitlab-knowledge-graph' + TOKEN_EXPIRE_TIME = 5.minutes + + class << self + # Returns the shared secret token used for JWT signing/verification. + # Uses the same secret as GitLab Shell for consistency with other + # internal services (Zoekt, Workhorse, etc.) + # + # @return [String] The secret token + # @see Gitlab::Shell.secret_token + # @see ee/lib/search/zoekt/jwt_auth.rb:12-14 + def secret_token + Gitlab::Shell.secret_token + end + + # Generates a signed JWT token for authenticating requests to the + # Knowledge Graph authorization endpoint. + # + # Token payload includes: + # - iat: Issued at timestamp + # - iss: Issuer (gitlab) + # - aud: Audience (gitlab-knowledge-graph) + # - exp: Expiration timestamp (5 minutes from now) + # + # @return [String] The encoded JWT token + # @see ee/lib/search/zoekt/jwt_auth.rb:16-25 + def jwt_token + current_time = Time.current.to_i + payload = { + iat: current_time, + iss: ISSUER, + aud: AUDIENCE + } + + payload[:exp] = current_time + TOKEN_EXPIRE_TIME.to_i unless skip_expiration? + + JWT.encode(payload, secret_token, 'HS256') + end + + # Returns the Authorization header value for HTTP requests. 
+ # + # @return [String] Bearer token header value + # @see ee/lib/search/zoekt/jwt_auth.rb:27-29 + def authorization_header + "Bearer #{jwt_token}" + end + + private + + # Allows skipping token expiration for testing purposes. + # Controlled via KNOWLEDGE_GRAPH_JWT_SKIP_EXPIRY environment variable. + # + # @return [Boolean] + # @see ee/lib/search/zoekt/jwt_auth.rb:33-35 + def skip_expiration? + Gitlab::Utils.to_boolean(ENV['KNOWLEDGE_GRAPH_JWT_SKIP_EXPIRY']) + end + end + end + end +end +``` + +### 2. Internal API Endpoint + +**File**: `ee/lib/api/internal/knowledge_graph.rb` + +This endpoint follows the patterns established by: +- `ee/lib/api/internal/search/zoekt.rb` - Internal API structure +- `lib/api/internal/base.rb` - Authentication pattern + +```ruby +# frozen_string_literal: true + +module API + module Internal + class KnowledgeGraph < ::API::Base + # Authenticates using GitLab Shell token, same as Zoekt internal API. + # @see lib/api/helpers.rb:369-371 + # @see ee/lib/api/internal/search/zoekt.rb:7 + before { authenticate_by_gitlab_shell_token! } + + feature_category :global_search + urgency :low + + # Maximum total number of resources that can be authorized in a single request. + # This limit prevents timeout issues and ensures reasonable response times. + # Based on SearchService pagination limits. + # @see app/services/search_service.rb:8 + MAX_BATCH_SIZE = 500 + + # Supported resource types (plural keys) and their corresponding model classes. + # Each type maps to a model that implements #to_ability_name. 
+ # @see app/models/concerns/issuable.rb:495-497 + # @see app/models/snippet.rb:297-299 + # @see ee/app/models/ee/vulnerability.rb:465-467 + RESOURCE_TYPES = { + 'issues' => Issue, + 'merge_requests' => MergeRequest, + 'epics' => Epic, + 'vulnerabilities' => Vulnerability, + 'projects' => Project, + 'milestones' => Milestone, + 'snippets' => Snippet + }.freeze + + helpers do + include Gitlab::Loggable + include ::API::Helpers::RateLimiter + + # Logger for redacted search results, following the pattern from SearchService. + # @see app/services/search_service.rb:179-181 + def logger + @logger ||= ::Gitlab::RedactedSearchResultsLogger.build + end + + # Validates that all resource type keys are supported. + # @param resources [Hash] The resources hash with type keys + # @return [Array] Invalid type keys + def invalid_resource_types(resources) + resources.keys - RESOURCE_TYPES.keys + end + + # Counts total resources across all types. + # @param resources [Hash] The resources hash + # @return [Integer] Total count + def total_resource_count(resources) + resources.values.sum(&:size) + end + + # Validates user state to prevent authorization checks for invalid users. + # SECURITY: Prevents information disclosure via blocked/deactivated users. + # + # @param user [User] The user to validate + # @see app/policies/global_policy.rb:76-113 + # @see app/policies/base_policy.rb:43-49 + def validate_user_state!(user) + return if user.active? + + # Return generic error to prevent user enumeration + # @see lib/api/internal/base.rb:249-250 + error!('Authorization check failed', :forbidden) + end + + # Validates user type to prevent authorization checks for system users. + # SECURITY: Prevents placeholder and import users from authorizing resources. + # + # @param user [User] The user to validate + # @see app/policies/base_policy.rb:134-135 + def validate_user_type!(user) + if user.placeholder? || user.import_user? 
+ error!('Authorization check failed', :forbidden) + end + end + end + + namespace 'internal' do + namespace 'knowledge_graph' do + desc 'Batch authorize resources for a user' do + detail 'Checks read permissions for multiple resources grouped by type. Used by the ' \ + 'Knowledge Graph service to perform final redaction of query results. ' \ + 'Resources must be pre-grouped by type (issues, merge_requests, etc.). ' \ + 'This feature was introduced in GitLab 17.x' + end + params do + requires :user_id, type: Integer, desc: 'ID of the user to check permissions for' + requires :resources, type: Hash, desc: 'Resources grouped by type' do + optional :issues, type: Array[Integer], desc: 'Array of Issue IDs' + optional :merge_requests, type: Array[Integer], desc: 'Array of MergeRequest IDs' + optional :epics, type: Array[Integer], desc: 'Array of Epic IDs' + optional :vulnerabilities, type: Array[Integer], desc: 'Array of Vulnerability IDs' + optional :projects, type: Array[Integer], desc: 'Array of Project IDs' + optional :milestones, type: Array[Integer], desc: 'Array of Milestone IDs' + optional :snippets, type: Array[Integer], desc: 'Array of Snippet IDs' + end + end + post 'authorize' do + # SECURITY: Rate limiting to prevent DoS attacks + # @see lib/gitlab/application_rate_limiter.rb + check_rate_limit!(:knowledge_graph_authorize, scope: [params[:user_id]]) + + # Validate resource types + invalid_types = invalid_resource_types(params[:resources]) + if invalid_types.any? + error!("Invalid resource types: #{invalid_types.join(', ')}. 
" \ + "Supported types: #{RESOURCE_TYPES.keys.join(', ')}", :bad_request) + end + + # Validate total batch size to prevent timeout issues + total_count = total_resource_count(params[:resources]) + if total_count > MAX_BATCH_SIZE + error!("Total resource count (#{total_count}) exceeds maximum of #{MAX_BATCH_SIZE}", :unprocessable_entity) + end + + # Load user - use generic error to prevent user enumeration + # SECURITY: Don't distinguish between non-existent and invalid users + # @see lib/api/internal/base.rb:249-250 + user = User.find_by_id(params[:user_id]) + error!('Authorization check failed', :forbidden) unless user + + # SECURITY: Validate user is in valid state for authorization + validate_user_state!(user) + validate_user_type!(user) + + # Perform batch authorization using the dedicated service + service = ::Search::KnowledgeGraph::BatchAuthorizationService.new( + user: user, + resources_by_type: params[:resources] + ) + result = service.execute + + # Log any denied resources for security audit + # Following pattern from SearchService#log_redacted_search_results + # @see app/services/search_service.rb:163-177 + denied_resources = result.flat_map do |type, authorizations| + authorizations.filter_map { |id, allowed| { type: type, id: id } unless allowed } + end + + if denied_resources.any? + log_data = { + class: 'API::Internal::KnowledgeGraph', + message: 'knowledge_graph_redacted_results', + user_id: user.id, + denied_count: denied_resources.size, + denied_resources: denied_resources + } + logger.error(build_structured_payload(**log_data)) + end + + status :ok + { authorizations: result } + end + end + end + end + end +end +``` + +### 3. 
Batch Authorization Service + +**File**: `ee/app/services/search/knowledge_graph/batch_authorization_service.rb` + +This service follows the authorization patterns established by: +- `app/services/search_service.rb:132-161` - `visible_result?` and `redact_unauthorized_results` +- `app/models/ability.rb:42-71` - Batch authorization methods like `issues_readable_by_user` + +```ruby +# frozen_string_literal: true + +module Search + module KnowledgeGraph + # Service to perform batch authorization checks for the Knowledge Graph. + # + # This service is the implementation of Layer 3 (Final Redaction Layer) in + # the Knowledge Graph security architecture. It checks whether a user has + # read access to a batch of resources using GitLab's standard Ability system. + # + # The service follows the same authorization pattern used by SearchService + # for redacting search results, ensuring consistency across all search and + # discovery features. + # + # @example + # service = Search::KnowledgeGraph::BatchAuthorizationService.new( + # user: current_user, + # resources_by_type: { + # 'issues' => [123, 456], + # 'merge_requests' => [789] + # } + # ) + # result = service.execute + # # => { + # # 'issues' => { 123 => true, 456 => false }, + # # 'merge_requests' => { 789 => true } + # # } + # + # @see app/services/search_service.rb:132-161 + # @see app/models/ability.rb:42-71 + class BatchAuthorizationService + include Gitlab::Allowable + + # Mapping of resource type keys (plural) to their corresponding model classes. + # All models must implement #to_ability_name for consistent ability naming. 
+ # + # @see app/models/concerns/issuable.rb:495-497 for Issuable implementation + # @see ee/app/models/ee/vulnerability.rb:465-467 for Vulnerability implementation + RESOURCE_CLASSES = { + 'issues' => Issue, + 'merge_requests' => MergeRequest, + 'epics' => Epic, + 'vulnerabilities' => Vulnerability, + 'projects' => Project, + 'milestones' => Milestone, + 'snippets' => Snippet + }.freeze + + # Preload associations needed for authorization checks to prevent N+1 queries. + # Each resource type may need different associations for its policy evaluation. + # + # @see app/policies/issue_policy.rb - needs project for most conditions + # @see app/policies/merge_request_policy.rb - needs target_project + # @see ee/app/policies/vulnerability_policy.rb - delegates to project + PRELOAD_ASSOCIATIONS = { + 'issues' => [:project, :author, :work_item_type], + 'merge_requests' => [:target_project, :author], + 'vulnerabilities' => [:project], + 'epics' => [:group], + 'projects' => [:namespace, :project_feature], + 'milestones' => [:project, :group], + 'snippets' => [:project, :author] + }.freeze + + # @param user [User] The user to check permissions for + # @param resources_by_type [Hash>] Resources grouped by type + # e.g., { 'issues' => [1, 2, 3], 'merge_requests' => [4, 5] } + def initialize(user:, resources_by_type:) + @user = user + @resources_by_type = resources_by_type + end + + # Executes the batch authorization check. + # + # Processes each resource type, batch loads resources with preloading, + # pre-caches member access for performance, then checks each resource's + # read permission using the standard Ability.allowed? method. + # + # Uses DeclarativePolicy.user_scope to optimize policy evaluation when + # checking multiple resources for the same user, following the pattern + # from Ability.issues_readable_by_user. 
+ # + # @return [Hash>] Authorization results grouped by type + # e.g., { 'issues' => { 1 => true, 2 => false }, 'merge_requests' => { 4 => true } } + # @see app/models/ability.rb:42-48 + def execute + # Step 1: Load all resources with preloading + loaded_resources_by_type = load_all_resources + + # Step 2: Pre-cache member access for all involved projects/groups + # This prevents N+1 queries during authorization checks + # @see app/models/user.rb:2558-2566 + precache_member_access(loaded_resources_by_type) + + # Step 3: Check permissions using user_scope for optimization + # @see app/models/ability.rb:45-47 + DeclarativePolicy.user_scope do + @resources_by_type.each_with_object({}) do |(type, ids), results| + results[type] = authorize_resources_of_type(type, ids, loaded_resources_by_type[type] || {}) + end + end + end + + private + + attr_reader :user, :resources_by_type + + # Loads all resources for all types with appropriate preloading. + # + # @return [Hash>] Loaded resources by type + def load_all_resources + @resources_by_type.each_with_object({}) do |(type, ids), loaded| + loaded[type] = load_resources_for_type(type, ids) + end + end + + # Loads resources for a single type with preloading. + # + # @param type [String] The resource type + # @param ids [Array] The resource IDs + # @return [Hash] Loaded resources indexed by ID + def load_resources_for_type(type, ids) + klass = RESOURCE_CLASSES[type] + return {} unless klass + + preloads = PRELOAD_ASSOCIATIONS[type] || [] + klass.where(id: ids).includes(*preloads).index_by(&:id) + end + + # Pre-caches member access levels for all projects involved in authorization. + # This is critical for performance - without this, each Ability.allowed? call + # may trigger individual member access lookups. 
+ # + # @param loaded_resources_by_type [Hash] All loaded resources + # @see app/models/user.rb:2558-2566 + # @see app/models/project.rb:3483-3491 + def precache_member_access(loaded_resources_by_type) + project_ids = extract_project_ids(loaded_resources_by_type) + group_ids = extract_group_ids(loaded_resources_by_type) + + # Pre-cache project member access + @user.max_member_access_for_project_ids(project_ids) if project_ids.any? + + # Pre-cache group member access + @user.max_member_access_for_group_ids(group_ids) if group_ids.any? + end + + # Extracts all project IDs from loaded resources. + # + # @param loaded_resources_by_type [Hash] All loaded resources + # @return [Array] Unique project IDs + def extract_project_ids(loaded_resources_by_type) + loaded_resources_by_type.flat_map do |type, resources| + resources.values.filter_map do |resource| + if resource.is_a?(Project) + resource.id + elsif resource.respond_to?(:project_id) + resource.project_id + elsif resource.respond_to?(:target_project_id) + resource.target_project_id + end + end + end.uniq.compact + end + + # Extracts all group IDs from loaded resources. + # + # @param loaded_resources_by_type [Hash] All loaded resources + # @return [Array] Unique group IDs + def extract_group_ids(loaded_resources_by_type) + loaded_resources_by_type.flat_map do |type, resources| + resources.values.filter_map do |resource| + resource.group_id if resource.respond_to?(:group_id) + end + end.uniq.compact + end + + # Authorizes all resources of a single type. + # + # Checks authorization for each resource, using pre-loaded data. + # Returns a hash mapping resource IDs to authorization results. 
+ # + # @param type [String] The resource type (e.g., 'issues', 'merge_requests') + # @param ids [Array] The resource IDs to authorize + # @param loaded_resources [Hash] Pre-loaded resources + # @return [Hash] Map of resource ID to authorization result + def authorize_resources_of_type(type, ids, loaded_resources) + klass = RESOURCE_CLASSES[type] + return ids.index_with { false } unless klass + + # Check each resource + ids.index_with do |id| + resource = loaded_resources[id] + + # Resource not found - deny access + # This handles cases where the resource was deleted between + # the ClickHouse query and this authorization check + # + # Security note: We still perform a dummy check to prevent + # timing-based information disclosure about resource existence + if resource.nil? + DeclarativePolicy.has_policy?(klass) # Constant-time padding + next false + end + + # Check visibility using the same pattern as SearchService + visible_result?(resource) + end + end + + # Checks if a resource is visible to the user. + # + # This method is intentionally identical to SearchService#visible_result? + # to ensure consistent authorization behavior across search and the + # Knowledge Graph. + # + # @param resource [ActiveRecord::Base] The resource to check + # @return [Boolean] Whether the user can read the resource + # @see app/services/search_service.rb:132-136 + def visible_result?(resource) + # Resources without policies are considered visible + # This handles edge cases like plain Ruby objects + return true unless resource.respond_to?(:to_ability_name) && DeclarativePolicy.has_policy?(resource) + + # Use the resource's to_ability_name to construct the read ability + # For Issue: :read_issue + # For MergeRequest: :read_merge_request + # For Vulnerability: :read_vulnerability + # etc. + Ability.allowed?(@user, :"read_#{resource.to_ability_name}", resource) + end + end + end +end +``` + +### 4. 
Mount the API Endpoint + +**File**: `ee/lib/ee/api/api.rb` + +Add the mount statement alongside other internal APIs (around line 104): + +```ruby +# Add after line 104: mount ::API::Internal::Search::Zoekt +mount ::API::Internal::KnowledgeGraph +``` + +--- + + +## Security Considerations + +### Authentication + +1. **JWT Verification**: All requests must include a valid JWT signed with the GitLab Shell secret token + - Reference: `lib/gitlab/shell.rb:21-29` + - Reference: `lib/api/helpers.rb:369-371` + - Algorithm: HS256 with HMAC verification + - Secret: 32 bytes (256 bits), stored in Base64 at `Gitlab.config.gitlab_shell.secret_file` + +2. **Token Expiry**: Tokens expire after 5 minutes to limit replay attack window + - Reference: `ee/lib/search/zoekt/jwt_auth.rb:8` + - JWT claims verified: `iss` (issuer), `exp` (expiration), `iat` (issued at) + +3. **No User Credentials in Transport**: User ID is passed in request body, not in JWT payload, avoiding sensitive data in tokens + +4. **Error Handling**: JWT verification failures are tracked via `Gitlab::ErrorTracking.track_exception` + - Reference: `lib/gitlab/shell.rb:26-28` + +### Authorization + +1. **Declarative Policy**: All checks go through `Ability.allowed?` which evaluates: + - Base policies (`app/policies/base_policy.rb`) - handles blocked, deactivated, bot users + - Global policies (`app/policies/global_policy.rb`) - API access restrictions + - Resource policies (`app/policies/issue_policy.rb`, etc.) + - EE policies (`ee/app/policies/`) + - Composite identity checks (`app/models/ability.rb:92-100`) + - Reference: `app/models/ability.rb:73-107` + +2. **Consistent with Search**: Uses the exact same `visible_result?` pattern as `SearchService` + - Reference: `app/services/search_service.rb:132-136` + +3. 
**User State Checks**: DeclarativePolicy automatically evaluates: + - `blocked` condition (`app/policies/base_policy.rb:43-45`) + - `deactivated` condition (`app/policies/base_policy.rb:47-49`) + - `inactive` condition (`app/policies/global_policy.rb:76-81`) + - Service account restrictions (`app/policies/global_policy.rb:96-98`) + +### Audit Logging + +1. **Denied Resources Logged**: All denied resources are logged to `redacted_search_results.log` + - Reference: `app/services/search_service.rb:163-177` + - Reference: `lib/gitlab/redacted_search_results_logger.rb` + +2. **Structured Logging**: Uses GitLab's standard structured logging format + - Reference: `lib/gitlab/loggable.rb` + - Includes: user_id, denied_count, denied_resources (type + id) + +3. **Rate Limit Logging**: Throttled requests logged via `AuthLogger` + - Reference: `lib/gitlab/application_rate_limiter.rb:245-262` + +### Rate Limiting + +1. **Batch Size Limit**: Maximum 500 resources per request to prevent DoS + - Aligns with `SearchService::MAX_PER_PAGE` (200) with headroom for batching + +2. **Request Timeout**: Standard Rails request timeout applies (~60s) + +3. **Recommended Rate Limit Configuration**: + ```ruby + # Add to lib/gitlab/application_rate_limiter.rb + knowledge_graph_authorize: { + threshold: -> { application_settings.knowledge_graph_authorize_limit }, + interval: 1.minute + } + ``` + +--- + +## Performance Considerations + +### Optimizations + +1. **Batch Resource Loading**: Resources are loaded in batches by type to minimize N+1 queries + ```ruby + klass.where(id: ids).index_by(&:id) + ``` + +2. **DeclarativePolicy User Scope**: Uses `DeclarativePolicy.user_scope` to cache policy evaluations for the same user across multiple resources + - Reference: `app/models/ability.rb:45-47` + - Reference: `doc/development/policies.md:133-167` + - This is critical for performance - policy conditions cached per-user across all resources + +3. 
**Request-Scoped Caching**: Policy evaluations cached in `SafeRequestStore` + - Reference: `app/models/ability.rb:114-122` + - Cache key pattern: `/dp/*` + +4. **Preloading for Authorization**: Add preloads for common authorization associations: + ```ruby + # Updated service implementation with preloading + PRELOAD_ASSOCIATIONS = { + 'issues' => [:project, :author, :work_item_type], + 'merge_requests' => [:target_project, :author], + 'vulnerabilities' => [:project], + 'epics' => [:group], + 'projects' => [:namespace, :project_feature], + 'milestones' => [:project, :group], + 'snippets' => [:project, :author] + }.freeze + + def load_resources_for_type(type, ids) + klass = RESOURCE_CLASSES[type] + return {} unless klass + + preloads = PRELOAD_ASSOCIATIONS[type] || [] + klass.where(id: ids).includes(*preloads).index_by(&:id) + end + ``` + +5. **Member Access Pre-caching**: For resources with project-based authorization, pre-load member access: + ```ruby + # Pre-cache member access levels for all involved projects + # Reference: app/models/user.rb:2558-2566 + def precache_member_access(user, project_ids) + return if project_ids.empty? + + user.max_member_access_for_project_ids(project_ids) + end + ``` + +### Preload Associations + +A preload association is a technique to load related database records in a single query upfront, rather than fetching them one-by-one later (which causes N+1 query problems). + +#### The N+1 Query Problem + + Without preloading, if you load 100 issues and then check each issue's project for authorization: +```ruby + issues = Issue.where(id: ids) # 1 query + + issues.each do |issue| + issue.project # 100 separate queries! (N+1 problem) + end +``` + This results in 101 queries (1 + 100). + +#### With Preloading + +```ruby +issues = Issue.where(id: ids).includes(:project) # 2 queries total + + issues.each do |issue| + issue.project # No query - already loaded! + end +``` + +This results in 2 queries: +1. SELECT * FROM issues WHERE id IN (...) 
+2. SELECT * FROM projects WHERE id IN (...) + +The `PRELOAD_ASSOCIATIONS` constant defines which related records to load for each resource type: + +```ruby + PRELOAD_ASSOCIATIONS = { + 'issues' => [:project, :author, :work_item_type], + 'merge_requests' => [:target_project, :author], + 'vulnerabilities' => [:project], + 'epics' => [:group], + # ... + }.freeze +``` + +These associations are needed because the authorization policies access them. For example: +- IssuePolicy checks issue.project to determine project membership +- VulnerabilityPolicy delegates to vulnerability.project (see ee/app/policies/vulnerability_policy.rb:4) +- EpicPolicy checks epic.group for group membership + +Without preloading these, every `Ability.allowed?` call would trigger additional database queries. + +### Benchmarks + +Expected performance characteristics (with optimizations): +- 100 resources: ~50-100ms +- 500 resources: ~200-500ms + +Without preloading optimizations: +- 100 resources: ~200-400ms (N+1 queries) +- 500 resources: ~800-2000ms (severe N+1 impact) + +### Recommendations for GKG Service + +1. **Batch Requests**: Send authorization requests in batches of 100-500 resources +2. **Parallel Requests**: For large result sets (>500), send multiple parallel batch requests +3. **Circuit Breaker**: Implement circuit breaker pattern for authorization endpoint failures +4. **Caching Layer**: Consider caching authorization results in GKG for short periods (30-60s) for frequently accessed resources +5. 
**Request Coalescing**: If multiple GKG queries need authorization for overlapping resources, coalesce into single request + +--- + +## Testing Strategy + +### Unit Tests + +**File**: `ee/spec/services/search/knowledge_graph/batch_authorization_service_spec.rb` + +```ruby +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Search::KnowledgeGraph::BatchAuthorizationService, feature_category: :global_search do + let_it_be(:user) { create(:user) } + let_it_be(:project) { create(:project, :private) } + let_it_be(:public_project) { create(:project, :public) } + + describe '#execute' do + context 'with issues' do + let_it_be(:accessible_issue) { create(:issue, project: public_project) } + let_it_be(:confidential_issue) { create(:issue, :confidential, project: project) } + + it 'allows access to public issues' do + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [accessible_issue.id] } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { accessible_issue.id => true } + }) + end + + it 'denies access to confidential issues for non-members' do + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [confidential_issue.id] } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { confidential_issue.id => false } + }) + end + + it 'allows access to confidential issues for project members' do + project.add_reporter(user) + + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [confidential_issue.id] } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { confidential_issue.id => true } + }) + end + + it 'handles multiple issues in a single batch' do + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [accessible_issue.id, confidential_issue.id] } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { + accessible_issue.id => true, + confidential_issue.id => 
false + } + }) + end + end + + context 'with non-existent resources' do + it 'denies access to non-existent resources' do + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [non_existing_record_id] } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { non_existing_record_id => false } + }) + end + end + + context 'with mixed resource types' do + let_it_be(:issue) { create(:issue, project: public_project) } + let_it_be(:merge_request) { create(:merge_request, source_project: project) } + + it 'handles multiple resource types' do + service = described_class.new( + user: user, + resources_by_type: { + 'issues' => [issue.id], + 'merge_requests' => [merge_request.id] + } + ) + + result = service.execute + + expect(result).to eq({ + 'issues' => { issue.id => true }, + 'merge_requests' => { merge_request.id => false } + }) + end + end + + context 'with empty resources' do + it 'returns empty result for empty input' do + service = described_class.new( + user: user, + resources_by_type: {} + ) + + result = service.execute + + expect(result).to eq({}) + end + + it 'handles empty arrays for a type' do + service = described_class.new( + user: user, + resources_by_type: { 'issues' => [] } + ) + + result = service.execute + + expect(result).to eq({ 'issues' => {} }) + end + end + end +end +``` + +### Request Specs + +**File**: `ee/spec/requests/api/internal/knowledge_graph_spec.rb` + +```ruby +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe API::Internal::KnowledgeGraph, feature_category: :global_search do + include GitlabShellHelpers + + let_it_be(:user) { create(:user) } + let_it_be(:project) { create(:project, :public) } + let_it_be(:private_project) { create(:project, :private) } + let_it_be(:issue) { create(:issue, project: project) } + let_it_be(:confidential_issue) { create(:issue, :confidential, project: private_project) } + let_it_be(:merge_request) { create(:merge_request, source_project: 
project) } + + describe 'POST /internal/knowledge_graph/authorize' do + let(:params) do + { + user_id: user.id, + resources: { + 'issues' => [issue.id] + } + } + end + + context 'with valid authentication' do + it 'returns authorization results' do + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response['authorizations']).to eq({ + 'issues' => { issue.id.to_s => true } + }) + end + + it 'handles multiple resource types' do + params[:resources] = { + 'issues' => [issue.id, confidential_issue.id], + 'merge_requests' => [merge_request.id] + } + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response['authorizations']).to eq({ + 'issues' => { + issue.id.to_s => true, + confidential_issue.id.to_s => false + }, + 'merge_requests' => { + merge_request.id.to_s => true + } + }) + end + + it 'logs denied resources' do + params[:resources] = { 'issues' => [confidential_issue.id] } + + expect(Gitlab::RedactedSearchResultsLogger).to receive(:build).and_call_original + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:ok) + end + end + + context 'without authentication' do + it 'returns unauthorized' do + post api('/internal/knowledge_graph/authorize'), params: params + + expect(response).to have_gitlab_http_status(:unauthorized) + end + end + + context 'with invalid user_id' do + it 'returns not found' do + params[:user_id] = non_existing_record_id + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:not_found) + end + end + + context 'with batch size exceeding 
limit' do + it 'returns unprocessable entity' do + params[:resources] = { 'issues' => Array.new(501) { 1 } } + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:unprocessable_entity) + expect(json_response['error']).to include('exceeds maximum of 500') + end + + it 'counts total across all types' do + params[:resources] = { + 'issues' => Array.new(300) { 1 }, + 'merge_requests' => Array.new(201) { 1 } + } + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:unprocessable_entity) + end + end + + context 'with invalid resource type' do + it 'returns bad request' do + params[:resources] = { 'invalid_type' => [1, 2, 3] } + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:bad_request) + expect(json_response['error']).to include('Invalid resource types') + end + end + + context 'with empty resources' do + it 'returns empty authorizations' do + params[:resources] = {} + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response['authorizations']).to eq({}) + end + end + end +end +``` + +--- + +## Rollout Plan + +### Phase 1: Implementation (Milestone X.Y) + +1. Implement JWT auth module +2. Implement batch authorization service +3. Implement API endpoint +4. Add comprehensive tests +5. Update `ee/lib/ee/api/api.rb` to mount endpoint + +### Phase 2: Integration Testing (Milestone X.Y+1) + +1. Deploy to staging +2. GKG team integrates with endpoint +3. End-to-end testing with real KG queries +4. Performance benchmarking + +### Phase 3: Production Rollout (Milestone X.Y+2) + +1. 
Deploy behind feature flag +2. Gradual rollout via percentage-based flag +3. Monitor metrics and logs +4. Full enablement + +### Feature Flag + +```ruby +Feature.enabled?(:knowledge_graph_authorization_api) +``` + +--- + +## Monitoring and Observability + +### Metrics + +| Metric Name | Type | Description | +|-------------|------|-------------| +| `gitlab_knowledge_graph_authorization_requests_total` | Counter | Total authorization requests | +| `gitlab_knowledge_graph_authorization_duration_seconds` | Histogram | Request duration | +| `gitlab_knowledge_graph_authorization_batch_size` | Histogram | Resources per request | +| `gitlab_knowledge_graph_authorization_denied_total` | Counter | Total denied resources | + +### Alerts + +1. **High Denial Rate**: Alert if >20% of resources are denied (may indicate ineffective traversal_id filtering) +2. **High Latency**: Alert if p95 latency >500ms +3. **Error Rate**: Alert if error rate >1% + +### Dashboards + +Create Grafana dashboard with: +- Request rate +- Latency percentiles (p50, p95, p99) +- Batch size distribution +- Denial rate by resource type +- Error rate + +--- + +## Future Enhancements + +1. **Caching**: Add short-lived caching of authorization results (30-60s TTL) +2. **Async Processing**: For very large batches, consider async job processing +3. **Preemptive Filtering**: Push more filtering to ClickHouse queries based on known patterns +4. 
**Additional Resource Types**: Add support for pipelines, jobs, environments as needed + +--- + +## References + +### Key Files + +| File | Purpose | +|------|---------| +| `app/services/search_service.rb:132-161` | `visible_result?` and `redact_unauthorized_results` pattern | +| `app/models/ability.rb:42-71` | Batch authorization methods | +| `app/models/ability.rb:73-107` | `Ability.allowed?` implementation | +| `ee/lib/api/internal/search/zoekt.rb` | Similar internal API pattern | +| `ee/lib/search/zoekt/jwt_auth.rb` | JWT authentication pattern | +| `lib/api/helpers.rb:369-371` | `authenticate_by_gitlab_shell_token!` | +| `lib/gitlab/shell.rb:21-29` | JWT verification | +| `lib/gitlab/redacted_search_results_logger.rb` | Logging for redacted results | +| `app/models/concerns/issuable.rb:495-497` | `to_ability_name` implementation | + +### Documentation + +- [Knowledge Graph Security Architecture](https://handbook.gitlab.com/handbook/engineering/architecture/design-documents/gitlab_knowledge_graph/security/) +- [GitLab Internal API Documentation](https://docs.gitlab.com/ee/development/internal_api/) +- [Declarative Policy Documentation](https://docs.gitlab.com/ee/development/policies.html) -- GitLab From f089f810324a05fb2696785c15c43fae58ef3adc Mon Sep 17 00:00:00 2001 From: michaelangeloio Date: Fri, 5 Dec 2025 20:16:44 -0500 Subject: [PATCH 2/3] doc updates --- redaction_design_document.md | 116 ++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 30 deletions(-) diff --git a/redaction_design_document.md b/redaction_design_document.md index 85980cf25f1829..4d751bb14e774a 100644 --- a/redaction_design_document.md +++ b/redaction_design_document.md @@ -19,10 +19,12 @@ Rails is the authoritative source for all authorization decisions via `Ability.a Expose a new internal API endpoint at `POST /api/v4/internal/knowledge_graph/authorize` that: -1. Accepts an array of resource identifiers (type + ID) and a user ID -2. 
Performs batch authorization checks using the existing `Ability.allowed?` infrastructure -3. Returns a list of authorization results indicating which resources the user can read -4. Logs all redacted (denied) resources for security audit purposes +1. Accepts a batch of resource identifiers grouped by type (max 500 total) and a user ID +2. Loads those specific resources and extracts their project/group associations +3. Pre-caches member access only for projects/groups in the batch (NOT the user's entire project list) +4. Performs authorization checks using `Ability.allowed?` with `DeclarativePolicy.user_scope` optimization +5. Returns authorization results indicating which resources the user can read +6. Logs all redacted (denied) resources for security audit purposes --- @@ -66,30 +68,35 @@ Expose a new internal API endpoint at `POST /api/v4/internal/knowledge_graph/aut sequenceDiagram participant GKG as Knowledge Graph Service participant Rails as GitLab Rails + participant DB as Database participant Auth as Ability.allowed? participant Logger as RedactedSearchResultsLogger GKG->>GKG: Execute ClickHouse query
(pre-filtered by traversal_ids) - GKG->>GKG: Group results by type
{issues: [ids], merge_requests: [ids], ...} + GKG->>GKG: Group results by type (max 500 total)
{issues: [ids], merge_requests: [ids], ...} GKG->>Rails: POST /api/v4/internal/knowledge_graph/authorize
Authorization: Bearer JWT
{user_id: 123, resources: {issues: [456, 789], merge_requests: [101]}} Rails->>Rails: Verify JWT (shell token) - Rails->>Rails: Load User by user_id + Rails->>Rails: Validate user state - loop For each resource type - Rails->>Rails: Batch load resources by type
Model.where(id: ids) - loop For each resource - Rails->>Auth: Ability.allowed?(user, :read_type, resource) - Auth-->>Rails: true/false - end + Rails->>DB: Load resources with preloads
Issue.where(id: [...]).includes(:project) + DB-->>Rails: Resources (max 500) + + Rails->>Rails: Extract project_ids FROM loaded resources
(max ~500 unique projects) + Rails->>DB: Pre-cache member access
WHERE project_id IN (extracted IDs) + DB-->>Rails: Access levels cached + + loop For each resource (within DeclarativePolicy.user_scope) + Rails->>Auth: Ability.allowed?(user, :read_type, resource) + Auth-->>Rails: true/false (uses cached access) end alt Any resources denied Rails->>Logger: Log redacted resources end - Rails-->>GKG: {authorizations: {issues: {456: true, 789: false}, merge_requests: {101: true}}} + Rails-->>GKG: {authorizations: {issues: {456: true, 789: false}, ...}} GKG->>GKG: Filter out denied resources GKG-->>GKG: Return sanitized results @@ -205,8 +212,9 @@ This grouped structure: | `200 OK` | Authorization check completed successfully | | `400 Bad Request` | Invalid parameters (missing required fields, invalid types) | | `401 Unauthorized` | Invalid or missing JWT token | -| `404 Not Found` | User not found | +| `403 Forbidden` | User not found, blocked, deactivated, or invalid type (generic error to prevent enumeration) | | `422 Unprocessable Entity` | Request exceeds batch size limit | +| `429 Too Many Requests` | Rate limit exceeded | --- @@ -629,21 +637,32 @@ module Search klass.where(id: ids).includes(*preloads).index_by(&:id) end - # Pre-caches member access levels for all projects involved in authorization. - # This is critical for performance - without this, each Ability.allowed? call - # may trigger individual member access lookups. + # Pre-caches member access levels for projects/groups involved in THIS authorization batch. # - # @param loaded_resources_by_type [Hash] All loaded resources - # @see app/models/user.rb:2558-2566 - # @see app/models/project.rb:3483-3491 + # IMPORTANT SCALABILITY NOTE: This only pre-caches access for the resources in + # the current batch (max 500 resources), NOT all projects/groups the user has + # access to. This is critical for users with massive project counts (e.g., 500K). + # + # The flow is: + # 1. GKG sends batch of 500 resource IDs to authorize + # 2. 
We load those 500 resources (issues, MRs, etc.) + # 3. We extract project_ids FROM THOSE 500 RESOURCES (max ~500 unique projects) + # 4. We pre-cache member access for just those ~500 projects + # + # This ensures we never query the user's full project list, regardless of + # whether they have access to 100 or 500,000 projects. + # + # @param loaded_resources_by_type [Hash] Loaded resources from current batch only + # @see app/models/user.rb:2536-2549 for max_member_access_for_project_ids + # @see app/models/user.rb:2184-2207 for GitLab's batching pattern with large ID sets def precache_member_access(loaded_resources_by_type) project_ids = extract_project_ids(loaded_resources_by_type) group_ids = extract_group_ids(loaded_resources_by_type) - # Pre-cache project member access + # Pre-cache project member access for THIS BATCH only (max ~500 IDs) @user.max_member_access_for_project_ids(project_ids) if project_ids.any? - # Pre-cache group member access + # Pre-cache group member access for THIS BATCH only @user.max_member_access_for_group_ids(group_ids) if group_ids.any? end @@ -861,12 +880,18 @@ mount ::API::Internal::KnowledgeGraph 5. **Member Access Pre-caching**: For resources with project-based authorization, pre-load member access: ```ruby - # Pre-cache member access levels for all involved projects - # Reference: app/models/user.rb:2558-2566 - def precache_member_access(user, project_ids) - return if project_ids.empty? 
- - user.max_member_access_for_project_ids(project_ids) + # Pre-cache member access for projects extracted FROM THE BATCH RESOURCES + # NOT the user's entire project list - this is critical for users with 500K+ projects + # + # The batch size limit (500) ensures we never query more than ~500 project IDs + # Reference: app/models/user.rb:2536-2549 + def precache_member_access(loaded_resources_by_type) + # Extract project IDs from the loaded resources (max ~500) + project_ids = extract_project_ids(loaded_resources_by_type) + group_ids = extract_group_ids(loaded_resources_by_type) + + @user.max_member_access_for_project_ids(project_ids) if project_ids.any? + @user.max_member_access_for_group_ids(group_ids) if group_ids.any? end ``` @@ -1167,14 +1192,45 @@ RSpec.describe API::Internal::KnowledgeGraph, feature_category: :global_search d end context 'with invalid user_id' do - it 'returns not found' do + it 'returns forbidden with generic error to prevent enumeration' do params[:user_id] = non_existing_record_id post api('/internal/knowledge_graph/authorize'), params: params, headers: gitlab_shell_internal_api_request_header - expect(response).to have_gitlab_http_status(:not_found) + expect(response).to have_gitlab_http_status(:forbidden) + expect(json_response['error']).to eq('Authorization check failed') + end + end + + context 'with blocked user' do + let_it_be(:blocked_user) { create(:user, :blocked) } + + it 'returns forbidden with generic error' do + params[:user_id] = blocked_user.id + + post api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:forbidden) + expect(json_response['error']).to eq('Authorization check failed') + end + end + + context 'with deactivated user' do + let_it_be(:deactivated_user) { create(:user, :deactivated) } + + it 'returns forbidden with generic error' do + params[:user_id] = deactivated_user.id + + post 
api('/internal/knowledge_graph/authorize'), + params: params, + headers: gitlab_shell_internal_api_request_header + + expect(response).to have_gitlab_http_status(:forbidden) + expect(json_response['error']).to eq('Authorization check failed') end end -- GitLab From 5ae897e40a53ab4cf60237d95f7d0364e625715c Mon Sep 17 00:00:00 2001 From: michaelangeloio Date: Fri, 5 Dec 2025 21:52:29 -0500 Subject: [PATCH 3/3] feat: knowledge graph redaction API endpoint --- config/bounded_contexts.yml | 5 + db/structure.sql | 171 ++--- .../batch_authorization_service.rb | 258 ++++++++ .../api/internal/knowledge_graph/authorize.rb | 191 ++++++ ee/lib/ee/api/api.rb | 1 + ee/lib/knowledge_graph/jwt_auth.rb | 70 ++ scripts/knowledge_graph/.gitignore | 1 + scripts/knowledge_graph/__init__.py | 1 + scripts/knowledge_graph/__main__.py | 10 + scripts/knowledge_graph/clients.py | 173 +++++ scripts/knowledge_graph/config.py | 84 +++ scripts/knowledge_graph/factory.py | 316 +++++++++ scripts/knowledge_graph/tests.py | 604 ++++++++++++++++++ .../knowledge_graph/validate_authorization.py | 136 ++++ scripts/knowledge_graph_jwt.rb | 55 ++ 15 files changed, 1958 insertions(+), 118 deletions(-) create mode 100644 ee/app/services/knowledge_graph/batch_authorization_service.rb create mode 100644 ee/lib/api/internal/knowledge_graph/authorize.rb create mode 100644 ee/lib/knowledge_graph/jwt_auth.rb create mode 100644 scripts/knowledge_graph/.gitignore create mode 100644 scripts/knowledge_graph/__init__.py create mode 100644 scripts/knowledge_graph/__main__.py create mode 100644 scripts/knowledge_graph/clients.py create mode 100644 scripts/knowledge_graph/config.py create mode 100644 scripts/knowledge_graph/factory.py create mode 100644 scripts/knowledge_graph/tests.py create mode 100755 scripts/knowledge_graph/validate_authorization.py create mode 100755 scripts/knowledge_graph_jwt.rb diff --git a/config/bounded_contexts.yml b/config/bounded_contexts.yml index c4a4a438a445f7..5a5cdf45575bc9 100644 --- 
a/config/bounded_contexts.yml +++ b/config/bounded_contexts.yml @@ -170,6 +170,11 @@ domains: feature_categories: - integrations + KnowledgeGraph: + description: Knowledge Graph service + feature_categories: + - knowledge_graph + Issuables: description: Common concerns between work items and merge requests such as setting milestone, diff --git a/db/structure.sql b/db/structure.sql index 484afb84877286..df4c50822b7291 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -5253,36 +5253,6 @@ RETURN NULL; END $$; -CREATE TABLE ai_code_suggestion_events ( - id bigint NOT NULL, - "timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - organization_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, - namespace_path text, - payload jsonb, - CONSTRAINT check_ba9ae3f258 CHECK ((char_length(namespace_path) <= 255)) -) -PARTITION BY RANGE ("timestamp"); - -CREATE TABLE ai_duo_chat_events ( - id bigint NOT NULL, - "timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - personal_namespace_id bigint, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, - namespace_path text, - payload jsonb, - organization_id bigint, - CONSTRAINT check_628cdfbf3f CHECK ((char_length(namespace_path) <= 255)), - CONSTRAINT check_f759f45177 CHECK ((organization_id IS NOT NULL)) -) -PARTITION BY RANGE ("timestamp"); - CREATE TABLE ai_events_counts ( id bigint NOT NULL, events_date date NOT NULL, @@ -5294,21 +5264,6 @@ CREATE TABLE ai_events_counts ( ) PARTITION BY RANGE (events_date); -CREATE TABLE ai_troubleshoot_job_events ( - id bigint NOT NULL, - "timestamp" timestamp with time zone NOT NULL, - user_id bigint NOT NULL, - job_id bigint NOT NULL, - project_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - event smallint NOT NULL, 
- namespace_path text, - payload jsonb, - CONSTRAINT check_29d6dbc329 CHECK ((char_length(namespace_path) <= 255)) -) -PARTITION BY RANGE ("timestamp"); - CREATE TABLE ai_usage_events ( id bigint NOT NULL, "timestamp" timestamp with time zone NOT NULL, @@ -6040,7 +5995,7 @@ PARTITION BY RANGE (created_at); CREATE TABLE loose_foreign_keys_deleted_records ( id bigint NOT NULL, - partition bigint DEFAULT 3 NOT NULL, + partition bigint DEFAULT 1 NOT NULL, primary_key_value bigint NOT NULL, status smallint DEFAULT 1 NOT NULL, created_at timestamp with time zone DEFAULT now() NOT NULL, @@ -6155,7 +6110,7 @@ PARTITION BY LIST (partition_id); CREATE TABLE p_ci_finished_build_ch_sync_events ( build_id bigint NOT NULL, - partition bigint DEFAULT 12 NOT NULL, + partition bigint DEFAULT 1 NOT NULL, build_finished_at timestamp without time zone NOT NULL, processed boolean DEFAULT false NOT NULL, project_id bigint NOT NULL @@ -6165,7 +6120,7 @@ PARTITION BY LIST (partition); CREATE TABLE p_ci_finished_pipeline_ch_sync_events ( pipeline_id bigint NOT NULL, project_namespace_id bigint NOT NULL, - partition bigint DEFAULT 13 NOT NULL, + partition bigint DEFAULT 1 NOT NULL, pipeline_finished_at timestamp without time zone NOT NULL, processed boolean DEFAULT false NOT NULL ) @@ -6199,48 +6154,6 @@ CREATE TABLE p_generated_ref_commits ( ) PARTITION BY RANGE (project_id); -CREATE TABLE p_knowledge_graph_enabled_namespaces ( - id bigint NOT NULL, - namespace_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - state smallint DEFAULT 0 NOT NULL -) -PARTITION BY RANGE (namespace_id); - -CREATE TABLE p_knowledge_graph_replicas ( - id bigint NOT NULL, - namespace_id bigint NOT NULL, - knowledge_graph_enabled_namespace_id bigint, - zoekt_node_id bigint NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - state smallint DEFAULT 0 NOT NULL, - retries_left smallint NOT NULL, - 
reserved_storage_bytes bigint DEFAULT 10485760 NOT NULL, - indexed_at timestamp with time zone, - schema_version smallint DEFAULT 0 NOT NULL, - CONSTRAINT c_p_knowledge_graph_replicas_retries_status CHECK (((retries_left > 0) OR ((retries_left = 0) AND (state >= 200)))) -) -PARTITION BY RANGE (namespace_id); - -CREATE TABLE p_knowledge_graph_tasks ( - id bigint NOT NULL, - partition_id bigint DEFAULT 1 NOT NULL, - zoekt_node_id bigint NOT NULL, - namespace_id bigint NOT NULL, - knowledge_graph_replica_id bigint NOT NULL, - perform_at timestamp with time zone DEFAULT now() NOT NULL, - created_at timestamp with time zone NOT NULL, - updated_at timestamp with time zone NOT NULL, - state smallint DEFAULT 0 NOT NULL, - task_type smallint NOT NULL, - retries_left smallint NOT NULL, - metadata jsonb DEFAULT '"{}"'::jsonb NOT NULL, - CONSTRAINT c_p_knowledge_graph_tasks_on_retries_left CHECK (((retries_left > 0) OR ((retries_left = 0) AND (state = 255)))) -) -PARTITION BY LIST (partition_id); - CREATE SEQUENCE sent_notifications_id_seq START WITH 1 INCREMENT BY 1 @@ -11150,6 +11063,20 @@ CREATE SEQUENCE ai_catalog_items_id_seq ALTER SEQUENCE ai_catalog_items_id_seq OWNED BY ai_catalog_items.id; +CREATE TABLE ai_code_suggestion_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + organization_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + CONSTRAINT check_ba9ae3f258 CHECK ((char_length(namespace_path) <= 255)) +) +PARTITION BY RANGE ("timestamp"); + CREATE SEQUENCE ai_code_suggestion_events_id_seq START WITH 1 INCREMENT BY 1 @@ -11208,6 +11135,22 @@ CREATE SEQUENCE ai_conversation_threads_id_seq ALTER SEQUENCE ai_conversation_threads_id_seq OWNED BY ai_conversation_threads.id; +CREATE TABLE ai_duo_chat_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + 
user_id bigint NOT NULL, + personal_namespace_id bigint, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + organization_id bigint, + CONSTRAINT check_628cdfbf3f CHECK ((char_length(namespace_path) <= 255)), + CONSTRAINT check_f759f45177 CHECK ((organization_id IS NOT NULL)) +) +PARTITION BY RANGE ("timestamp"); + CREATE SEQUENCE ai_duo_chat_events_id_seq START WITH 1 INCREMENT BY 1 @@ -11328,8 +11271,8 @@ CREATE TABLE ai_settings ( duo_core_features_enabled boolean, duo_agent_platform_service_url text, duo_agent_platform_request_count integer DEFAULT 0 NOT NULL, - ai_gateway_timeout_seconds integer DEFAULT 60, foundational_agents_default_enabled boolean DEFAULT true, + ai_gateway_timeout_seconds integer DEFAULT 60, minimum_access_level_execute smallint, minimum_access_level_manage smallint, minimum_access_level_enable_on_projects smallint, @@ -11360,6 +11303,21 @@ CREATE TABLE ai_testing_terms_acceptances ( CONSTRAINT check_5efe98894e CHECK ((char_length(user_email) <= 255)) ); +CREATE TABLE ai_troubleshoot_job_events ( + id bigint NOT NULL, + "timestamp" timestamp with time zone NOT NULL, + user_id bigint NOT NULL, + job_id bigint NOT NULL, + project_id bigint NOT NULL, + created_at timestamp with time zone NOT NULL, + updated_at timestamp with time zone NOT NULL, + event smallint NOT NULL, + namespace_path text, + payload jsonb, + CONSTRAINT check_29d6dbc329 CHECK ((char_length(namespace_path) <= 255)) +) +PARTITION BY RANGE ("timestamp"); + CREATE SEQUENCE ai_troubleshoot_job_events_id_seq START WITH 1 INCREMENT BY 1 @@ -27126,8 +27084,8 @@ CREATE TABLE security_policy_dismissals ( dismissal_types smallint[] DEFAULT '{}'::smallint[] NOT NULL, comment text, status smallint DEFAULT 0 NOT NULL, - license_occurrence_uuids text[] DEFAULT '{}'::text[] NOT NULL, licenses jsonb DEFAULT '{}'::jsonb NOT NULL, + license_occurrence_uuids text[] DEFAULT 
'{}'::text[] NOT NULL, CONSTRAINT check_654ff06528 CHECK ((char_length(comment) <= 255)), CONSTRAINT check_88beed9dc9 CHECK ((security_findings_uuids IS NOT NULL)) ); @@ -28932,8 +28890,8 @@ CREATE TABLE user_preferences ( work_items_display_settings jsonb DEFAULT '{}'::jsonb NOT NULL, default_duo_add_on_assignment_id bigint, markdown_maintain_indentation boolean DEFAULT false NOT NULL, - merge_request_dashboard_show_drafts boolean DEFAULT true NOT NULL, project_studio_enabled boolean DEFAULT false NOT NULL, + merge_request_dashboard_show_drafts boolean DEFAULT true NOT NULL, duo_default_namespace_id bigint, policy_advanced_editor boolean DEFAULT false NOT NULL, early_access_studio_participant boolean DEFAULT false NOT NULL, @@ -30023,8 +29981,9 @@ CREATE TABLE vulnerability_occurrences ( initial_pipeline_id bigint, latest_pipeline_id bigint, security_project_tracked_context_id bigint, - new_uuid uuid, detected_at timestamp with time zone DEFAULT now(), + new_uuid uuid, + partition_id bigint DEFAULT 1, CONSTRAINT check_4a3a60f2ba CHECK ((char_length(solution) <= 7000)), CONSTRAINT check_ade261da6b CHECK ((char_length(description) <= 15000)), CONSTRAINT check_f602da68dd CHECK ((char_length(cve) <= 48400)) @@ -54753,27 +54712,3 @@ ALTER TABLE ONLY user_follow_users ALTER TABLE ONLY user_follow_users ADD CONSTRAINT user_follow_users_follower_id_fkey FOREIGN KEY (follower_id) REFERENCES users(id) ON DELETE CASCADE; - -CREATE PUBLICATION siphon_publication_ci_1 WITH (publish = 'insert, update, delete, truncate'); - -CREATE PUBLICATION siphon_publication_main_1 WITH (publish = 'insert, update, delete, truncate'); - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_pipeline_metadata; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_runner_namespaces; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_runner_projects; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY ci_running_builds; - -ALTER PUBLICATION 
siphon_publication_ci_1 ADD TABLE ONLY group_type_ci_runner_machines; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY group_type_ci_runners; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY instance_type_ci_runner_machines; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY instance_type_ci_runners; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY project_type_ci_runner_machines; - -ALTER PUBLICATION siphon_publication_ci_1 ADD TABLE ONLY project_type_ci_runners; diff --git a/ee/app/services/knowledge_graph/batch_authorization_service.rb b/ee/app/services/knowledge_graph/batch_authorization_service.rb new file mode 100644 index 00000000000000..f2df65dca545de --- /dev/null +++ b/ee/app/services/knowledge_graph/batch_authorization_service.rb @@ -0,0 +1,258 @@ +# frozen_string_literal: true + +module KnowledgeGraph + # Service to perform batch authorization checks for the Knowledge Graph. + # + # This service is the implementation of Layer 3 (Final Redaction Layer) in + # the Knowledge Graph security architecture. It checks whether a user has + # read access to a batch of resources using GitLab's standard Ability system. + # + # The service follows the same authorization pattern used by SearchService + # for redacting search results, ensuring consistency across all search and + # discovery features. + # + # == Scalability + # + # All operations are bounded by the batch size (max 500 resources): + # 1. Load resources: max 500 records across all types + # 2. Extract project_ids: max ~500 unique projects FROM those resources + # 3. Pre-cache member access: single query with max ~500 project IDs + # 4. Authorization checks: max 500 Ability.allowed? calls (optimized by user_scope) + # + # This ensures the endpoint is safe for users with 500K+ project access. 
+ # + # @example + # service = KnowledgeGraph::BatchAuthorizationService.new( + # user: current_user, + # resources_by_type: { + # 'issues' => [123, 456], + # 'merge_requests' => [789] + # } + # ) + # result = service.execute + # # => { + # # 'issues' => { 123 => true, 456 => false }, + # # 'merge_requests' => { 789 => true } + # # } + # + # @see app/services/search_service.rb:132-161 + # @see app/models/ability.rb:42-71 + class BatchAuthorizationService + include Gitlab::Allowable + + # Mapping of resource type keys (plural) to their corresponding model classes. + # All models must implement #to_ability_name for consistent ability naming. + # + # @see app/models/concerns/issuable.rb:495-497 for Issuable implementation + # @see ee/app/models/ee/vulnerability.rb:465-467 for Vulnerability implementation + RESOURCE_CLASSES = { + 'issues' => Issue, + 'merge_requests' => MergeRequest, + 'epics' => Epic, + 'vulnerabilities' => Vulnerability, + 'projects' => Project, + 'milestones' => Milestone, + 'snippets' => Snippet + }.freeze + + # Preload associations needed for authorization checks to prevent N+1 queries. + # Each resource type may need different associations for its policy evaluation. 
+ # + # @see app/policies/issue_policy.rb - needs project for most conditions + # @see app/policies/merge_request_policy.rb - needs target_project + # @see ee/app/policies/vulnerability_policy.rb - delegates to project + PRELOAD_ASSOCIATIONS = { + 'issues' => [:project, :author, :work_item_type], + 'merge_requests' => [:target_project, :author], + 'vulnerabilities' => [:project], + 'epics' => [:group], + 'projects' => [:namespace, :project_feature], + 'milestones' => [:project, :group], + 'snippets' => [:project, :author] + }.freeze + + # @param user [User] The user to check permissions for + # @param resources_by_type [Hash>] Resources grouped by type + # e.g., { 'issues' => [1, 2, 3], 'merge_requests' => [4, 5] } + def initialize(user:, resources_by_type:) + @user = user + @resources_by_type = resources_by_type || {} + end + + # Executes the batch authorization check. + # + # All operations are bounded by the batch size (max 500 resources): + # 1. Load resources: max 500 records across all types + # 2. Extract project_ids: max ~500 unique projects FROM those resources + # 3. Pre-cache member access: single query with max ~500 project IDs + # 4. Authorization checks: max 500 Ability.allowed? calls (optimized by user_scope) + # + # This ensures the endpoint is safe for users with 500K+ project access. + # + # @return [Hash>] Authorization results grouped by type + # e.g., { 'issues' => { 1 => true, 2 => false }, 'merge_requests' => { 4 => true } } + # @see app/models/ability.rb:42-48 + def execute + return {} if @resources_by_type.empty? 
+ + # Step 1: Load requested resources with preloading (max 500 total) + loaded_resources_by_type = load_all_resources + + # Step 2: Pre-cache member access for projects/groups FROM THE LOADED RESOURCES + # NOT the user's entire project list - just the ~500 projects these resources belong to + # @see app/models/user.rb:2536-2549 + precache_member_access(loaded_resources_by_type) + + # Step 3: Check permissions using user_scope for optimization + # @see app/models/ability.rb:45-47 + DeclarativePolicy.user_scope do + @resources_by_type.each_with_object({}) do |(type, ids), results| + results[type] = authorize_resources_of_type(type, ids, loaded_resources_by_type[type] || {}) + end + end + end + + private + + attr_reader :user, :resources_by_type + + # Loads all resources for all types with appropriate preloading. + # + # @return [Hash>] Loaded resources by type + def load_all_resources + @resources_by_type.each_with_object({}) do |(type, ids), loaded| + next if ids.blank? + + loaded[type] = load_resources_for_type(type, ids) + end + end + + # Loads resources for a single type with preloading. + # + # @param type [String] The resource type + # @param ids [Array] The resource IDs + # @return [Hash] Loaded resources indexed by ID + # rubocop:disable CodeReuse/ActiveRecord -- Batch loading with preloads for authorization checks + def load_resources_for_type(type, ids) + klass = RESOURCE_CLASSES[type] + return {} unless klass + + preloads = PRELOAD_ASSOCIATIONS[type] || [] + klass.where(id: ids).includes(*preloads).index_by(&:id) + end + # rubocop:enable CodeReuse/ActiveRecord + + # Pre-caches member access levels for projects/groups involved in THIS authorization batch. + # + # IMPORTANT SCALABILITY NOTE: This only pre-caches access for the resources in + # the current batch (max 500 resources), NOT all projects/groups the user has + # access to. This is critical for users with massive project counts (e.g., 500K). + # + # The flow is: + # 1. 
GKG sends batch of 500 resource IDs to authorize + # 2. We load those 500 resources (issues, MRs, etc.) + # 3. We extract project_ids FROM THOSE 500 RESOURCES (max ~500 unique projects) + # 4. We pre-cache member access for just those ~500 projects + # + # This ensures we NEVER query the user's full project list, regardless of + # whether they have access to 100 or 500,000 projects. + # + # @param loaded_resources_by_type [Hash] Loaded resources from current batch only + # @see app/models/user.rb:2536-2549 for max_member_access_for_project_ids + # @see app/models/user.rb:2184-2207 for GitLab's batching pattern with large ID sets + def precache_member_access(loaded_resources_by_type) + project_ids = extract_project_ids(loaded_resources_by_type) + group_ids = extract_group_ids(loaded_resources_by_type) + + # Pre-cache project member access for THIS BATCH only (max ~500 IDs) + @user.max_member_access_for_project_ids(project_ids) if project_ids.any? + + # Pre-cache group member access for THIS BATCH only + @user.max_member_access_for_group_ids(group_ids) if group_ids.any? + end + + # Extracts all project IDs from loaded resources. + # + # @param loaded_resources_by_type [Hash] All loaded resources + # @return [Array] Unique project IDs + def extract_project_ids(loaded_resources_by_type) + loaded_resources_by_type.flat_map do |_type, resources| + resources.values.filter_map do |resource| + if resource.is_a?(Project) + resource.id + elsif resource.respond_to?(:project_id) + resource.project_id + elsif resource.respond_to?(:target_project_id) + resource.target_project_id + end + end + end.uniq.compact + end + + # Extracts all group IDs from loaded resources. 
+ # + # @param loaded_resources_by_type [Hash] All loaded resources + # @return [Array] Unique group IDs + def extract_group_ids(loaded_resources_by_type) + loaded_resources_by_type.flat_map do |_type, resources| + resources.values.filter_map do |resource| + resource.group_id if resource.respond_to?(:group_id) + end + end.uniq.compact + end + + # Authorizes all resources of a single type. + # + # Checks authorization for each resource, using pre-loaded data. + # Returns a hash mapping resource IDs to authorization results. + # + # @param type [String] The resource type (e.g., 'issues', 'merge_requests') + # @param ids [Array] The resource IDs to authorize + # @param loaded_resources [Hash] Pre-loaded resources + # @return [Hash] Map of resource ID to authorization result + def authorize_resources_of_type(type, ids, loaded_resources) + klass = RESOURCE_CLASSES[type] + return ids.index_with { false } unless klass + + ids.index_with do |id| + resource = loaded_resources[id] + + # Resource not found - deny access + # This handles cases where the resource was deleted between + # the ClickHouse query and this authorization check + # + # Security note: We still perform a dummy check to prevent + # timing-based information disclosure about resource existence + if resource.nil? + DeclarativePolicy.has_policy?(klass) # Constant-time padding + next false + end + + # Check visibility using the same pattern as SearchService + visible_result?(resource) + end + end + + # Checks if a resource is visible to the user. + # + # This method is intentionally identical to SearchService#visible_result? + # to ensure consistent authorization behavior across search and the + # Knowledge Graph. 
+ # + # @param resource [ActiveRecord::Base] The resource to check + # @return [Boolean] Whether the user can read the resource + # @see app/services/search_service.rb:132-136 + def visible_result?(resource) + # Resources without policies are considered visible + # This handles edge cases like plain Ruby objects + return true unless resource.respond_to?(:to_ability_name) && DeclarativePolicy.has_policy?(resource) + + # Use the resource's to_ability_name to construct the read ability + # For Issue: :read_issue + # For MergeRequest: :read_merge_request + # For Vulnerability: :read_vulnerability + # etc. + Ability.allowed?(@user, :"read_#{resource.to_ability_name}", resource) + end + end +end diff --git a/ee/lib/api/internal/knowledge_graph/authorize.rb b/ee/lib/api/internal/knowledge_graph/authorize.rb new file mode 100644 index 00000000000000..ecc0745d5ebaba --- /dev/null +++ b/ee/lib/api/internal/knowledge_graph/authorize.rb @@ -0,0 +1,191 @@ +# frozen_string_literal: true + +module API + module Internal + module KnowledgeGraph + # Internal API endpoint for Knowledge Graph authorization. + # + # This endpoint implements Layer 3 (Final Redaction Layer) in the Knowledge Graph + # security architecture. It receives batch authorization requests from the + # Knowledge Graph service and returns authorization results for each resource. + # + # Authentication: GitLab Shell token (same as Zoekt internal API) + # Rate limit: 100 requests per minute per user_id + # + # @see ee/lib/api/internal/search/zoekt.rb for similar pattern + # @see ee/app/services/knowledge_graph/batch_authorization_service.rb + class Authorize < ::API::Base + # Authenticates using GitLab Shell token, same as Zoekt internal API. + # @see lib/api/helpers.rb:369-371 + # @see ee/lib/api/internal/search/zoekt.rb:7 + before { authenticate_by_gitlab_shell_token! } + + feature_category :global_search + urgency :low + + # Maximum total number of resources that can be authorized in a single request. 
+ # This limit prevents timeout issues and ensures reasonable response times. + # Based on SearchService pagination limits. + # @see app/services/search_service.rb:8 + MAX_BATCH_SIZE = 500 + + # Supported resource types (plural keys) and their corresponding model classes. + # Each type maps to a model that implements #to_ability_name. + # @see app/models/concerns/issuable.rb:495-497 + # @see app/models/snippet.rb:297-299 + # @see ee/app/models/ee/vulnerability.rb:465-467 + RESOURCE_TYPES = ::KnowledgeGraph::BatchAuthorizationService::RESOURCE_CLASSES + + helpers do + include Gitlab::Loggable + + # Logger for redacted search results, following the pattern from SearchService. + # @see app/services/search_service.rb:179-181 + def logger + @logger ||= ::Gitlab::RedactedSearchResultsLogger.build + end + + # Validates that all resource type keys are supported. + # @param resources [Hash] The resources hash with type keys + # @return [Array] Invalid type keys + def invalid_resource_types(resources) + resources.keys.map(&:to_s) - RESOURCE_TYPES.keys + end + + # Counts total resources across all types. + # @param resources [Hash] The resources hash + # @return [Integer] Total count + def total_resource_count(resources) + resources.values.sum { |ids| ids.is_a?(Array) ? ids.size : 0 } + end + + # Validates user state to prevent authorization checks for invalid users. + # SECURITY: Prevents information disclosure via blocked/deactivated users. + # + # @param user [User] The user to validate + # @return [Boolean] true if user is valid for authorization + # @see app/policies/global_policy.rb:76-113 + # @see app/policies/base_policy.rb:43-49 + def valid_user_state?(user) + return false unless user + return false unless user.active? + + true + end + + # Validates user type to prevent authorization checks for system users. + # SECURITY: Prevents placeholder and import users from authorizing resources. 
+ # + # @param user [User] The user to validate + # @return [Boolean] true if user type is valid + # @see app/policies/base_policy.rb:134-135 + def valid_user_type?(user) + return false if user.ghost? + return false if user.respond_to?(:placeholder?) && user.placeholder? + return false if user.respond_to?(:import_user?) && user.import_user? + + true + end + + # Combined user validation. + # @param user [User, nil] The user to validate + # @return [Boolean] true if user is valid, false otherwise + def valid_user?(user) + valid_user_state?(user) && valid_user_type?(user) + end + end + + namespace 'internal' do + namespace 'knowledge_graph' do + desc 'Batch authorize resources for a user' do + detail 'Checks read permissions for multiple resources grouped by type. Used by the ' \ + 'Knowledge Graph service to perform final redaction of query results. ' \ + 'Resources must be pre-grouped by type (issues, merge_requests, etc.). ' \ + 'This feature was introduced in GitLab 17.x' + success code: 200 + failure [ + { code: 400, message: 'Bad request - invalid resource types' }, + { code: 401, message: 'Unauthorized' }, + { code: 422, message: 'Unprocessable entity - batch size exceeded' } + ] + tags ['internal'] + end + params do + requires :user_id, type: Integer, desc: 'ID of the user to check permissions for' + requires :resources, type: Hash, desc: 'Resources grouped by type' do + optional :issues, type: Array[Integer], desc: 'Array of Issue IDs' + optional :merge_requests, type: Array[Integer], desc: 'Array of MergeRequest IDs' + optional :epics, type: Array[Integer], desc: 'Array of Epic IDs' + optional :vulnerabilities, type: Array[Integer], desc: 'Array of Vulnerability IDs' + optional :projects, type: Array[Integer], desc: 'Array of Project IDs' + optional :milestones, type: Array[Integer], desc: 'Array of Milestone IDs' + optional :snippets, type: Array[Integer], desc: 'Array of Snippet IDs' + end + end + post 'authorize' do + # Note: No rate limiting needed - 
this is a service-to-service internal API + # authenticated via GitLab Shell token. The Knowledge Graph service is the only caller. + + # Validate resource types + invalid_types = invalid_resource_types(params[:resources]) + if invalid_types.any? + bad_request!("Invalid resource types: #{invalid_types.join(', ')}. " \ + "Supported types: #{RESOURCE_TYPES.keys.join(', ')}") + end + + # Validate total batch size to prevent timeout issues + total_count = total_resource_count(params[:resources]) + if total_count > MAX_BATCH_SIZE + error!("Total resource count (#{total_count}) exceeds maximum of #{MAX_BATCH_SIZE}", 422) + end + + # Load user - use generic error to prevent user enumeration + # SECURITY: Don't distinguish between non-existent and invalid users + # @see lib/api/internal/base.rb:249-250 + user = UserFinder.new(params[:user_id]).find_by_id + + # Security: Return empty authorizations for invalid users to prevent enumeration + next { authorizations: {} } unless valid_user?(user) + + # Convert string keys to proper format for the service + resources_by_type = params[:resources].transform_keys(&:to_s) + + # Perform batch authorization using the dedicated service + service = ::KnowledgeGraph::BatchAuthorizationService.new( + user: user, + resources_by_type: resources_by_type + ) + result = service.execute + + # Log any denied resources for security audit + # Following pattern from SearchService#log_redacted_search_results + # @see app/services/search_service.rb:163-177 + denied_resources = result.flat_map do |type, authorizations| + authorizations.filter_map { |id, allowed| { type: type, id: id } unless allowed } + end + + if denied_resources.any? 
+ log_data = { + class: 'API::Internal::KnowledgeGraph::Authorize', + message: 'knowledge_graph_redacted_results', + user_id: user.id, + denied_count: denied_resources.size, + denied_resources: denied_resources + } + logger.error(build_structured_payload(**log_data)) + end + + # Convert integer keys to strings for JSON response + formatted_results = result.transform_values do |id_hash| + id_hash.transform_keys(&:to_s) + end + + status :ok + { authorizations: formatted_results } + end + end + end + end + end + end +end diff --git a/ee/lib/ee/api/api.rb b/ee/lib/ee/api/api.rb index 212829a685ec27..d77a475720c5ed 100644 --- a/ee/lib/ee/api/api.rb +++ b/ee/lib/ee/api/api.rb @@ -101,6 +101,7 @@ module API mount ::API::VirtualRegistries::Packages::Maven::Endpoints mount ::API::Internal::AppSec::Dast::SiteValidations + mount ::API::Internal::KnowledgeGraph::Authorize mount ::API::Internal::Search::Zoekt mount ::API::Internal::Ai::XRay::Scan mount ::API::Internal::Observability diff --git a/ee/lib/knowledge_graph/jwt_auth.rb b/ee/lib/knowledge_graph/jwt_auth.rb new file mode 100644 index 00000000000000..23cb0ddf6d37d6 --- /dev/null +++ b/ee/lib/knowledge_graph/jwt_auth.rb @@ -0,0 +1,70 @@ +# frozen_string_literal: true + +module KnowledgeGraph + # JWT authentication module for the Knowledge Graph authorization API. + # + # This module follows the exact pattern established by GitLab Shell token + # authentication used by internal APIs like Zoekt. + # + # Uses: + # - Header: Gitlab-Shell-Api-Request + # - Issuer: gitlab-shell + # - Secret: Gitlab::Shell.secret_token + # + # @see lib/gitlab/shell.rb for verification logic + # @see ee/lib/api/internal/search/zoekt.rb for usage pattern + module JwtAuth + # Use gitlab-shell issuer to match Gitlab::Shell.verify_api_request + ISSUER = 'gitlab-shell' + TOKEN_EXPIRE_TIME = 5.minutes + + class << self + # Returns the shared secret token used for JWT signing/verification. 
+ # Uses the same secret as GitLab Shell for consistency with other + # internal services. + # + # @return [String] The secret token + def secret_token + Gitlab::Shell.secret_token + end + + # Generates a signed JWT token for authenticating requests to the + # Knowledge Graph authorization endpoint. + # + # Uses JSONWebToken::HMACToken to match GitLab Shell verification. + # + # @return [String] The encoded JWT token + def jwt_token + token = JSONWebToken::HMACToken.new(secret_token) + token.issuer = ISSUER + token.expire_time = Time.current + TOKEN_EXPIRE_TIME unless skip_expiration? + token.encoded + end + + # Returns the header name for the GitLab Shell API request. + # + # @return [String] Header name + def header_name + Gitlab::Shell::API_HEADER + end + + # Returns a hash of headers to use for HTTP requests. + # Use with curl: -H "Gitlab-Shell-Api-Request: " + # + # @return [Hash] Headers hash + def request_headers + { header_name => jwt_token } + end + + private + + # Allows skipping token expiration for testing purposes. + # Controlled via KNOWLEDGE_GRAPH_JWT_SKIP_EXPIRY environment variable. + # + # @return [Boolean] + def skip_expiration? 
+ Gitlab::Utils.to_boolean(ENV['KNOWLEDGE_GRAPH_JWT_SKIP_EXPIRY']) + end + end + end +end diff --git a/scripts/knowledge_graph/.gitignore b/scripts/knowledge_graph/.gitignore new file mode 100644 index 00000000000000..ba0430d26c996e --- /dev/null +++ b/scripts/knowledge_graph/.gitignore @@ -0,0 +1 @@ +__pycache__/ \ No newline at end of file diff --git a/scripts/knowledge_graph/__init__.py b/scripts/knowledge_graph/__init__.py new file mode 100644 index 00000000000000..7c95240109bffa --- /dev/null +++ b/scripts/knowledge_graph/__init__.py @@ -0,0 +1 @@ +# Knowledge Graph Authorization Validation Scripts diff --git a/scripts/knowledge_graph/__main__.py b/scripts/knowledge_graph/__main__.py new file mode 100644 index 00000000000000..ce6e8fad1728ea --- /dev/null +++ b/scripts/knowledge_graph/__main__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +""" +Entry point for running the validation script as a module: + python3 -m scripts.knowledge_graph +""" + +from .validate_authorization import main + +if __name__ == "__main__": + main() diff --git a/scripts/knowledge_graph/clients.py b/scripts/knowledge_graph/clients.py new file mode 100644 index 00000000000000..0b954757890454 --- /dev/null +++ b/scripts/knowledge_graph/clients.py @@ -0,0 +1,173 @@ +""" +API clients for GitLab and Knowledge Graph authorization endpoint. 
"""
HTTP clients for the Knowledge Graph authorization validation scripts.

``GitLabAPIClient`` wraps the public GitLab REST API (v4) with a persistent
session. ``KnowledgeGraphAuthClient`` calls the internal
``/api/v4/internal/knowledge_graph/authorize`` endpoint and shells out to
``bundle exec rails runner`` for JWT generation and authorization-cache
refreshes.
"""

import logging
import subprocess
import time
from typing import Optional, Tuple

import requests

logger = logging.getLogger(__name__)


class GitLabAPIClient:
    """Client for interacting with GitLab REST API."""

    def __init__(self, base_url: str, token: str):
        """
        Args:
            base_url: GitLab instance URL (trailing slashes are stripped)
            token: Admin API token sent as ``PRIVATE-TOKEN`` on every request
        """
        self.base_url = base_url.rstrip("/")
        self.api_url = f"{self.base_url}/api/v4"
        self.token = token
        # A Session reuses connections and carries the auth headers for all
        # requests made through this client.
        self.session = requests.Session()
        self.session.headers.update({
            "PRIVATE-TOKEN": token,
            "Content-Type": "application/json"
        })

    def _request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
        """Make an API request against ``{base_url}/api/v4/{endpoint}``."""
        url = f"{self.api_url}/{endpoint.lstrip('/')}"
        response = self.session.request(method, url, **kwargs)
        return response

    def get(self, endpoint: str, **kwargs) -> requests.Response:
        """GET request."""
        return self._request("GET", endpoint, **kwargs)

    def post(self, endpoint: str, **kwargs) -> requests.Response:
        """POST request."""
        return self._request("POST", endpoint, **kwargs)

    def delete(self, endpoint: str, **kwargs) -> requests.Response:
        """DELETE request."""
        return self._request("DELETE", endpoint, **kwargs)

    def put(self, endpoint: str, **kwargs) -> requests.Response:
        """PUT request."""
        return self._request("PUT", endpoint, **kwargs)


class KnowledgeGraphAuthClient:
    """Client for the Knowledge Graph authorization API."""

    def __init__(self, gitlab_url: str, rails_root: str = "."):
        """
        Args:
            gitlab_url: GitLab instance URL (trailing slashes are stripped)
            rails_root: Working directory for ``rails runner`` invocations
        """
        self.gitlab_url = gitlab_url.rstrip("/")
        self.api_url = f"{self.gitlab_url}/api/v4/internal/knowledge_graph/authorize"
        self.rails_root = rails_root

    def refresh_user_authorizations(self, user_id: int) -> bool:
        """
        Refresh a user's project authorizations cache.

        This is needed because when members are added via API, the project_authorizations
        table isn't updated synchronously - it relies on a Sidekiq background job.
        In test scenarios, we need to trigger this manually.

        Args:
            user_id: The user ID to refresh authorizations for

        Returns:
            True on success, False on failure

        Raises:
            ValueError: if ``user_id`` is not coercible to an integer
        """
        # Coerce to int before interpolating into Ruby source: the value is
        # embedded verbatim in code executed by `rails runner`, so anything
        # other than an integer would be a code-injection vector.
        user_id = int(user_id)
        try:
            result = subprocess.run(
                ["bundle", "exec", "rails", "runner",
                 f"User.find({user_id}).refresh_authorized_projects"],
                capture_output=True,
                text=True,
                cwd=self.rails_root,
                timeout=60
            )
            if result.returncode == 0:
                logger.info(f"Refreshed authorizations for user {user_id}")
                return True
            else:
                logger.error(f"Failed to refresh authorizations: {result.stderr}")
                return False
        except subprocess.TimeoutExpired:
            logger.error("Authorization refresh timed out")
            return False
        except Exception as e:
            logger.error(f"Error refreshing authorizations: {e}")
            return False

    def get_jwt_token(self) -> Optional[str]:
        """Generate a JWT token using Rails runner.

        Returns:
            The token string on success, None on failure/timeout.
        """
        try:
            result = subprocess.run(
                ["bundle", "exec", "rails", "runner", "puts KnowledgeGraph::JwtAuth.jwt_token"],
                capture_output=True,
                text=True,
                cwd=self.rails_root,
                timeout=60
            )
            if result.returncode == 0:
                # `puts` appends a newline; strip it so the value can be used
                # directly as a header.
                return result.stdout.strip()
            else:
                logger.error(f"Failed to generate JWT: {result.stderr}")
                return None
        except subprocess.TimeoutExpired:
            logger.error("JWT generation timed out")
            return None
        except Exception as e:
            logger.error(f"Error generating JWT: {e}")
            return None

    def authorize(
        self,
        user_id: int,
        resources: dict,
        jwt_token: Optional[str] = None
    ) -> Optional[dict]:
        """
        Call the authorization endpoint.

        Args:
            user_id: The user ID to check permissions for
            resources: Dict of resource_type -> list of IDs
            jwt_token: Optional pre-generated JWT token

        Returns:
            Authorization response dict (with an added ``_response_time_ms``
            key) or None on error
        """
        if jwt_token is None:
            jwt_token = self.get_jwt_token()
            if jwt_token is None:
                return None

        headers = {
            "Gitlab-Shell-Api-Request": jwt_token,
            "Content-Type": "application/json"
        }

        payload = {
            "user_id": user_id,
            "resources": resources
        }

        # Count total resources for logging
        total_resources = sum(len(ids) for ids in resources.values())

        try:
            start_time = time.perf_counter()
            response = requests.post(self.api_url, headers=headers, json=payload, timeout=30)
            elapsed_ms = (time.perf_counter() - start_time) * 1000

            # Accept both 200 and 201 as success
            if response.status_code in [200, 201]:
                logger.debug(
                    f"Authorization API: {response.status_code} in {elapsed_ms:.2f}ms "
                    f"(user={user_id}, resources={total_resources})"
                )
                result = response.json()
                result["_response_time_ms"] = elapsed_ms
                return result
            else:
                logger.error(
                    f"Authorization request failed: {response.status_code} in {elapsed_ms:.2f}ms - {response.text}"
                )
                return None
        except Exception as e:
            logger.error(f"Error calling authorization API: {e}")
            return None
"""
Configuration and data models for Knowledge Graph authorization tests.
"""

from dataclasses import dataclass, field
from enum import Enum
from typing import Optional


class Visibility(Enum):
    """Project visibility levels."""
    PUBLIC = "public"
    PRIVATE = "private"
    INTERNAL = "internal"


class AccessLevel(Enum):
    """GitLab access levels (numeric values match the GitLab API)."""
    NO_ACCESS = 0
    MINIMAL_ACCESS = 5
    GUEST = 10
    REPORTER = 20
    DEVELOPER = 30
    MAINTAINER = 40
    OWNER = 50


@dataclass
class TestConfig:
    """Configuration for the test run."""
    gitlab_url: str
    admin_token: str
    rails_root: str = "."
    # When True, all created users/projects/groups are deleted after the run.
    cleanup: bool = True
    verbose: bool = False


@dataclass
class TestUser:
    """Represents a test user."""
    id: int
    username: str
    email: str
    name: str
    # Set to True after the factory blocks the user.
    is_blocked: bool = False


@dataclass
class TestProject:
    """Represents a test project."""
    id: int
    name: str
    path: str
    visibility: Visibility
    namespace_id: int


@dataclass
class TestResource:
    """Represents a test resource (issue, MR, etc.)."""
    id: int
    # Project-scoped internal ID (distinct from the global ``id``).
    iid: int
    # API resource-type key, e.g. "issues" or "merge_requests".
    resource_type: str
    project_id: int
    title: str
    confidential: bool = False


@dataclass
class TestContext:
    """Holds all test data created during the run."""
    users: list[TestUser] = field(default_factory=list)
    projects: list[TestProject] = field(default_factory=list)
    resources: list[TestResource] = field(default_factory=list)
    group_id: Optional[int] = None


@dataclass
class TestResult:
    """Result of a single test."""
    name: str
    passed: bool
    message: str
    # Raw API response(s) for diagnostics; None when the test never got one.
    details: Optional[dict] = None
"""
Test data factory for creating GitLab resources.
"""

import logging
import secrets
import string
import time
from typing import Optional

from .config import TestUser, TestProject, TestResource, Visibility
from .clients import GitLabAPIClient

logger = logging.getLogger(__name__)


class TestDataFactory:
    """Factory for creating test data in GitLab.

    All generated names embed a per-instance timestamp so repeated runs do
    not collide on usernames/paths.
    """

    def __init__(self, client: GitLabAPIClient):
        self.client = client
        # Captured once so every object created by this factory shares one
        # unique run identifier.
        self.timestamp = int(time.time())

    def _generate_password(self) -> str:
        """Generate a strong random password (20 chars, letters/digits/punct)."""
        alphabet = string.ascii_letters + string.digits + "!@#$%"
        return ''.join(secrets.choice(alphabet) for _ in range(20))

    def create_user(
        self,
        username_suffix: str,
        is_admin: bool = False
    ) -> Optional[TestUser]:
        """
        Create a test user.

        Args:
            username_suffix: Suffix to add to the username
            is_admin: Whether the user should be an admin

        Returns:
            TestUser object or None on failure
        """
        username = f"kg_test_{username_suffix}_{self.timestamp}"
        email = f"{username}@example.com"

        data = {
            "email": email,
            "username": username,
            "name": f"KG Test User {username_suffix}",
            "password": self._generate_password(),
            # Skip email confirmation so the account is usable immediately.
            "skip_confirmation": True,
            "admin": is_admin
        }

        response = self.client.post("users", json=data)
        if response.status_code == 201:
            user_data = response.json()
            logger.info(f"Created user: {username} (ID: {user_data['id']})")
            return TestUser(
                id=user_data["id"],
                username=user_data["username"],
                email=user_data["email"],
                name=user_data["name"]
            )
        else:
            logger.error(f"Failed to create user {username}: {response.status_code} - {response.text}")
            return None

    def create_group(self, name_suffix: str) -> Optional[int]:
        """
        Create a test group.

        Args:
            name_suffix: Suffix to add to the group name

        Returns:
            Group ID or None on failure
        """
        name = f"kg_test_group_{name_suffix}_{self.timestamp}"
        path = name.lower().replace(" ", "_")

        data = {
            "name": name,
            "path": path,
            "visibility": "private"
        }

        response = self.client.post("groups", json=data)
        if response.status_code == 201:
            group_data = response.json()
            logger.info(f"Created group: {name} (ID: {group_data['id']})")
            return group_data["id"]
        else:
            logger.error(f"Failed to create group {name}: {response.status_code} - {response.text}")
            return None

    def create_project(
        self,
        name_suffix: str,
        visibility: Visibility,
        namespace_id: Optional[int] = None
    ) -> Optional[TestProject]:
        """
        Create a test project.

        Args:
            name_suffix: Suffix to add to the project name
            visibility: Project visibility level
            namespace_id: Optional namespace ID

        Returns:
            TestProject object or None on failure
        """
        name = f"kg_test_project_{name_suffix}_{self.timestamp}"

        data = {
            "name": name,
            "visibility": visibility.value,
            # A README gives the repo an initial commit, which MR creation
            # later depends on (branches need a ref to start from).
            "initialize_with_readme": True
        }
        if namespace_id:
            data["namespace_id"] = namespace_id

        response = self.client.post("projects", json=data)
        if response.status_code == 201:
            project_data = response.json()
            logger.info(f"Created project: {name} (ID: {project_data['id']}, visibility: {visibility.value})")
            return TestProject(
                id=project_data["id"],
                name=project_data["name"],
                path=project_data["path"],
                visibility=visibility,
                namespace_id=project_data["namespace"]["id"]
            )
        else:
            logger.error(f"Failed to create project {name}: {response.status_code} - {response.text}")
            return None

    def create_issue(
        self,
        project_id: int,
        title_suffix: str,
        confidential: bool = False
    ) -> Optional[TestResource]:
        """
        Create a test issue.

        Args:
            project_id: Project to create the issue in
            title_suffix: Suffix to add to the issue title
            confidential: Whether the issue is confidential

        Returns:
            TestResource object or None on failure
        """
        title = f"KG Test Issue {title_suffix}"

        data = {
            "title": title,
            "description": "Test issue for Knowledge Graph authorization validation",
            "confidential": confidential
        }

        response = self.client.post(f"projects/{project_id}/issues", json=data)
        if response.status_code == 201:
            issue_data = response.json()
            logger.info(f"Created issue: {title} (ID: {issue_data['id']}, confidential: {confidential})")
            return TestResource(
                id=issue_data["id"],
                iid=issue_data["iid"],
                resource_type="issues",
                project_id=project_id,
                title=title,
                confidential=confidential
            )
        else:
            logger.error(f"Failed to create issue: {response.status_code} - {response.text}")
            return None

    def create_merge_request(
        self,
        project_id: int,
        title_suffix: str
    ) -> Optional[TestResource]:
        """
        Create a test merge request.

        Creates a branch off the default branch, commits one file to it, then
        opens an MR back to the default branch.

        Args:
            project_id: Project to create the MR in
            title_suffix: Suffix to add to the MR title

        Returns:
            TestResource object or None on failure
        """
        # First create a branch
        branch_name = f"kg-test-branch-{title_suffix}-{self.timestamp}"

        # Get default branch
        project_resp = self.client.get(f"projects/{project_id}")
        if project_resp.status_code != 200:
            logger.error(f"Failed to get project: {project_resp.text}")
            return None

        default_branch = project_resp.json().get("default_branch", "main")

        # Create branch
        branch_data = {
            "branch": branch_name,
            "ref": default_branch
        }
        branch_resp = self.client.post(f"projects/{project_id}/repository/branches", json=branch_data)
        if branch_resp.status_code not in [200, 201]:
            logger.error(f"Failed to create branch: {branch_resp.text}")
            return None

        # Create a file change in the branch so the MR has a diff
        file_data = {
            "branch": branch_name,
            "content": f"# Test file for MR {title_suffix}\nCreated at {self.timestamp}",
            "commit_message": f"Add test file for MR {title_suffix}"
        }
        file_resp = self.client.post(
            f"projects/{project_id}/repository/files/test_file_{self.timestamp}.md",
            json=file_data
        )
        if file_resp.status_code not in [200, 201]:
            logger.error(f"Failed to create file: {file_resp.text}")
            return None

        # Create MR
        title = f"KG Test MR {title_suffix}"
        mr_data = {
            "source_branch": branch_name,
            "target_branch": default_branch,
            "title": title,
            "description": "Test MR for Knowledge Graph authorization validation"
        }

        response = self.client.post(f"projects/{project_id}/merge_requests", json=mr_data)
        if response.status_code == 201:
            # Named mr_json (not mr_data) so the request payload above is not
            # shadowed by the response body.
            mr_json = response.json()
            logger.info(f"Created MR: {title} (ID: {mr_json['id']})")
            return TestResource(
                id=mr_json["id"],
                iid=mr_json["iid"],
                resource_type="merge_requests",
                project_id=project_id,
                title=title
            )
        else:
            logger.error(f"Failed to create MR: {response.status_code} - {response.text}")
            return None

    def add_project_member(
        self,
        project_id: int,
        user_id: int,
        access_level: int = 30  # Developer
    ) -> bool:
        """
        Add a user as a project member.

        Args:
            project_id: Project ID
            user_id: User ID to add
            access_level: Access level (default: Developer = 30)

        Returns:
            True on success, False on failure
        """
        data = {
            "user_id": user_id,
            "access_level": access_level
        }

        response = self.client.post(f"projects/{project_id}/members", json=data)
        if response.status_code == 201:
            logger.info(f"Added user {user_id} to project {project_id} with access level {access_level}")
            return True
        else:
            logger.error(f"Failed to add member: {response.status_code} - {response.text}")
            return False

    def block_user(self, user_id: int) -> bool:
        """
        Block a user.

        Args:
            user_id: User ID to block

        Returns:
            True on success, False on failure
        """
        response = self.client.post(f"users/{user_id}/block")
        if response.status_code in [200, 201]:
            logger.info(f"Blocked user {user_id}")
            return True
        else:
            logger.error(f"Failed to block user: {response.status_code} - {response.text}")
            return False

    def delete_user(self, user_id: int) -> bool:
        """Delete a user. Returns True if the API accepted the deletion."""
        response = self.client.delete(f"users/{user_id}")
        return response.status_code in [200, 202, 204]

    def delete_project(self, project_id: int) -> bool:
        """Delete a project. Returns True if the API accepted the deletion."""
        response = self.client.delete(f"projects/{project_id}")
        return response.status_code in [200, 202, 204]

    def delete_group(self, group_id: int) -> bool:
        """Delete a group. Returns True if the API accepted the deletion."""
        response = self.client.delete(f"groups/{group_id}")
        return response.status_code in [200, 202, 204]
"""
Test cases for Knowledge Graph authorization validation.
"""

import logging
from typing import Optional, Tuple, Callable

from .config import (
    TestConfig, TestContext, TestResult, TestUser, TestProject,
    TestResource, Visibility
)
from .clients import GitLabAPIClient, KnowledgeGraphAuthClient
from .factory import TestDataFactory

logger = logging.getLogger(__name__)


class AuthorizationTestRunner:
    """Runs authorization test scenarios.

    Lifecycle: ``setup()`` creates users/projects/resources via the factory,
    ``run_all_tests()`` exercises the authorization endpoint against them,
    ``print_summary()`` reports, and ``cleanup()`` deletes everything.
    """

    def __init__(
        self,
        config: TestConfig,
        gitlab_client: GitLabAPIClient,
        kg_client: KnowledgeGraphAuthClient,
        factory: TestDataFactory
    ):
        self.config = config
        self.gitlab_client = gitlab_client
        self.kg_client = kg_client
        self.factory = factory
        # All created fixtures are tracked here so cleanup() can find them.
        self.context = TestContext()
        self.results: list[TestResult] = []
        # Fetched once in setup() and reused for every authorize() call.
        self.jwt_token: Optional[str] = None

    # =========================================================================
    # Setup and Teardown
    # =========================================================================

    def setup(self) -> bool:
        """Set up test data.

        Returns:
            True when all required fixtures were created, False otherwise
            (resource-creation failures for issues/MRs are tolerated; user,
            project and JWT failures abort).
        """
        logger.info("=" * 60)
        logger.info("SETUP: Creating test data")
        logger.info("=" * 60)

        # Get JWT token once for all tests
        self.jwt_token = self.kg_client.get_jwt_token()
        if not self.jwt_token:
            logger.error("Failed to get JWT token")
            return False

        # Create test users
        # User with access (will be added to private project)
        user_with_access = self.factory.create_user("with_access")
        if not user_with_access:
            return False
        self.context.users.append(user_with_access)

        # User without access
        user_without_access = self.factory.create_user("no_access")
        if not user_without_access:
            return False
        self.context.users.append(user_without_access)

        # Blocked user
        blocked_user = self.factory.create_user("blocked")
        if not blocked_user:
            return False
        self.factory.block_user(blocked_user.id)
        blocked_user.is_blocked = True
        self.context.users.append(blocked_user)

        # Create test projects
        # Public project
        public_project = self.factory.create_project("public", Visibility.PUBLIC)
        if not public_project:
            return False
        self.context.projects.append(public_project)

        # Private project
        private_project = self.factory.create_project("private", Visibility.PRIVATE)
        if not private_project:
            return False
        self.context.projects.append(private_project)

        # Add user_with_access to private project
        if self.factory.add_project_member(private_project.id, user_with_access.id, access_level=30):
            # Refresh user authorizations to ensure project_authorizations table is updated
            # This is needed because the API adds members but doesn't synchronously update
            # the authorization cache - it relies on a Sidekiq background job
            self.kg_client.refresh_user_authorizations(user_with_access.id)

        # Create test resources
        # Public project issues
        public_issue = self.factory.create_issue(public_project.id, "public")
        if public_issue:
            self.context.resources.append(public_issue)

        # Private project issues
        private_issue = self.factory.create_issue(private_project.id, "private")
        if private_issue:
            self.context.resources.append(private_issue)

        # Confidential issue in private project
        confidential_issue = self.factory.create_issue(
            private_project.id, "confidential", confidential=True
        )
        if confidential_issue:
            self.context.resources.append(confidential_issue)

        # Merge request in private project
        private_mr = self.factory.create_merge_request(private_project.id, "private")
        if private_mr:
            self.context.resources.append(private_mr)

        logger.info(f"Setup complete: {len(self.context.users)} users, "
                    f"{len(self.context.projects)} projects, "
                    f"{len(self.context.resources)} resources")
        return True

    def cleanup(self):
        """Clean up test data."""
        logger.info("=" * 60)
        logger.info("CLEANUP: Removing test data")
        logger.info("=" * 60)

        # Delete projects (this also deletes issues and MRs)
        for project in self.context.projects:
            if self.factory.delete_project(project.id):
                logger.info(f"Deleted project: {project.name}")
            else:
                logger.warning(f"Failed to delete project {project.name}")

        # Delete users
        for user in self.context.users:
            if self.factory.delete_user(user.id):
                logger.info(f"Deleted user: {user.username}")
            else:
                logger.warning(f"Failed to delete user {user.username}")

        # Delete group if created
        if self.context.group_id:
            if self.factory.delete_group(self.context.group_id):
                logger.info(f"Deleted group: {self.context.group_id}")

    # =========================================================================
    # Helper Methods
    # =========================================================================

    def run_test(self, name: str, test_func: Callable) -> TestResult:
        """Run a single test and record the result.

        ``test_func`` must return a ``(passed, message, details)`` tuple;
        any exception is converted into a failing TestResult.
        """
        try:
            passed, message, details = test_func()
            result = TestResult(name=name, passed=passed, message=message, details=details)
        except Exception as e:
            logger.exception(f"Exception in test {name}")
            result = TestResult(name=name, passed=False, message=f"Exception: {e}")

        self.results.append(result)
        status = "PASS" if result.passed else "FAIL"

        # Extract response time from details if available
        response_time = self._extract_response_time(result.details)
        time_str = f" ({response_time:.2f}ms)" if response_time else ""

        logger.info(f"[{status}] {name}: {result.message}{time_str}")
        return result

    def _extract_response_time(self, details: Optional[dict]) -> Optional[float]:
        """Extract response time from test details.

        Returns the single ``_response_time_ms`` value, the average across
        nested response dicts, or None when no timing info is present.
        """
        if not details:
            return None

        # Handle single response
        if "_response_time_ms" in details:
            return details["_response_time_ms"]

        # Handle multiple responses (e.g., MR test with with_access and without_access)
        times = []
        for key, value in details.items():  # key unused; only values are inspected
            if isinstance(value, dict) and "_response_time_ms" in value:
                times.append(value["_response_time_ms"])

        return sum(times) / len(times) if times else None

    def get_resource_by_type_and_suffix(
        self,
        resource_type: str,
        suffix: str
    ) -> Optional[TestResource]:
        """Find a resource by type and title suffix (case-insensitive match)."""
        for resource in self.context.resources:
            if resource.resource_type == resource_type and suffix in resource.title.lower():
                return resource
        return None

    def get_user_by_suffix(self, suffix: str) -> Optional[TestUser]:
        """Find a user by username suffix."""
        for user in self.context.users:
            if suffix in user.username:
                return user
        return None

    def get_project_by_visibility(self, visibility: Visibility) -> Optional[TestProject]:
        """Find a project by visibility."""
        for project in self.context.projects:
            if project.visibility == visibility:
                return project
        return None

    def _check_authorization(
        self,
        user_id: int,
        resource_type: str,
        resource_id: int
    ) -> Optional[bool]:
        """Check if a user is authorized for a specific resource.

        NOTE(review): not referenced by the test cases below (they call
        ``authorize`` directly to keep the raw response for details);
        retained as a convenience helper.
        """
        result = self.kg_client.authorize(
            user_id=user_id,
            resources={resource_type: [resource_id]},
            jwt_token=self.jwt_token
        )
        if not result:
            return None

        # Response keys resource IDs as strings, hence str(resource_id).
        return result.get("authorizations", {}).get(resource_type, {}).get(str(resource_id))

    # =========================================================================
    # Test Cases
    # =========================================================================

    def test_public_resource_access(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that all users can access public project resources."""
        public_issue = self.get_resource_by_type_and_suffix("issues", "public")
        if not public_issue:
            return False, "Public issue not found", None

        user_no_access = self.get_user_by_suffix("no_access")
        if not user_no_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_no_access.id,
            resources={"issues": [public_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorized = result.get("authorizations", {}).get("issues", {}).get(str(public_issue.id))
        if authorized:
            return True, "User can access public resource", result
        else:
            return False, "User cannot access public resource", result

    def test_private_resource_no_access(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that users without access cannot see private project resources."""
        private_issue = self.get_resource_by_type_and_suffix("issues", "private")
        if not private_issue:
            return False, "Private issue not found", None

        user_no_access = self.get_user_by_suffix("no_access")
        if not user_no_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_no_access.id,
            resources={"issues": [private_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorized = result.get("authorizations", {}).get("issues", {}).get(str(private_issue.id))
        if not authorized:
            return True, "User correctly denied access to private resource", result
        else:
            return False, "User incorrectly granted access to private resource", result

    def test_private_resource_with_access(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that users with access can see private project resources."""
        private_issue = self.get_resource_by_type_and_suffix("issues", "private")
        if not private_issue:
            return False, "Private issue not found", None

        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={"issues": [private_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorized = result.get("authorizations", {}).get("issues", {}).get(str(private_issue.id))
        if authorized:
            return True, "User with access can see private resource", result
        else:
            return False, f"User with access cannot see private resource (user_id={user_with_access.id}, issue_id={private_issue.id})", result

    def test_confidential_issue_access(self) -> Tuple[bool, str, Optional[dict]]:
        """Test confidential issue access control."""
        conf_issue = self.get_resource_by_type_and_suffix("issues", "confidential")
        if not conf_issue:
            return False, "Confidential issue not found", None

        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={"issues": [conf_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        # User with project access should be able to see confidential issues
        authorized = result.get("authorizations", {}).get("issues", {}).get(str(conf_issue.id))
        if authorized:
            return True, "Project member can access confidential issue", result
        else:
            return False, "Project member cannot access confidential issue", result

    def test_blocked_user_no_access(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that blocked users cannot access any resources."""
        public_issue = self.get_resource_by_type_and_suffix("issues", "public")
        if not public_issue:
            return False, "Public issue not found", None

        blocked_user = self.get_user_by_suffix("blocked")
        if not blocked_user:
            return False, "Blocked user not found", None

        result = self.kg_client.authorize(
            user_id=blocked_user.id,
            resources={"issues": [public_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        # Blocked users should get empty authorizations
        authorizations = result.get("authorizations", {})
        if not authorizations or not authorizations.get("issues"):
            return True, "Blocked user correctly gets empty authorizations", result
        else:
            # Check if explicitly denied
            auth_value = authorizations.get("issues", {}).get(str(public_issue.id))
            if auth_value is False or auth_value is None:
                return True, "Blocked user correctly denied access", result
            return False, "Blocked user incorrectly gets authorizations", result

    def test_nonexistent_user(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that non-existent users get empty authorizations."""
        public_issue = self.get_resource_by_type_and_suffix("issues", "public")
        if not public_issue:
            return False, "Public issue not found", None

        result = self.kg_client.authorize(
            user_id=999999999,  # Non-existent user
            resources={"issues": [public_issue.id]},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorizations = result.get("authorizations", {})
        if not authorizations:
            return True, "Non-existent user correctly gets empty authorizations", result
        else:
            return False, "Non-existent user incorrectly gets authorizations", result

    def test_nonexistent_resource(self) -> Tuple[bool, str, Optional[dict]]:
        """Test that non-existent resources return false."""
        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={"issues": [999999999]},  # Non-existent issue
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorized = result.get("authorizations", {}).get("issues", {}).get("999999999")
        if not authorized:
            return True, "Non-existent resource correctly returns false", result
        else:
            return False, "Non-existent resource incorrectly returns true", result

    def test_merge_request_authorization(self) -> Tuple[bool, str, Optional[dict]]:
        """Test merge request authorization for both an authorized and an
        unauthorized user against the same private MR."""
        private_mr = self.get_resource_by_type_and_suffix("merge_requests", "private")
        if not private_mr:
            return False, "Private MR not found", None

        user_with_access = self.get_user_by_suffix("with_access")
        user_no_access = self.get_user_by_suffix("no_access")

        if not user_with_access or not user_no_access:
            return False, "Users not found", None

        # Test user with access
        result_with = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={"merge_requests": [private_mr.id]},
            jwt_token=self.jwt_token
        )

        # Test user without access
        result_without = self.kg_client.authorize(
            user_id=user_no_access.id,
            resources={"merge_requests": [private_mr.id]},
            jwt_token=self.jwt_token
        )

        if not result_with or not result_without:
            return False, "API call failed", None

        auth_with = result_with.get("authorizations", {}).get("merge_requests", {}).get(str(private_mr.id))
        auth_without = result_without.get("authorizations", {}).get("merge_requests", {}).get(str(private_mr.id))

        if auth_with and not auth_without:
            return True, "MR authorization works correctly", {"with_access": result_with, "without_access": result_without}
        else:
            return False, f"MR authorization incorrect: with={auth_with}, without={auth_without}", {
                "with_access": result_with,
                "without_access": result_without,
                "mr_id": private_mr.id,
                "user_with_access_id": user_with_access.id,
                "user_no_access_id": user_no_access.id
            }

    def test_batch_authorization(self) -> Tuple[bool, str, Optional[dict]]:
        """Test batch authorization with multiple resource types."""
        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        # Collect all resource IDs by type
        resources_by_type = {}
        for resource in self.context.resources:
            if resource.resource_type not in resources_by_type:
                resources_by_type[resource.resource_type] = []
            resources_by_type[resource.resource_type].append(resource.id)

        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources=resources_by_type,
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        # Verify we got results for all resource types
        authorizations = result.get("authorizations", {})
        for resource_type in resources_by_type:
            if resource_type not in authorizations:
                return False, f"Missing authorizations for {resource_type}", result

        return True, f"Batch authorization returned results for {len(resources_by_type)} resource types", result

    def test_empty_resources(self) -> Tuple[bool, str, Optional[dict]]:
        """Test with empty resources."""
        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorizations = result.get("authorizations", {})
        if authorizations == {}:
            return True, "Empty resources returns empty authorizations", result
        else:
            return False, "Empty resources should return empty authorizations", result

    def test_large_batch_merge_requests(self) -> Tuple[bool, str, Optional[dict]]:
        """Test authorization for a large batch of merge requests.

        NOTE: creates up to 100 MRs via the API; this is by far the slowest
        test in the suite.
        """
        user_with_access = self.get_user_by_suffix("with_access")
        if not user_with_access:
            return False, "User not found", None

        private_project = self.get_project_by_visibility(Visibility.PRIVATE)
        if not private_project:
            return False, "Private project not found", None

        # Create multiple MRs for batch testing
        mr_count = 100
        logger.info(f"Creating {mr_count} merge requests for large batch test...")

        mr_ids = []
        for i in range(mr_count):
            mr = self.factory.create_merge_request(private_project.id, f"batch_{i}")
            if mr:
                mr_ids.append(mr.id)
                # Appended to context so cleanup() removes them with the project.
                self.context.resources.append(mr)
            else:
                logger.warning(f"Failed to create MR {i}")

        if len(mr_ids) < 5:
            return False, f"Only created {len(mr_ids)} MRs, need at least 5", None

        logger.info(f"Created {len(mr_ids)} MRs, testing batch authorization...")

        # Test authorization for all MRs at once
        result = self.kg_client.authorize(
            user_id=user_with_access.id,
            resources={"merge_requests": mr_ids},
            jwt_token=self.jwt_token
        )

        if not result:
            return False, "API call failed", None

        authorizations = result.get("authorizations", {}).get("merge_requests", {})
        # Fetched for diagnostics; run_test reports timing via _extract_response_time.
        response_time = result.get("_response_time_ms", 0)

        # Count authorized MRs
        authorized_count = sum(1 for mr_id in mr_ids if authorizations.get(str(mr_id)))

        if authorized_count == len(mr_ids):
            return (
                True,
                f"Authorized {authorized_count}/{len(mr_ids)} MRs",
                result
            )
        else:
            return (
                False,
                f"Only authorized {authorized_count}/{len(mr_ids)} MRs",
                result
            )

    # =========================================================================
    # Test Execution
    # =========================================================================

    def run_all_tests(self):
        """Run all test cases."""
        logger.info("=" * 60)
        logger.info("RUNNING TESTS")
        logger.info("=" * 60)

        tests = [
            ("Public resource access", self.test_public_resource_access),
            ("Private resource - no access", self.test_private_resource_no_access),
            ("Private resource - with access", self.test_private_resource_with_access),
            ("Confidential issue access", self.test_confidential_issue_access),
            ("Blocked user - no access", self.test_blocked_user_no_access),
            ("Non-existent user", self.test_nonexistent_user),
            ("Non-existent resource", self.test_nonexistent_resource),
            ("Merge request authorization", self.test_merge_request_authorization),
            ("Batch authorization", self.test_batch_authorization),
            ("Empty resources", self.test_empty_resources),
            ("Large batch MRs", self.test_large_batch_merge_requests),
        ]

        for name, test_func in tests:
            self.run_test(name, test_func)

    def print_summary(self) -> bool:
        """Print test summary and return True if all tests passed."""
        logger.info("=" * 60)
        logger.info("TEST SUMMARY")
        logger.info("=" * 60)

        passed = sum(1 for r in self.results if r.passed)
        failed = sum(1 for r in self.results if not r.passed)
        total = len(self.results)

        logger.info(f"Total: {total}, Passed: {passed}, Failed: {failed}")
        logger.info("")

        if failed > 0:
            logger.info("Failed tests:")
            for result in self.results:
                if not result.passed:
                    logger.info(f"  - {result.name}: {result.message}")
                    if result.details:
                        logger.debug(f"    Details: {result.details}")

        return failed == 0
Run: pip install requests") + sys.exit(1) + +# Import from local modules +from .config import TestConfig +from .clients import GitLabAPIClient, KnowledgeGraphAuthClient +from .factory import TestDataFactory +from .tests import AuthorizationTestRunner + + +def setup_logging(verbose: bool = False) -> logging.Logger: + """Configure logging for the test script.""" + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=level, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%H:%M:%S" + ) + return logging.getLogger(__name__) + + +def main(): + parser = argparse.ArgumentParser( + description="Validate Knowledge Graph Authorization API" + ) + parser.add_argument( + "--gitlab-url", + default="http://localhost:3000", + help="GitLab instance URL" + ) + parser.add_argument( + "--token", + required=True, + help="GitLab admin API token" + ) + parser.add_argument( + "--rails-root", + default=".", + help="Path to GitLab Rails root directory" + ) + parser.add_argument( + "--no-cleanup", + action="store_true", + help="Skip cleanup of test data" + ) + parser.add_argument( + "--verbose", "-v", + action="store_true", + help="Enable verbose logging" + ) + + args = parser.parse_args() + + # Setup logging + logger = setup_logging(args.verbose) + + logger.info("=" * 60) + logger.info("Knowledge Graph Authorization Validation") + logger.info("=" * 60) + logger.info(f"GitLab URL: {args.gitlab_url}") + logger.info(f"Rails Root: {args.rails_root}") + logger.info("") + + # Initialize configuration + config = TestConfig( + gitlab_url=args.gitlab_url, + admin_token=args.token, + rails_root=args.rails_root, + cleanup=not args.no_cleanup, + verbose=args.verbose + ) + + # Initialize clients + gitlab_client = GitLabAPIClient(args.gitlab_url, args.token) + kg_client = KnowledgeGraphAuthClient(args.gitlab_url, args.rails_root) + factory = TestDataFactory(gitlab_client) + + # Create test runner + runner = AuthorizationTestRunner(config, gitlab_client, 
#!/usr/bin/env ruby
# frozen_string_literal: true

# Generates a short-lived JWT for exercising the Knowledge Graph
# authorization API.
#
# Usage:
#   # From GitLab Rails console:
#   rails runner scripts/knowledge_graph_jwt.rb
#
#   # Or load in rails console:
#   load 'scripts/knowledge_graph_jwt.rb'
#
# Example curl command:
#   TOKEN=$(rails runner scripts/knowledge_graph_jwt.rb)
#   curl -X POST http://localhost:3000/api/v4/internal/knowledge_graph/authorize \
#     -H "Gitlab-Shell-Api-Request: $TOKEN" \
#     -H "Content-Type: application/json" \
#     -d '{"user_id": 1, "resources": {"issues": [1, 2, 3]}}'

# Boot the Rails app only when run standalone (no-op inside a console).
require_relative '../config/environment' unless defined?(Rails)

token = KnowledgeGraph::JwtAuth.jwt_token
header_name = KnowledgeGraph::JwtAuth.header_name

if __FILE__ == $PROGRAM_NAME
  # Script mode: emit only the token so shell callers can capture it.
  puts token
else
  # Console mode: print a human-friendly summary and a ready-made curl example.
  banner = "=" * 60
  puts banner
  puts "Knowledge Graph JWT Token"
  puts banner
  puts
  puts "Token (valid for 5 minutes):"
  puts token
  puts
  puts "Header name: #{header_name}"
  puts
  puts "Example curl command:"
  puts
  puts <<~CURL
    curl -X POST http://localhost:3000/api/v4/internal/knowledge_graph/authorize \\
      -H "#{header_name}: #{token}" \\
      -H "Content-Type: application/json" \\
      -d '{
        "user_id": 1,
        "resources": {
          "issues": [1, 2, 3],
          "merge_requests": [4, 5]
        }
      }'
  CURL
  puts
  puts banner
end