From df8831a312a5ee1dcda7c60982c2a5a54d99ca59 Mon Sep 17 00:00:00 2001 From: James Liu Date: Fri, 21 Feb 2025 16:44:57 +1100 Subject: [PATCH 1/3] api: Add documentation for a repository health API Subsequent commits will introduce a new repository-scoped API endpoint that returns repository health data from Gitaly. Add the corresponding Grape entity and Markdown documentation. --- doc/api/repositories.md | 48 +++++++++++++++++++++++++++ lib/api/entities/repository_health.rb | 37 +++++++++++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 lib/api/entities/repository_health.rb diff --git a/doc/api/repositories.md b/doc/api/repositories.md index e9b85b0fbfe71f..5c38e639e87892 100644 --- a/doc/api/repositories.md +++ b/doc/api/repositories.md @@ -510,6 +510,54 @@ Example response, with line breaks added for readability: } ``` +## Health + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/182220) in GitLab 17.10. Guarded behind the + [project_repositories_health](https://gitlab.com/gitlab-org/gitlab/-/issues/521115) feature flag. + +{{< /history >}} + +Get statistics related to the health of a project repository. Requests that generate a new health report are rate-limited to 5 requests/hour per project. + +```plaintext +GET /projects/:id/repository/health +``` + +Supported attributes: + +| Attribute | Type | Required | Description | +|:-----------|:--------|:---------|:---------------------------------------------------------------------------------------| +| `generate` | boolean | no | Whether a new health report should be generated. Set this if the endpoint returns 404. 
| + +Example request: + +```shell +curl --header "PRIVATE-TOKEN: token" \ + --url "https://gitlab.com/api/v4/projects/42/repository/health" +``` + +Example response: + +```json +{ + "size": 42002816, + "references": { + "loose_count": 3, + "packed_size": 315703, + "reference_backend": "REFERENCE_BACKEND_FILES" + }, + "objects": { + "size": 39651458, + "recent_size": 39461265, + "stale_size": 190193, + "keep_size": 0 + }, + "updated_at": "2025-02-26T03:42:13.015Z" +} +``` + ## Related topics - User documentation for [changelogs](../user/project/changelogs.md) diff --git a/lib/api/entities/repository_health.rb b/lib/api/entities/repository_health.rb new file mode 100644 index 00000000000000..c3c738a510c719 --- /dev/null +++ b/lib/api/entities/repository_health.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +module API + module Entities + # rubocop:disable Layout/LineLength -- `desc` is too long + class RepositoryHealth < Grape::Entity + class References < Grape::Entity + expose :loose_count, + documentation: { type: 'integer', desc: 'Number of loose references in the repository.' } + expose :packed_size, + documentation: { type: 'integer', desc: 'Size in bytes of packed references in the repository.' } + expose :reference_backend, + documentation: { type: 'string', + desc: "Type of backend used to store references. Either 'REFERENCE_BACKEND_REFTABLE' or 'REFERENCE_BACKEND_FILES'." } + end + + class Objects < Grape::Entity + expose :size, + documentation: { type: 'integer', desc: 'Size in bytes of all objects in the repository.' } + expose :recent_size, + documentation: { type: 'integer', + desc: 'Size in bytes of all recent objects in the repository. Recent objects are those which are reachable.' } + expose :stale_size, + documentation: { type: 'integer', + desc: 'Size in bytes of all stale objects in the repository. Stale objects are those which are unreachable and may be deleted during housekeeping.' 
} + expose :keep_size, + documentation: { type: 'integer', desc: 'Size in bytes of all packfiles with the .keep extension.' } + end + + expose :size, documentation: { type: 'integer', desc: 'Repository size in bytes.' } + expose :references, using: References + expose :objects, using: Objects + expose :updated_at, documentation: { type: 'dateTime', example: '2025-02-24T09:05:50.355Z' } + end + # rubocop:enable Layout/LineLength + end +end -- GitLab From 56ca97d693b7bb56a97f6167f6a655218c5f1b51 Mon Sep 17 00:00:00 2001 From: James Liu Date: Fri, 21 Feb 2025 16:48:53 +1100 Subject: [PATCH 2/3] repository: Add health method A subsequent commit will introduce a new repository-scoped endpoint that returns health information. In the first iteration, the information will simply consist of the fields returned by Gitaly's RepositoryInfo RPC. Extract a `repository_info` method and rewire the existing `repository_info_size_megabytes` method to use it. Add a `health` method on the Repository model which invokes this method and formats the response. The results of computing repository health are cached to reduce the number of calls to the Gitaly RepositoryInfo RPC. 
--- app/models/repository.rb | 12 ++++++++++++ lib/gitlab/git/repository.rb | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/app/models/repository.rb b/app/models/repository.rb index 4663902be13387..54d5961adb95d3 100644 --- a/app/models/repository.rb +++ b/app/models/repository.rb @@ -791,6 +791,18 @@ def contributors(ref: nil, order_by: nil, sort: 'asc') Commit.order_by(collection: commits, order_by: order_by, sort: sort) end + def health(generate) + cache.fetch(:health) do + if generate + info = raw_repository.repository_info + + info_h = info.to_h + info_h[:updated_at] = Time.current + info_h + end + end + end + def branch_names_contains(sha, limit: 0, exclude_refs: []) refs = raw_repository.branch_names_contains_sha(sha, limit: adjust_containing_limit(limit: limit, exclude_refs: exclude_refs)) diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 759618bbc2d0b7..88b5ea2227e6d7 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -48,6 +48,7 @@ def initialize(error_code) attr_reader :storage, :gl_repository, :gl_project_path, :container delegate :list_oversized_blobs, :list_all_blobs, :list_blobs, to: :gitaly_blob_client + delegate :repository_info, to: :gitaly_repository_client # This remote name has to be stable for all types of repositories that # can join an object pool. If it's structure ever changes, a migration @@ -1331,7 +1332,7 @@ def check_blobs_generated(base, head, changed_paths) end def repository_info_size_megabytes - bytes = gitaly_repository_client.repository_info.size + bytes = repository_info.size Gitlab::Utils.bytes_to_megabytes(bytes).round(2) end -- GitLab From 6d9aac2a269017291f29c9622984e3747951c9c6 Mon Sep 17 00:00:00 2001 From: James Liu Date: Fri, 21 Feb 2025 17:33:26 +1100 Subject: [PATCH 3/3] api: Add Repository Health endpoint Add a new :id/repository/health endpoint which returns information related to repository health. 
This is a repository-scoped API with the same permissions model as others, for example :id/repository/merge_base. When generating a new health report, the endpoint is rate-limited to 5 requests/hour due to the expensive nature of the underlying Gitaly query. It's also not expected that users will need to generate fresh reports frequently. The endpoint is available to project maintainers only. It is also placed behind a project-scoped feature flag, so we can validate performance concerns before making it widely available. --- .../project_repositories_health.yml | 9 ++ lib/api/repositories.rb | 29 ++++++ lib/gitlab/application_rate_limiter.rb | 1 + spec/requests/api/repositories_spec.rb | 96 +++++++++++++++++++ 4 files changed, 135 insertions(+) create mode 100644 config/feature_flags/gitlab_com_derisk/project_repositories_health.yml diff --git a/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml b/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml new file mode 100644 index 00000000000000..0ac5c19a836321 --- /dev/null +++ b/config/feature_flags/gitlab_com_derisk/project_repositories_health.yml @@ -0,0 +1,9 @@ +--- +name: project_repositories_health +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/509253 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/182220 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/521115 +milestone: '17.10' +group: group::gitaly +type: gitlab_com_derisk +default_enabled: false diff --git a/lib/api/repositories.rb b/lib/api/repositories.rb index 6a8358957e9a2b..f8e071417f2d82 100644 --- a/lib/api/repositories.rb +++ b/lib/api/repositories.rb @@ -231,6 +231,35 @@ def compare_cache_key(current_user, user_project, target_project, params) end end + desc 'Get repository health' do + success Entities::RepositoryHealth + end + params do + optional :generate, type: Boolean, default: false, desc: 'Triggers a new health report to be generated' + end + 
get ':id/repository/health', urgency: :low do + unless Feature.enabled?(:project_repositories_health, user_project) + not_found! + end + + authorize! :admin_project, user_project + + generate = params[:generate] || false + if generate + check_rate_limit!(:project_repositories_health, scope: [user_project]) do + render_api_error!({ error: 'Repository health has been requested too many times. Try again later.' }, 429) + end + end + + health = user_project.repository.health(generate) + + if health.nil? + not_found! + end + + present health, with: Entities::RepositoryHealth + end + desc 'Get repository contributors' do success Entities::Contributor end diff --git a/lib/gitlab/application_rate_limiter.rb b/lib/gitlab/application_rate_limiter.rb index 91a7b6be28656f..7002b331d72bdc 100644 --- a/lib/gitlab/application_rate_limiter.rb +++ b/lib/gitlab/application_rate_limiter.rb @@ -25,6 +25,7 @@ def rate_limits # rubocop:disable Metrics/AbcSize project_download_export: { threshold: -> { application_settings.project_download_export_limit }, interval: 1.minute }, project_repositories_archive: { threshold: 5, interval: 1.minute }, project_repositories_changelog: { threshold: 5, interval: 1.minute }, + project_repositories_health: { threshold: 5, interval: 1.hour }, project_generate_new_export: { threshold: -> { application_settings.project_export_limit }, interval: 1.minute }, project_import: { threshold: -> { application_settings.project_import_limit }, interval: 1.minute }, play_pipeline_schedule: { threshold: 1, interval: 1.minute }, diff --git a/spec/requests/api/repositories_spec.rb b/spec/requests/api/repositories_spec.rb index 303a14a8a8047a..83a223e6f3c071 100644 --- a/spec/requests/api/repositories_spec.rb +++ b/spec/requests/api/repositories_spec.rb @@ -10,6 +10,7 @@ let(:user) { create(:user) } let(:guest) { create(:user).tap { |u| create(:project_member, :guest, user: u, project: project) } } + let(:developer) { create(:user).tap { |u| create(:project_member, 
:developer, user: u, project: project) } } let!(:project) { create(:project, :repository, creator: user) } let!(:maintainer) { create(:project_member, :maintainer, user: user, project: project) } @@ -781,6 +782,101 @@ def commit_messages(response) end end + describe 'GET :id/repository/health' do + before do + stub_feature_flags(project_repositories_health: true) + end + + let(:params) { nil } + + subject(:request) do + get(api("/projects/#{project.id}/repository/health", current_user), params: params) + end + + shared_examples 'health' do + it 'returns 404 on first invocation' do + request + + expect(response).to have_gitlab_http_status(:not_found) + end + + it 'returns 404 on subsequent invocations if a report has not been generated' do + 2.times do + request + expect(response).to have_gitlab_http_status(:not_found) + end + end + + describe 'when a new report is generated' do + let(:params) { { generate: true } } + + it 'returns the health report' do + t_start = Time.current + request + t_end = Time.current + + expect(response).to have_gitlab_http_status(:success) + expect(json_response['size']).to be_present + expect(json_response['objects']).to be_present + expect(json_response['references']).to be_present + expect(Time.parse(json_response['updated_at'])).to be_between(t_start, t_end) + end + + context 'when rate limited' do + it 'returns api error' do + allow(Gitlab::ApplicationRateLimiter).to receive(:throttled_request?).and_return(true) + + request + + expect(response).to have_gitlab_http_status(:too_many_requests) + end + end + end + end + + context 'when unauthenticated', 'and project is public' do + it_behaves_like '403 response' do + let(:project) { create(:project, :public, :repository) } + let(:current_user) { nil } + end + end + + context 'when unauthenticated', 'and project is private' do + it_behaves_like '404 response' do + let(:current_user) { nil } + let(:message) { '404 Project Not Found' } + end + end + + context 'when authenticated', 'as a 
maintainer' do + it_behaves_like 'health' do + let(:current_user) { user } + end + end + + context 'when authenticated', 'as a developer' do + it_behaves_like '403 response' do + let(:current_user) { developer } + end + end + + context 'when authenticated', 'as a guest' do + it_behaves_like '403 response' do + let(:current_user) { guest } + end + end + + context 'when feature flag is disabled' do + before do + stub_feature_flags(project_repositories_health: false) + end + + it_behaves_like '404 response' do + let(:current_user) { user } + end + end + end + describe 'GET :id/repository/merge_base' do let(:refs) do %w[304d257dcb821665ab5110318fc58a007bd104ed 0031876facac3f2b2702a0e53a26e89939a42209 570e7b2abdd848b95f2f578043fc23bd6f6fd24d] -- GitLab