From 00ece26dde48195b604651e008b60f1d08713cc6 Mon Sep 17 00:00:00 2001 From: Pavlo Strokov Date: Fri, 12 Aug 2022 08:47:31 +0300 Subject: [PATCH 1/2] Update of the gitaly gem version API of the Gitaly/Praefect service was extended with a new gRPC method 'ReadinessCheck' in the https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4674. Upgrade to a new version of the gem file to have access to a new method that will be used in upcoming changes. Part of: https://gitlab.com/gitlab-org/gitlab/-/issues/348174 --- Gemfile | 2 +- Gemfile.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Gemfile b/Gemfile index 5cf417a67ae3d0..ae71dea4779a9c 100644 --- a/Gemfile +++ b/Gemfile @@ -483,7 +483,7 @@ gem 'ssh_data', '~> 1.3' gem 'spamcheck', '~> 0.1.0' # Gitaly GRPC protocol definitions -gem 'gitaly', '~> 15.3.0-rc3' +gem 'gitaly', '~> 15.3.0-rc4' # KAS GRPC protocol definitions gem 'kas-grpc', '~> 0.0.2' diff --git a/Gemfile.lock b/Gemfile.lock index f04445e1d571b7..7ec954d9cea096 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -508,7 +508,7 @@ GEM rails (>= 3.2.0) git (1.11.0) rchardet (~> 1.8) - gitaly (15.3.0.pre.rc3) + gitaly (15.3.0.pre.rc4) grpc (~> 1.0) github-markup (1.7.0) gitlab (4.16.1) @@ -1573,7 +1573,7 @@ DEPENDENCIES gettext (~> 3.3) gettext_i18n_rails (~> 1.8.0) gettext_i18n_rails_js (~> 1.3) - gitaly (~> 15.3.0.pre.rc3) + gitaly (~> 15.3.0.pre.rc4) github-markup (~> 1.7.0) gitlab-chronic (~> 0.10.5) gitlab-dangerfiles (~> 3.5.0) -- GitLab From 5923d4d245c60049dbb97e799115c7e05ad67284 Mon Sep 17 00:00:00 2001 From: Pavlo Strokov Date: Fri, 12 Aug 2022 08:48:42 +0300 Subject: [PATCH 2/2] Extend gitlab:gitaly:check task with readiness verification To verify Praefect is ready to server the requests 'gitlab:gitaly:check' task was extended to call 'ReadinessCheck' RPC. The RPC was introduced in https://gitlab.com/gitlab-org/gitaly/-/merge_requests/4674 and allows to run 'praefect -config conf.toml check' sub-command from the remote. If any check on remote fails it will be reported by the task. The checks are executed only if Praefect is used, in case when setup has no Praefect the RPC call to remote (in that case it would be Gitaly) will always return no errors if the service is reachable. Closes: https://gitlab.com/gitlab-org/gitlab/-/issues/348174 --- .../geo/repository_sync_worker_spec.rb | 4 ++ .../primary/batch_worker_spec.rb | 4 ++ .../secondary/scheduler_worker_spec.rb | 4 ++ lib/gitlab/gitaly_client/server_service.rb | 13 ++++++ lib/gitlab/health_checks/gitaly_check.rb | 26 ++++++++++-- .../gitaly_client/server_service_spec.rb | 42 +++++++++++++++++++ .../gitlab/health_checks/gitaly_check_spec.rb | 22 ++++++++-- .../health_checks/probes/collection_spec.rb | 4 ++ spec/requests/health_controller_spec.rb | 4 ++ .../repository_check/dispatch_worker_spec.rb | 4 ++ 10 files changed, 120 insertions(+), 7 deletions(-) create mode 100644 spec/lib/gitlab/gitaly_client/server_service_spec.rb diff --git a/ee/spec/workers/geo/repository_sync_worker_spec.rb b/ee/spec/workers/geo/repository_sync_worker_spec.rb index d9a17861a18ff2..712fef9f64e4b6 100644 --- a/ee/spec/workers/geo/repository_sync_worker_spec.rb +++ b/ee/spec/workers/geo/repository_sync_worker_spec.rb @@ -64,6 +64,10 @@ # hide the 'broken' storage for this spec stub_storage_settings({}) + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + expect(repository_worker).to receive(:perform_async).with(project_in_synced_group.repository.storage) expect(design_worker).to receive(:perform_async).with(project_in_synced_group.repository.storage) expect(repository_worker).not_to receive(:perform_async).with('unknown') diff --git a/ee/spec/workers/geo/repository_verification/primary/batch_worker_spec.rb b/ee/spec/workers/geo/repository_verification/primary/batch_worker_spec.rb index c24c1b4095cef7..491ea9339977f4 100644 --- a/ee/spec/workers/geo/repository_verification/primary/batch_worker_spec.rb +++ b/ee/spec/workers/geo/repository_verification/primary/batch_worker_spec.rb @@ -45,6 +45,10 @@ # hide the 'broken' storage for this spec stub_storage_settings({}) + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + expect(Geo::RepositoryVerification::Primary::ShardWorker).to receive(:perform_async).with(healthy_shard) expect(Geo::RepositoryVerification::Primary::ShardWorker).not_to receive(:perform_async).with('unknown') diff --git a/ee/spec/workers/geo/repository_verification/secondary/scheduler_worker_spec.rb b/ee/spec/workers/geo/repository_verification/secondary/scheduler_worker_spec.rb index 2890971c8b92d9..e306a2c2c83195 100644 --- a/ee/spec/workers/geo/repository_verification/secondary/scheduler_worker_spec.rb +++ b/ee/spec/workers/geo/repository_verification/secondary/scheduler_worker_spec.rb @@ -38,6 +38,10 @@ # hide the 'broken' storage for this spec stub_storage_settings({}) + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + expect(Geo::RepositoryVerification::Secondary::ShardWorker).to receive(:perform_async).with(healthy_shard) expect(Geo::RepositoryVerification::Secondary::ShardWorker).not_to receive(:perform_async).with('unknown') diff --git a/lib/gitlab/gitaly_client/server_service.rb b/lib/gitlab/gitaly_client/server_service.rb index 36bda67c26e68b..48fd0e66354ffc 100644 --- a/lib/gitlab/gitaly_client/server_service.rb +++ b/lib/gitlab/gitaly_client/server_service.rb @@ -26,6 +26,19 @@ def storage_disk_statistics storage_specific(disk_statistics) end + def readiness_check + request = Gitaly::ReadinessCheckRequest.new(timeout: GitalyClient.medium_timeout) + response = GitalyClient.call(@storage, :server_service, :readiness_check, request, timeout: GitalyClient.default_timeout) + + return { success: true } if response.ok_response + + failed_checks = response.failure_response.failed_checks.map do |failed_check| + ["#{failed_check.name}: #{failed_check.error_message}"] + end + + { success: false, message: failed_checks.join("\n") } + end + private def storage_specific(response) diff --git a/lib/gitlab/health_checks/gitaly_check.rb b/lib/gitlab/health_checks/gitaly_check.rb index f5f142c251fdb2..2bd8ea711b5188 100644 --- a/lib/gitlab/health_checks/gitaly_check.rb +++ b/lib/gitlab/health_checks/gitaly_check.rb @@ -27,17 +27,35 @@ def metrics end def check(storage_name) - serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name) - result = serv.check + storage_healthy = healthy(storage_name) + unless storage_healthy[:success] + return HealthChecks::Result.new( + name, + storage_healthy[:success], + storage_healthy[:message], + shard: storage_name + ) + end + storage_ready = ready(storage_name) HealthChecks::Result.new( name, - result[:success], - result[:message], + storage_ready[:success], + storage_ready[:message], shard: storage_name ) end + def healthy(storage_name) + serv = Gitlab::GitalyClient::HealthCheckService.new(storage_name) + serv.check + end + + def ready(storage_name) + serv = Gitlab::GitalyClient::ServerService.new(storage_name) + serv.readiness_check + end + private def metric_prefix diff --git a/spec/lib/gitlab/gitaly_client/server_service_spec.rb b/spec/lib/gitlab/gitaly_client/server_service_spec.rb new file mode 100644 index 00000000000000..615f2ce0c217dc --- /dev/null +++ b/spec/lib/gitlab/gitaly_client/server_service_spec.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::GitalyClient::ServerService do + let(:storage) { 'default' } + + describe '#readiness_check' do + before do + ::Gitlab::GitalyClient.clear_stubs! + end + + let(:request) do + Gitaly::ReadinessCheckRequest.new(timeout: 30) + end + + subject(:readiness_check) { described_class.new(storage).readiness_check } + + it 'returns a positive success if no failures happened' do + expect_next_instance_of(Gitaly::ServerService::Stub) do |service| + response = Gitaly::ReadinessCheckResponse.new(ok_response: Gitaly::ReadinessCheckResponse::Ok.new) + expect(service).to receive(:readiness_check).with(request, kind_of(Hash)).and_return(response) + end + + expect(readiness_check[:success]).to eq(true) + end + + it 'returns a negative success and a compiled message if at least one failure happened' do + failure1 = Gitaly::ReadinessCheckResponse::Failure::Response.new(name: '1', error_message: 'msg 1') + failure2 = Gitaly::ReadinessCheckResponse::Failure::Response.new(name: '2', error_message: 'msg 2') + failures = Gitaly::ReadinessCheckResponse::Failure.new(failed_checks: [failure1, failure2]) + response = Gitaly::ReadinessCheckResponse.new(failure_response: failures) + + expect_next_instance_of(Gitaly::ServerService::Stub) do |service| + expect(service).to receive(:readiness_check).with(request, kind_of(Hash)).and_return(response) + end + + expect(readiness_check[:success]).to eq(false) + expect(readiness_check[:message]).to eq("1: msg 1\n2: msg 2") + end + end +end diff --git a/spec/lib/gitlab/health_checks/gitaly_check_spec.rb b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb index 7c346e3eb69fee..85c9658aa6d654 100644 --- a/spec/lib/gitlab/health_checks/gitaly_check_spec.rb +++ b/spec/lib/gitlab/health_checks/gitaly_check_spec.rb @@ -14,20 +14,36 @@ subject { described_class.readiness } before do - expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(gitaly_check) + expect(Gitlab::GitalyClient::HealthCheckService).to receive(:new).and_return(healthy_check) end context 'Gitaly server is up' do - let(:gitaly_check) { double(check: { success: true }) } + before do + expect(Gitlab::GitalyClient::ServerService).to receive(:new).and_return(ready_check) + end + + let(:healthy_check) { double(check: { success: true }) } + let(:ready_check) { double(readiness_check: { success: true }) } it { is_expected.to eq([result_class.new('gitaly_check', true, nil, shard: 'default')]) } end context 'Gitaly server is down' do - let(:gitaly_check) { double(check: { success: false, message: 'Connection refused' }) } + let(:healthy_check) { double(check: { success: false, message: 'Connection refused' }) } it { is_expected.to eq([result_class.new('gitaly_check', false, 'Connection refused', shard: 'default')]) } end + + context 'Gitaly server is not ready' do + before do + expect(Gitlab::GitalyClient::ServerService).to receive(:new).and_return(ready_check) + end + + let(:healthy_check) { double(check: { success: true }) } + let(:ready_check) { double(readiness_check: { success: false, message: 'Clock is out of sync' }) } + + it { is_expected.to match_array([result_class.new('gitaly_check', false, 'Clock is out of sync', shard: 'default')]) } + end end describe '#metrics' do diff --git a/spec/lib/gitlab/health_checks/probes/collection_spec.rb b/spec/lib/gitlab/health_checks/probes/collection_spec.rb index 741c45d953c316..3ec54642e24e8b 100644 --- a/spec/lib/gitlab/health_checks/probes/collection_spec.rb +++ b/spec/lib/gitlab/health_checks/probes/collection_spec.rb @@ -24,6 +24,10 @@ end it 'responds with readiness checks data' do + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + expect(subject.http_status).to eq(200) expect(subject.json[:status]).to eq('ok') diff --git a/spec/requests/health_controller_spec.rb b/spec/requests/health_controller_spec.rb index f70faf5bb9c796..fb1c0f85215158 100644 --- a/spec/requests/health_controller_spec.rb +++ b/spec/requests/health_controller_spec.rb @@ -127,6 +127,10 @@ end it 'responds with readiness checks data' do + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + subject expect(json_response['db_check']).to contain_exactly({ 'status' => 'ok' }) diff --git a/spec/workers/repository_check/dispatch_worker_spec.rb b/spec/workers/repository_check/dispatch_worker_spec.rb index 829abc7d895f98..146228c0852868 100644 --- a/spec/workers/repository_check/dispatch_worker_spec.rb +++ b/spec/workers/repository_check/dispatch_worker_spec.rb @@ -22,6 +22,10 @@ end it 'dispatches work to RepositoryCheck::BatchWorker' do + expect_next_instance_of(Gitlab::GitalyClient::ServerService) do |service| + expect(service).to receive(:readiness_check).and_return({ success: true }) + end + expect(RepositoryCheck::BatchWorker).to receive(:perform_async).at_least(:once) subject.perform -- GitLab