From 72d54886cd608830c75296ba8ca40583122f3bdc Mon Sep 17 00:00:00 2001 From: Valery Sizov Date: Mon, 24 Jul 2023 19:51:06 +0100 Subject: [PATCH] Allow verification of files in Object Storage Changelog: added EE: true --- .../concerns/geo/blob_replicator_strategy.rb | 16 ++++++-- .../models/concerns/geo/replicable_model.rb | 10 +++-- .../geo_object_storage_verification.yml | 8 ++++ .../gitlab/geo/replication/blob_downloader.rb | 6 ++- .../geo/replication/blob_downloader_spec.rb | 26 ++++++++++++ .../concerns/geo/replicable_model_spec.rb | 41 +++++++++++++------ ee/spec/models/ee/ci/job_artifact_spec.rb | 29 ------------- ...lob_replicator_strategy_shared_examples.rb | 9 ++-- ...del_with_separate_table_shared_examples.rb | 4 ++ 9 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 ee/config/feature_flags/development/geo_object_storage_verification.yml diff --git a/ee/app/models/concerns/geo/blob_replicator_strategy.rb b/ee/app/models/concerns/geo/blob_replicator_strategy.rb index b2e3b4cf5f6d4b..41975af4a6606b 100644 --- a/ee/app/models/concerns/geo/blob_replicator_strategy.rb +++ b/ee/app/models/concerns/geo/blob_replicator_strategy.rb @@ -140,13 +140,17 @@ def removed_blob_path(uploader_class, path) File.join(uploader_class.constantize.root, path) end - # Returns a checksum of the file + # Returns a checksum for the local files and file size for remote ones # - # @return [String] SHA256 hash of the carrierwave file + # @return [String] SHA256 hash or file size def calculate_checksum raise 'File is not checksummable' unless checksummable? - model.sha256_hexdigest(blob_path) + if carrierwave_uploader.file_storage? + model.sha256_hexdigest(blob_path) + else + carrierwave_uploader.file.size.to_s + end end # Returns whether the file exists on disk or in remote storage @@ -178,7 +182,11 @@ def download # # @return [Boolean] whether it can generate a checksum def checksummable? - carrierwave_uploader.file_storage? && file_exists? + if Feature.enabled?(:geo_object_storage_verification) + file_exists? + else + carrierwave_uploader.file_storage? && file_exists? + end end # Return whether it's immutable diff --git a/ee/app/models/concerns/geo/replicable_model.rb b/ee/app/models/concerns/geo/replicable_model.rb index 678332ff0a4eb4..5a7baec0893e0e 100644 --- a/ee/app/models/concerns/geo/replicable_model.rb +++ b/ee/app/models/concerns/geo/replicable_model.rb @@ -31,11 +31,15 @@ module ReplicableModel scope :available_replicables, -> { all } # On primary, `verifiables` are records that can be checksummed and/or are replicable. - # On secondary, `verifiables` are records that have already been replicated # and (ideally) have been checksummed on the primary - - scope :verifiables, -> { self.respond_to?(:with_files_stored_locally) ? available_replicables.with_files_stored_locally : available_replicables } + scope :verifiables, -> do + if Feature.enabled?(:geo_object_storage_verification) + available_replicables + else + self.respond_to?(:with_files_stored_locally) ? available_replicables.with_files_stored_locally : available_replicables + end + end # When storing verification details in the same table as the model, # the scope `available_verifiables` returns only those records diff --git a/ee/config/feature_flags/development/geo_object_storage_verification.yml b/ee/config/feature_flags/development/geo_object_storage_verification.yml new file mode 100644 index 00000000000000..6e30f5d9f8cef6 --- /dev/null +++ b/ee/config/feature_flags/development/geo_object_storage_verification.yml @@ -0,0 +1,8 @@ +--- +name: geo_object_storage_verification +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/127408 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/410387 +milestone: '16.4' +type: development +group: group::geo +default_enabled: false diff --git a/ee/lib/gitlab/geo/replication/blob_downloader.rb b/ee/lib/gitlab/geo/replication/blob_downloader.rb index d9b0a653d2e9a1..b72e1624e1346f 100644 --- a/ee/lib/gitlab/geo/replication/blob_downloader.rb +++ b/ee/lib/gitlab/geo/replication/blob_downloader.rb @@ -233,7 +233,11 @@ def checksum_mismatch?(file_path) end def actual_checksum(file_path) - @actual_checksum = Digest::SHA256.file(file_path).hexdigest + @actual_checksum = if file_storage? + Digest::SHA256.file(file_path).hexdigest + elsif Feature.enabled?(:geo_object_storage_verification) + File.size(file_path).to_s + end end def non_success_response_result(response, url) diff --git a/ee/spec/lib/gitlab/geo/replication/blob_downloader_spec.rb b/ee/spec/lib/gitlab/geo/replication/blob_downloader_spec.rb index 5921ed86138e1e..51887ade00c5dd 100644 --- a/ee/spec/lib/gitlab/geo/replication/blob_downloader_spec.rb +++ b/ee/spec/lib/gitlab/geo/replication/blob_downloader_spec.rb @@ -70,6 +70,8 @@ let(:sync_object_storage) { true } context 'when the primary proxies remote storage' do + let(:model_record) { create(:package_file) } + it 'returns success' do content = replicator.carrierwave_uploader.file.read size = content.bytesize @@ -87,6 +89,7 @@ let(:content) { replicator.carrierwave_uploader.file.read } let(:size) { content.bytesize } let(:remote_url) { replicator.carrierwave_uploader.url } + let(:model_record) { create(:package_file) } before do # Set up to ensure that our redirect follow implementation does @@ -229,6 +232,29 @@ expect_blob_downloader_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false) end end + + context 'when file is in object storage and has the filesize as a checksum on primary' do + let!(:secondary_object_storage) { create(:geo_node, sync_object_storage: true) } + + before do + stub_package_file_object_storage(enabled: true, direct_upload: true) + stub_current_geo_node(secondary_object_storage) + end + + let!(:model_record) { create(:package_file, :npm, :object_storage) } + + it 'returns a successful result' do + allow(replicator).to receive(:primary_checksum).and_return("3") + + content = 'foo' # 3 bytes + stub_request(:get, subject.resource_url) + .to_return(status: 200, body: content) + + result = subject.execute + + expect_blob_downloader_result(result, success: true, bytes_downloaded: content.bytesize, primary_missing_file: false) + end + end end end diff --git a/ee/spec/models/concerns/geo/replicable_model_spec.rb b/ee/spec/models/concerns/geo/replicable_model_spec.rb index 36fe1044b7c771..85af56d72aa1dd 100644 --- a/ee/spec/models/concerns/geo/replicable_model_spec.rb +++ b/ee/spec/models/concerns/geo/replicable_model_spec.rb @@ -69,23 +69,40 @@ end describe '.verifiables' do - context 'when the model can be filtered by locally stored files' do - it 'filters by locally stored files' do - allow(DummyModel).to receive(:respond_to?).with(:all).and_call_original - allow(DummyModel).to receive(:respond_to?).with(:with_files_stored_locally).and_return(true) + context 'when geo_object_storage_verification feature flag is disabled' do + before do + stub_feature_flags(geo_object_storage_verification: false) + end - expect(DummyModel).to receive(:with_files_stored_locally) + context 'when the model can be filtered by locally stored files' do + it 'filters by locally stored files' do + allow(DummyModel).to receive(:respond_to?).with(:all).and_call_original + allow(DummyModel).to receive(:respond_to?).with(:with_files_stored_locally).and_return(true) - DummyModel.verifiables + expect(DummyModel).to receive(:with_files_stored_locally) + + DummyModel.verifiables + end end - end - context 'when the model cannot be filtered by locally stored files' do - it 'does not filter by locally stored files' do - allow(DummyModel).to receive(:respond_to?).with(:all).and_call_original - allow(DummyModel).to receive(:respond_to?).with(:with_files_stored_locally).and_return(false) + context 'when the model cannot be filtered by locally stored files' do + it 'does not filter by locally stored files' do + allow(DummyModel).to receive(:respond_to?).with(:all).and_call_original + allow(DummyModel).to receive(:respond_to?).with(:with_files_stored_locally).and_return(false) + + expect(DummyModel).not_to receive(:with_files_stored_locally) + + DummyModel.verifiables + end + end + end - expect(DummyModel).not_to receive(:with_files_stored_locally) + # We don't need to test the case when geo_object_storage_verification is enabled + # because the whole .verifiables method won't be needed anymore after the FF is removed. + # This one has only symbolic meaning before the removal + context 'when geo_object_storage_verification feature flag is enabled' do + it 'aliasses to .available_replicables' do + expect(DummyModel).to receive(:available_replicables) DummyModel.verifiables end diff --git a/ee/spec/models/ee/ci/job_artifact_spec.rb b/ee/spec/models/ee/ci/job_artifact_spec.rb index 01e72bebe76a06..ada26916ac3a98 100644 --- a/ee/spec/models/ee/ci/job_artifact_spec.rb +++ b/ee/spec/models/ee/ci/job_artifact_spec.rb @@ -8,35 +8,6 @@ let_it_be(:job) { create(:ci_build) } - describe '#save_verification_details' do - let(:verifiable_model_record) { build(:ci_job_artifact, :trace, job: job) } - let(:verification_state_table_class) { verifiable_model_record.class.verification_state_table_class } - - before do - stub_primary_node - end - - context 'when direct upload is enabled for trace artifacts' do - before do - stub_artifacts_object_storage(JobArtifactUploader, direct_upload: true) - end - - it 'does not create verification details' do - expect { verifiable_model_record.save! }.not_to change { verification_state_table_class.count } - end - end - - context 'when direct upload is not enabled' do - before do - stub_artifacts_object_storage(JobArtifactUploader, direct_upload: false) - end - - it 'creates verification details' do - expect { verifiable_model_record.save! }.to change { verification_state_table_class.count }.by(1) - end - end - end - include_examples 'a replicable model with a separate table for verification state' do before do stub_artifacts_object_storage diff --git a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb index 45da48770ac62c..6687d870506fcc 100644 --- a/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/blob_replicator_strategy_shared_examples.rb @@ -276,10 +276,13 @@ context 'when the file is remotely stored' do it 'raises an error' do - carrierwave_uploader = double(file_storage?: false) - allow(subject).to receive(:carrierwave_uploader).and_return(carrierwave_uploader) + carrierwave_uploader = replicator.carrierwave_uploader + allow(carrierwave_uploader).to receive(:file_storage?).and_return(false) + allow(carrierwave_uploader).to receive_message_chain(:file, size: 65112) + allow(carrierwave_uploader).to receive_message_chain(:file, exists?: true) + allow(replicator).to receive(:carrierwave_uploader).and_return(carrierwave_uploader) - expect { subject.calculate_checksum }.to raise_error('File is not checksummable') + expect(subject.calculate_checksum).to eq('65112') end end end diff --git a/ee/spec/support/shared_examples/models/concerns/replicable_model_with_separate_table_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/replicable_model_with_separate_table_shared_examples.rb index dca2e4ddccc95c..55bd19f974d7a1 100644 --- a/ee/spec/support/shared_examples/models/concerns/replicable_model_with_separate_table_shared_examples.rb +++ b/ee/spec/support/shared_examples/models/concerns/replicable_model_with_separate_table_shared_examples.rb @@ -12,6 +12,10 @@ RSpec.shared_examples 'a replicable model with a separate table for verification state' do include EE::GeoHelpers + before do + stub_feature_flags(geo_object_storage_verification: false) + end + context 'on a primary node' do before do stub_primary_node -- GitLab