From 0b85d5bc66adaa70a4cd7c2b9bbf593c6dd9ee32 Mon Sep 17 00:00:00 2001 From: Aakriti Gupta Date: Tue, 16 Nov 2021 18:42:48 +0100 Subject: [PATCH] Geo: Add verification for LFS objects Add columns to manage verification states in on primary and secondary, in `lfs_objects` and `lfs_object_registry`, respectively. Use VerificationState concern and add tests. Changelog: added EE: true --- ...20211201143042_create_lfs_object_states.rb | 32 +++++++++ db/schema_migrations/20211201143042 | 1 + db/structure.sql | 39 ++++++++++ .../geo/replication/datatypes.md | 4 +- .../monitoring/prometheus/gitlab_metrics.md | 17 +++-- doc/api/geo_nodes.md | 41 ++++++++--- ee/app/models/ee/lfs_object.rb | 35 +++++++++ ee/app/models/geo/lfs_object_registry.rb | 1 + ee/app/models/geo/lfs_object_state.rb | 9 +++ .../replicators/geo/lfs_object_replicator.rb | 6 ++ .../geo_lfs_object_verification.yml | 8 +++ ...add_verification_to_lfs_object_registry.rb | 20 ++++++ ...fs_object_registry_verification_failure.rb | 13 ++++ ...0002_add_indexes_to_lfs_object_registry.rb | 26 +++++++ ee/db/geo/schema_migrations/20211124000000 | 1 + ee/db/geo/schema_migrations/20211124000001 | 1 + ee/db/geo/schema_migrations/20211124000002 | 1 + ee/db/geo/structure.sql | 18 ++++- ee/spec/factories/geo/lfs_object_registry.rb | 8 ++- ee/spec/factories/geo/lfs_object_states.rb | 15 ++++ ee/spec/factories/lfs_object_spec.rb | 25 +++++++ ee/spec/factories/lfs_objects.rb | 17 +++++ ee/spec/models/ee/lfs_object_spec.rb | 72 +++++++++++++++++++ .../models/geo/lfs_object_registry_spec.rb | 1 + .../geo/lfs_object_replicator_spec.rb | 3 +- lib/gitlab/database/gitlab_schemas.yml | 1 + .../user_uploads_designs_spec.rb | 3 + 27 files changed, 399 insertions(+), 19 deletions(-) create mode 100644 db/migrate/20211201143042_create_lfs_object_states.rb create mode 100644 db/schema_migrations/20211201143042 create mode 100644 ee/app/models/geo/lfs_object_state.rb create mode 100644 ee/config/feature_flags/development/geo_lfs_object_verification.yml create mode 100644 ee/db/geo/migrate/20211124000000_add_verification_to_lfs_object_registry.rb create mode 100644 ee/db/geo/migrate/20211124000001_add_text_limit_to_lfs_object_registry_verification_failure.rb create mode 100644 ee/db/geo/migrate/20211124000002_add_indexes_to_lfs_object_registry.rb create mode 100644 ee/db/geo/schema_migrations/20211124000000 create mode 100644 ee/db/geo/schema_migrations/20211124000001 create mode 100644 ee/db/geo/schema_migrations/20211124000002 create mode 100644 ee/spec/factories/geo/lfs_object_states.rb create mode 100644 ee/spec/factories/lfs_object_spec.rb create mode 100644 ee/spec/factories/lfs_objects.rb create mode 100644 ee/spec/models/ee/lfs_object_spec.rb diff --git a/db/migrate/20211201143042_create_lfs_object_states.rb b/db/migrate/20211201143042_create_lfs_object_states.rb new file mode 100644 index 00000000000000..91accbcd4389c9 --- /dev/null +++ b/db/migrate/20211201143042_create_lfs_object_states.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +class CreateLfsObjectStates < Gitlab::Database::Migration[1.0] + VERIFICATION_STATE_INDEX_NAME = "index_lfs_object_states_on_verification_state" + PENDING_VERIFICATION_INDEX_NAME = "index_lfs_object_states_pending_verification" + FAILED_VERIFICATION_INDEX_NAME = "index_lfs_object_states_failed_verification" + NEEDS_VERIFICATION_INDEX_NAME = "index_lfs_object_states_needs_verification" + + disable_ddl_transaction! + + def up + create_table :lfs_object_states, id: false do |t| + t.datetime_with_timezone :verification_started_at + t.datetime_with_timezone :verification_retry_at + t.datetime_with_timezone :verified_at + t.references :lfs_object, primary_key: true, null: false, foreign_key: { on_delete: :cascade } + t.integer :verification_state, default: 0, limit: 2, null: false + t.integer :verification_retry_count, limit: 2 + t.binary :verification_checksum, using: 'verification_checksum::bytea' + t.text :verification_failure, limit: 255 + + t.index :verification_state, name: VERIFICATION_STATE_INDEX_NAME + t.index :verified_at, where: "(verification_state = 0)", order: { verified_at: 'ASC NULLS FIRST' }, name: PENDING_VERIFICATION_INDEX_NAME + t.index :verification_retry_at, where: "(verification_state = 3)", order: { verification_retry_at: 'ASC NULLS FIRST' }, name: FAILED_VERIFICATION_INDEX_NAME + t.index :verification_state, where: "(verification_state = 0 OR verification_state = 3)", name: NEEDS_VERIFICATION_INDEX_NAME + end + end + + def down + drop_table :lfs_object_states + end +end diff --git a/db/schema_migrations/20211201143042 b/db/schema_migrations/20211201143042 new file mode 100644 index 00000000000000..a5f0c8be842f66 --- /dev/null +++ b/db/schema_migrations/20211201143042 @@ -0,0 +1 @@ +0d27ca1250d10b8915fa4523707044f9a8c2372110537f5639a1811aeb0858b8 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index d73bb531e96a8a..12d77936918ad1 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -15768,6 +15768,27 @@ CREATE SEQUENCE lfs_file_locks_id_seq ALTER SEQUENCE lfs_file_locks_id_seq OWNED BY lfs_file_locks.id; +CREATE TABLE lfs_object_states ( + verification_started_at timestamp with time zone, + verification_retry_at timestamp with time zone, + verified_at timestamp with time zone, + lfs_object_id bigint NOT NULL, + verification_state smallint DEFAULT 0 NOT NULL, + verification_retry_count smallint, + verification_checksum bytea, + verification_failure text, + CONSTRAINT check_efe45a8ab3 CHECK ((char_length(verification_failure) <= 255)) +); + +CREATE SEQUENCE lfs_object_states_lfs_object_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE lfs_object_states_lfs_object_id_seq OWNED BY lfs_object_states.lfs_object_id; + CREATE TABLE lfs_objects ( id integer NOT NULL, oid character varying NOT NULL, @@ -21757,6 +21778,8 @@ ALTER TABLE ONLY ldap_group_links ALTER COLUMN id SET DEFAULT nextval('ldap_grou ALTER TABLE ONLY lfs_file_locks ALTER COLUMN id SET DEFAULT nextval('lfs_file_locks_id_seq'::regclass); +ALTER TABLE ONLY lfs_object_states ALTER COLUMN lfs_object_id SET DEFAULT nextval('lfs_object_states_lfs_object_id_seq'::regclass); + ALTER TABLE ONLY lfs_objects ALTER COLUMN id SET DEFAULT nextval('lfs_objects_id_seq'::regclass); ALTER TABLE ONLY lfs_objects_projects ALTER COLUMN id SET DEFAULT nextval('lfs_objects_projects_id_seq'::regclass); @@ -23477,6 +23500,9 @@ ALTER TABLE ONLY ldap_group_links ALTER TABLE ONLY lfs_file_locks ADD CONSTRAINT lfs_file_locks_pkey PRIMARY KEY (id); +ALTER TABLE ONLY lfs_object_states + ADD CONSTRAINT lfs_object_states_pkey PRIMARY KEY (lfs_object_id); + ALTER TABLE ONLY lfs_objects ADD CONSTRAINT lfs_objects_pkey PRIMARY KEY (id); @@ -26488,6 +26514,16 @@ CREATE UNIQUE INDEX index_lfs_file_locks_on_project_id_and_path ON lfs_file_lock CREATE INDEX index_lfs_file_locks_on_user_id ON lfs_file_locks USING btree (user_id); +CREATE INDEX index_lfs_object_states_failed_verification ON lfs_object_states USING btree (verification_retry_at NULLS FIRST) WHERE (verification_state = 3); + +CREATE INDEX index_lfs_object_states_needs_verification ON lfs_object_states USING btree (verification_state) WHERE ((verification_state = 0) OR (verification_state = 3)); + +CREATE INDEX index_lfs_object_states_on_lfs_object_id ON lfs_object_states USING btree (lfs_object_id); + +CREATE INDEX index_lfs_object_states_on_verification_state ON lfs_object_states USING btree (verification_state); + +CREATE INDEX index_lfs_object_states_pending_verification ON lfs_object_states USING btree (verified_at NULLS FIRST) WHERE (verification_state = 0); + CREATE INDEX index_lfs_objects_on_file_store ON lfs_objects USING btree (file_store); CREATE UNIQUE INDEX index_lfs_objects_on_oid ON lfs_objects USING btree (oid); @@ -30282,6 +30318,9 @@ ALTER TABLE ONLY description_versions ALTER TABLE ONLY clusters_kubernetes_namespaces ADD CONSTRAINT fk_rails_40cc7ccbc3 FOREIGN KEY (cluster_project_id) REFERENCES cluster_projects(id) ON DELETE SET NULL; +ALTER TABLE ONLY lfs_object_states + ADD CONSTRAINT fk_rails_4188448cd5 FOREIGN KEY (lfs_object_id) REFERENCES lfs_objects(id) ON DELETE CASCADE; + ALTER TABLE ONLY geo_node_namespace_links ADD CONSTRAINT fk_rails_41ff5fb854 FOREIGN KEY (namespace_id) REFERENCES namespaces(id) ON DELETE CASCADE; diff --git a/doc/administration/geo/replication/datatypes.md b/doc/administration/geo/replication/datatypes.md index ca9388a3af3c3f..5f98656482c158 100644 --- a/doc/administration/geo/replication/datatypes.md +++ b/doc/administration/geo/replication/datatypes.md @@ -37,7 +37,7 @@ verification methods: | Git | Group wiki repository | Geo with Gitaly | _Not implemented_ | | Blobs | User uploads _(file system)_ | Geo with API | _Not implemented_ | | Blobs | User uploads _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | -| Blobs | LFS objects _(file system)_ | Geo with API | _Not implemented_ | +| Blobs | LFS objects _(file system)_ | Geo with API | SHA256 checksum | | Blobs | LFS objects _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | | Blobs | CI job artifacts _(file system)_ | Geo with API | _Not implemented_ | | Blobs | CI job artifacts _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | @@ -190,7 +190,7 @@ successfully, you must replicate their data using some other means. |[Project wiki repository](../../../user/project/wiki/) | **Yes** (10.2) | **Yes** (10.7) | No | | |[Group wiki repository](../../../user/project/wiki/group.md) | [**Yes** (13.10)](https://gitlab.com/gitlab-org/gitlab/-/issues/208147) | No | No | Behind feature flag `geo_group_wiki_repository_replication`, enabled by default. | |[Uploads](../../uploads.md) | **Yes** (10.2) | [No](https://gitlab.com/groups/gitlab-org/-/epics/1817) | No | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both sites and comparing the output between them. | -|[LFS objects](../../lfs/index.md) | **Yes** (10.2) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8922) | Via Object Storage provider if supported. Native Geo support (Beta). | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both sites and comparing the output between them. GitLab versions 11.11.x and 12.0.x are affected by [a bug that prevents any new LFS objects from replicating](https://gitlab.com/gitlab-org/gitlab/-/issues/32696).

Behind feature flag `geo_lfs_object_replication`, enabled by default. | +|[LFS objects](../../lfs/index.md) | **Yes** (10.2) | **Yes**(14.6) | Via Object Storage provider if supported. Native Geo support (Beta). | Verified only on transfer or manually using [Integrity Check Rake Task](../../raketasks/check.md) on both sites and comparing the output between them. GitLab versions 11.11.x and 12.0.x are affected by [a bug that prevents any new LFS objects from replicating](https://gitlab.com/gitlab-org/gitlab/-/issues/32696).

Replication is behind the feature flag `geo_lfs_object_replication`, enabled by default. Verification is under development behind the feature flag `geo_lfs_object_verification` introduced in 14.6. | |[Personal snippets](../../../user/snippets.md) | **Yes** (10.2) | **Yes** (10.2) | No | | |[Project snippets](../../../user/snippets.md) | **Yes** (10.2) | **Yes** (10.2) | No | | |[CI job artifacts](../../../ci/pipelines/job_artifacts.md) | **Yes** (10.4) | [No](https://gitlab.com/gitlab-org/gitlab/-/issues/8923) | Via Object Storage provider if supported. Native Geo support (Beta). | Verified only manually using [Integrity Check Rake Task](../../raketasks/check.md) on both sites and comparing the output between them. Job logs also verified on transfer. | diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 237b5561e70f8d..c6a1a93af7c6a0 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -187,16 +187,25 @@ configuration option in `gitlab.yml`. These metrics are served from the | `geo_repositories` | Gauge | 10.2 | Total number of repositories available on primary | `url` | | `geo_repositories_synced` | Gauge | 10.2 | Number of repositories synced on secondary | `url` | | `geo_repositories_failed` | Gauge | 10.2 | Number of repositories failed to sync on secondary | `url` | -| `geo_lfs_objects` | Gauge | 10.2 | Total number of LFS objects available on primary | `url` | -| `geo_lfs_objects_synced` | Gauge | 10.2 | Number of LFS objects synced on secondary | `url` | -| `geo_lfs_objects_failed` | Gauge | 10.2 | Number of LFS objects failed to sync on secondary | `url` | +| `geo_lfs_objects` | Gauge | 10.2 | Number of LFS objects on primary | `url` | +| `geo_lfs_objects_checksummed` | Gauge | 14.6 | Number of LFS objects checksummed successfully on primary | `url` | +| `geo_lfs_objects_checksum_failed` | Gauge | 14.6 | Number of LFS objects failed to calculate the checksum on primary | `url` | +| `geo_lfs_objects_checksum_total` | Gauge | 14.6 | Number of LFS objects tried to checksum on primary | `url` | +| `geo_lfs_objects_synced` | Gauge | 10.2 | Number of syncable LFS objects synced on secondary | `url` | +| `geo_lfs_objects_failed` | Gauge | 10.2 | Number of syncable LFS objects failed to sync on secondary | `url` | +| `geo_lfs_objects_registry` | Gauge | 14.6 | Number of LFS objects in the registry | `url` | +| `geo_lfs_objects_verified` | Gauge | 14.6 | Number of LFS objects verified on secondary | `url` | +| `geo_lfs_objects_verification_failed` | Gauge | 14.6 | Number of LFS objects' verifications failed on secondary | `url` | +| `geo_lfs_objects_verification_total` | Gauge | 14.6 | Number of LFS objects' verifications tried on secondary | `url` |LFS objects failed to sync on secondary | `url` | +| `geo_attachments` | Gauge | 10.2 | Total number of file attachments available on primary | `url` | +| `geo_attachments_synced` | Gauge | 10.2 | Number of attachments synced on secondary | `url` | +| `geo_attachments_failed` | Gauge | 10.2 | Number of attachments failed to sync on secondary | `url` | | `geo_last_event_id` | Gauge | 10.2 | Database ID of the latest event log entry on the primary | `url` | | `geo_last_event_timestamp` | Gauge | 10.2 | UNIX timestamp of the latest event log entry on the primary | `url` | | `geo_cursor_last_event_id` | Gauge | 10.2 | Last database ID of the event log processed by the secondary | `url` | | `geo_cursor_last_event_timestamp` | Gauge | 10.2 | Last UNIX timestamp of the event log processed by the secondary | `url` | | `geo_status_failed_total` | Counter | 10.2 | Number of times retrieving the status from the Geo Node failed | `url` | | `geo_last_successful_status_check_timestamp` | Gauge | 10.2 | Last timestamp when the status was successfully updated | `url` | -| `geo_lfs_objects_synced_missing_on_primary` | Gauge | 10.7 | Number of LFS objects marked as synced due to the file missing on the primary | `url` | | `geo_job_artifacts_synced_missing_on_primary` | Gauge | 10.7 | Number of job artifacts marked as synced due to the file missing on the primary | `url` | | `geo_repositories_checksummed` | Gauge | 10.7 | Number of repositories checksummed on primary | `url` | | `geo_repositories_checksum_failed` | Gauge | 10.7 | Number of repositories failed to calculate the checksum on primary | `url` | diff --git a/doc/api/geo_nodes.md b/doc/api/geo_nodes.md index 0758dba6f081fb..3952a87e69864e 100644 --- a/doc/api/geo_nodes.md +++ b/doc/api/geo_nodes.md @@ -307,11 +307,18 @@ Example response: "health_status": "Healthy", "missing_oauth_application": false, "db_replication_lag_seconds": null, - "lfs_objects_count": 0, + "lfs_objects_count": 5, + "lfs_objects_checksum_total_count": 5, + "lfs_objects_checksummed_count": 5, + "lfs_objects_checksum_failed_count": 0, "lfs_objects_synced_count": null, "lfs_objects_failed_count": null, - "lfs_objects_synced_missing_on_primary_count": 0, + "lfs_objects_registry_count": null, + "lfs_objects_verification_total_count": null, + "lfs_objects_verified_count": null, + "lfs_objects_verification_failed_count": null, "lfs_objects_synced_in_percentage": "0.00%", + "lfs_objects_verified_in_percentage": "0.00%", "job_artifacts_count": 2, "job_artifacts_synced_count": null, "job_artifacts_failed_count": null, @@ -468,11 +475,18 @@ Example response: "health_status": "Healthy", "missing_oauth_application": false, "db_replication_lag_seconds": 0, - "lfs_objects_count": 0, - "lfs_objects_synced_count": 0, - "lfs_objects_failed_count": 0, - "lfs_objects_synced_missing_on_primary_count": 0, + "lfs_objects_count": 5, + "lfs_objects_checksum_total_count": 5, + "lfs_objects_checksummed_count": 5, + "lfs_objects_checksum_failed_count": 0, + "lfs_objects_synced_count": null, + "lfs_objects_failed_count": null, + "lfs_objects_registry_count": null, + "lfs_objects_verification_total_count": null, + "lfs_objects_verified_count": null, + "lfs_objects_verification_failed_count": null, "lfs_objects_synced_in_percentage": "0.00%", + "lfs_objects_verified_in_percentage": "0.00%", "job_artifacts_count": 2, "job_artifacts_synced_count": 1, "job_artifacts_failed_count": 1, @@ -633,11 +647,18 @@ Example response: "health_status": "Healthy", "missing_oauth_application": false, "db_replication_lag_seconds": 0, - "lfs_objects_count": 0, - "lfs_objects_synced_count": 0, - "lfs_objects_failed_count": 0, - "lfs_objects_synced_missing_on_primary_count": 0, + "lfs_objects_count": 5, + "lfs_objects_checksum_total_count": 5, + "lfs_objects_checksummed_count": 5, + "lfs_objects_checksum_failed_count": 0, + "lfs_objects_synced_count": null, + "lfs_objects_failed_count": null, + "lfs_objects_registry_count": null, + "lfs_objects_verification_total_count": null, + "lfs_objects_verified_count": null, + "lfs_objects_verification_failed_count": null, "lfs_objects_synced_in_percentage": "0.00%", + "lfs_objects_verified_in_percentage": "0.00%", "job_artifacts_count": 2, "job_artifacts_synced_count": 1, "job_artifacts_failed_count": 1, diff --git a/ee/app/models/ee/lfs_object.rb b/ee/app/models/ee/lfs_object.rb index a52e3ec04c65cd..dee47efc6ab1cd 100644 --- a/ee/app/models/ee/lfs_object.rb +++ b/ee/app/models/ee/lfs_object.rb @@ -12,13 +12,39 @@ module LfsObject prepended do include ::Gitlab::Geo::ReplicableModel + include ::Gitlab::Geo::VerificationState with_replicator ::Geo::LfsObjectReplicator scope :project_id_in, ->(ids) { joins(:projects).merge(::Project.id_in(ids)) } + + has_one :lfs_object_state, autosave: false, inverse_of: :lfs_object, class_name: 'Geo::LfsObjectState' + + after_save :save_verification_details + + delegate :verification_retry_at, :verification_retry_at=, + :verified_at, :verified_at=, + :verification_checksum, :verification_checksum=, + :verification_failure, :verification_failure=, + :verification_retry_count, :verification_retry_count=, + :verification_state=, :verification_state, + :verification_started_at=, :verification_started_at, + to: :lfs_object_state, allow_nil: true + + scope :with_verification_state, ->(state) { joins(:lfs_object_state).where(lfs_object_states: { verification_state: verification_state_value(state) }) } + scope :checksummed, -> { joins(:lfs_object_state).where.not(lfs_object_states: { verification_checksum: nil } ) } + scope :not_checksummed, -> { joins(:lfs_object_state).where(lfs_object_states: { verification_checksum: nil } ) } + + scope :available_verifiables, -> { joins(:lfs_object_state) } + + def verification_state_object + lfs_object_state + end end class_methods do + extend ::Gitlab::Utils::Override + # @param primary_key_in [Range, LfsObject] arg to pass to primary_key_in scope # @return [ActiveRecord::Relation] everything that should be synced to this node, restricted by primary key def replicables_for_current_secondary(primary_key_in) @@ -27,6 +53,11 @@ def replicables_for_current_secondary(primary_key_in) .merge(object_storage_scope(node)) end + override :verification_state_table_class + def verification_state_table_class + Geo::LfsObjectState + end + private def object_storage_scope(node) @@ -36,6 +67,10 @@ def object_storage_scope(node) end end + def lfs_object_state + super || build_lfs_object_state + end + def log_geo_deleted_event # Keep empty for now. Should be addressed in future # by https://gitlab.com/gitlab-org/gitlab/-/issues/232917 diff --git a/ee/app/models/geo/lfs_object_registry.rb b/ee/app/models/geo/lfs_object_registry.rb index 5977a975166ff4..58bcf5eadca6be 100644 --- a/ee/app/models/geo/lfs_object_registry.rb +++ b/ee/app/models/geo/lfs_object_registry.rb @@ -2,6 +2,7 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry include ::Geo::ReplicableRegistry + include ::Geo::VerifiableRegistry MODEL_CLASS = ::LfsObject MODEL_FOREIGN_KEY = :lfs_object_id diff --git a/ee/app/models/geo/lfs_object_state.rb b/ee/app/models/geo/lfs_object_state.rb new file mode 100644 index 00000000000000..efacdb93069951 --- /dev/null +++ b/ee/app/models/geo/lfs_object_state.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module Geo + class LfsObjectState < ApplicationRecord + self.primary_key = :lfs_object_id + + belongs_to :lfs_object, inverse_of: :lfs_object_state + end +end diff --git a/ee/app/replicators/geo/lfs_object_replicator.rb b/ee/app/replicators/geo/lfs_object_replicator.rb index fc52e438da5aad..b13c2dd5a2636f 100644 --- a/ee/app/replicators/geo/lfs_object_replicator.rb +++ b/ee/app/replicators/geo/lfs_object_replicator.rb @@ -3,6 +3,7 @@ module Geo class LfsObjectReplicator < Gitlab::Geo::Replicator include ::Geo::BlobReplicatorStrategy + extend ::Gitlab::Utils::Override def carrierwave_uploader model_record.file @@ -11,5 +12,10 @@ def carrierwave_uploader def self.model ::LfsObject end + + override :verification_feature_flag_enabled? + def self.verification_feature_flag_enabled? + Feature.enabled?(:geo_lfs_object_verification, default_enabled: :yaml) + end end end diff --git a/ee/config/feature_flags/development/geo_lfs_object_verification.yml b/ee/config/feature_flags/development/geo_lfs_object_verification.yml new file mode 100644 index 00000000000000..53023a20d8d73d --- /dev/null +++ b/ee/config/feature_flags/development/geo_lfs_object_verification.yml @@ -0,0 +1,8 @@ +--- +name: geo_lfs_object_verification +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63981 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/333532 +milestone: '14.6' +type: development +group: group::geo +default_enabled: false diff --git a/ee/db/geo/migrate/20211124000000_add_verification_to_lfs_object_registry.rb b/ee/db/geo/migrate/20211124000000_add_verification_to_lfs_object_registry.rb new file mode 100644 index 00000000000000..970135ccd904ee --- /dev/null +++ b/ee/db/geo/migrate/20211124000000_add_verification_to_lfs_object_registry.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +class AddVerificationToLfsObjectRegistry < Gitlab::Database::Migration[1.0] + disable_ddl_transaction! + + # rubocop:disable Migration/AddLimitToTextColumns + # limit is added in 20211124000001_add_text_limit_to_lfs_object_registry_verification_failure.rb + def change + add_column :lfs_object_registry, :verification_started_at, :datetime_with_timezone + add_column :lfs_object_registry, :verified_at, :datetime_with_timezone + add_column :lfs_object_registry, :verification_retry_at, :datetime_with_timezone + add_column :lfs_object_registry, :verification_retry_count, :integer, default: 0 + add_column :lfs_object_registry, :verification_state, :integer, limit: 2, default: 0, null: false + add_column :lfs_object_registry, :checksum_mismatch, :boolean, default: false, null: false + add_column :lfs_object_registry, :verification_checksum, :binary + add_column :lfs_object_registry, :verification_checksum_mismatched, :binary + add_column :lfs_object_registry, :verification_failure, :text + # rubocop:enable Migration/AddLimitToTextColumns + end +end diff --git a/ee/db/geo/migrate/20211124000001_add_text_limit_to_lfs_object_registry_verification_failure.rb b/ee/db/geo/migrate/20211124000001_add_text_limit_to_lfs_object_registry_verification_failure.rb new file mode 100644 index 00000000000000..b29bc2c2f9dfca --- /dev/null +++ b/ee/db/geo/migrate/20211124000001_add_text_limit_to_lfs_object_registry_verification_failure.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class AddTextLimitToLfsObjectRegistryVerificationFailure < Gitlab::Database::Migration[1.0] + disable_ddl_transaction! + + def up + add_text_limit :lfs_object_registry, :verification_failure, 255 + end + + def down + remove_text_limit :lfs_object_registry, :verification_failure + end +end diff --git a/ee/db/geo/migrate/20211124000002_add_indexes_to_lfs_object_registry.rb b/ee/db/geo/migrate/20211124000002_add_indexes_to_lfs_object_registry.rb new file mode 100644 index 00000000000000..73cb2358960736 --- /dev/null +++ b/ee/db/geo/migrate/20211124000002_add_indexes_to_lfs_object_registry.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class AddIndexesToLfsObjectRegistry < Gitlab::Database::Migration[1.0] + LFS_OBJECT_ID_INDEX_NAME = "index_lfs_object_registry_on_lfs_object_id" + FAILED_VERIFICATION_INDEX_NAME = "lfs_object_registry_failed_verification" + NEEDS_VERIFICATION_INDEX_NAME = "lfs_object_registry_needs_verification" + PENDING_VERIFICATION_INDEX_NAME = "lfs_object_registry_pending_verification" + + REGISTRY_TABLE = :lfs_object_registry + + disable_ddl_transaction! + + def up + add_concurrent_index REGISTRY_TABLE, :lfs_object_id, name: LFS_OBJECT_ID_INDEX_NAME, unique: true + add_concurrent_index REGISTRY_TABLE, :verification_retry_at, name: FAILED_VERIFICATION_INDEX_NAME, order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 3))" + add_concurrent_index REGISTRY_TABLE, :verification_state, name: NEEDS_VERIFICATION_INDEX_NAME, where: "((state = 2) AND (verification_state = ANY (ARRAY[0, 3])))" + add_concurrent_index REGISTRY_TABLE, :verified_at, name: PENDING_VERIFICATION_INDEX_NAME, order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 0))" + end + + def down + remove_concurrent_index_by_name REGISTRY_TABLE, name: LFS_OBJECT_ID_INDEX_NAME + remove_concurrent_index_by_name REGISTRY_TABLE, name: FAILED_VERIFICATION_INDEX_NAME + remove_concurrent_index_by_name REGISTRY_TABLE, name: NEEDS_VERIFICATION_INDEX_NAME + remove_concurrent_index_by_name REGISTRY_TABLE, name: PENDING_VERIFICATION_INDEX_NAME + end +end diff --git a/ee/db/geo/schema_migrations/20211124000000 b/ee/db/geo/schema_migrations/20211124000000 new file mode 100644 index 00000000000000..48a0a46a4ba146 --- /dev/null +++ b/ee/db/geo/schema_migrations/20211124000000 @@ -0,0 +1 @@ +7115b2493e24cef7d385e076488539515aa00f097d477c7365b649032ed74dae \ No newline at end of file diff --git a/ee/db/geo/schema_migrations/20211124000001 b/ee/db/geo/schema_migrations/20211124000001 new file mode 100644 index 00000000000000..11e68fa796f553 --- /dev/null +++ b/ee/db/geo/schema_migrations/20211124000001 @@ -0,0 +1 @@ +f67ac276604ebed45ec59a450e35666833a76d770a85ab15be4d275f836652ba \ No newline at end of file diff --git a/ee/db/geo/schema_migrations/20211124000002 b/ee/db/geo/schema_migrations/20211124000002 new file mode 100644 index 00000000000000..f1a6f9e72ea831 --- /dev/null +++ b/ee/db/geo/schema_migrations/20211124000002 @@ -0,0 +1 @@ +388d2578ae1385b5276f760a5cb204831fab01e33f9f1ea75ea1717c11c6e7bb \ No newline at end of file diff --git a/ee/db/geo/structure.sql b/ee/db/geo/structure.sql index 2f1f7cd16d1345..5e73d3fb773c35 100644 --- a/ee/db/geo/structure.sql +++ b/ee/db/geo/structure.sql @@ -151,7 +151,17 @@ CREATE TABLE lfs_object_registry ( sha256 bytea, state smallint DEFAULT 0 NOT NULL, last_synced_at timestamp with time zone, - last_sync_failure text + last_sync_failure text, + verification_started_at timestamp with time zone, + verified_at timestamp with time zone, + verification_retry_at timestamp with time zone, + verification_retry_count integer DEFAULT 0, + verification_state smallint DEFAULT 0 NOT NULL, + checksum_mismatch boolean DEFAULT false NOT NULL, + verification_checksum bytea, + verification_checksum_mismatched bytea, + verification_failure text, + CONSTRAINT check_8bcaa12138 CHECK ((char_length(verification_failure) <= 255)) ); CREATE SEQUENCE lfs_object_registry_id_seq @@ -596,6 +606,12 @@ CREATE UNIQUE INDEX index_terraform_state_version_registry_on_t_state_version_id CREATE UNIQUE INDEX index_tf_state_versions_registry_tf_state_versions_id_unique ON terraform_state_version_registry USING btree (terraform_state_version_id); +CREATE INDEX lfs_object_registry_failed_verification ON lfs_object_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); + +CREATE INDEX lfs_object_registry_needs_verification ON lfs_object_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); + +CREATE INDEX lfs_object_registry_pending_verification ON lfs_object_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); + CREATE INDEX merge_request_diff_registry_failed_verification ON merge_request_diff_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX merge_request_diff_registry_needs_verification ON merge_request_diff_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); diff --git a/ee/spec/factories/geo/lfs_object_registry.rb b/ee/spec/factories/geo/lfs_object_registry.rb index eceecc85889966..0846b8de55bc67 100644 --- a/ee/spec/factories/geo/lfs_object_registry.rb +++ b/ee/spec/factories/geo/lfs_object_registry.rb @@ -2,7 +2,7 @@ FactoryBot.define do factory :geo_lfs_object_registry, class: 'Geo::LfsObjectRegistry' do - lfs_object + association(:lfs_object, :with_file) state { Geo::LfsObjectRegistry.state_value(:pending) } trait :synced do @@ -22,5 +22,11 @@ last_synced_at { 1.day.ago } retry_count { 0 } end + + trait :verification_succeeded do + verification_checksum { 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' } + verification_state { Geo::LfsObjectRegistry.verification_state_value(:verification_succeeded) } + verified_at { 5.days.ago } + end end end diff --git a/ee/spec/factories/geo/lfs_object_states.rb b/ee/spec/factories/geo/lfs_object_states.rb new file mode 100644 index 00000000000000..45c5b9bdef93f5 --- /dev/null +++ b/ee/spec/factories/geo/lfs_object_states.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :geo_lfs_object_state, class: 'Geo::LfsObjectState' do + lfs_object + + trait(:checksummed) do + verification_checksum { 'abc' } + end + + trait(:checksum_failure) do + verification_failure { 'Could not calculate the checksum' } + end + end +end diff --git a/ee/spec/factories/lfs_object_spec.rb b/ee/spec/factories/lfs_object_spec.rb new file mode 100644 index 00000000000000..0abc5ad4fb5aa3 --- /dev/null +++ b/ee/spec/factories/lfs_object_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +FactoryBot.modify do + factory :lfs_object do + trait(:checksummed) do + association :lfs_object_state, :checksummed, strategy: :build + end + + trait(:checksum_failure) do + association :lfs_object_state, :checksum_failure, strategy: :build + end + + trait(:verification_succeeded) do + with_file + verification_checksum { 'abc' } + verification_state { ::LfsObject.verification_state_value(:verification_succeeded) } + end + + trait(:verification_failed) do + with_file + verification_failure { 'Could not calculate the checksum' } + verification_state { ::LfsObject.verification_state_value(:verification_failed) } + end + end +end diff --git a/ee/spec/factories/lfs_objects.rb b/ee/spec/factories/lfs_objects.rb new file mode 100644 index 00000000000000..f2d3620fd7935c --- /dev/null +++ b/ee/spec/factories/lfs_objects.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +FactoryBot.modify do + factory :lfs_object do + trait(:verification_succeeded) do + with_file + verification_checksum { 'abc' } + verification_state { ::LfsObject.verification_state_value(:verification_succeeded) } + end + + trait(:verification_failed) do + with_file + verification_failure { 'Could not calculate the checksum' } + verification_state { ::LfsObject.verification_state_value(:verification_failed) } + end + end +end diff --git a/ee/spec/models/ee/lfs_object_spec.rb b/ee/spec/models/ee/lfs_object_spec.rb new file mode 100644 index 00000000000000..e27ffa83eb8be2 --- /dev/null +++ b/ee/spec/models/ee/lfs_object_spec.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe ::LfsObject do + using RSpec::Parameterized::TableSyntax + include EE::GeoHelpers + + let_it_be(:group) { create(:group) } + let_it_be(:project) { create(:project, :repository, group: group) } + let_it_be(:another_project) { create(:project, :repository) } + + it { is_expected.to respond_to(:log_geo_deleted_event) } + + context 'when model_record is part of available_verifiables scope' do + let(:verifiable_model_record) { build(:lfs_object) } + let(:verification_state_table_class) { verifiable_model_record.class.verification_state_table_class } + + it 'creates verification details' do + expect { verifiable_model_record.save! }.to change { verification_state_table_class.count }.by(1) + end + end + + describe '.with_files_stored_locally' do + let_it_be(:lfs_object) { create(:lfs_object, :with_file) } + + it 'includes states with local storage' do + expect(described_class.with_files_stored_locally).to have_attributes(count: 1) + end + end + + describe '.replicables_for_current_secondary' do + where(:selective_sync_enabled, :object_storage_sync_enabled, :lfs_object_object_storage_enabled, :synced_lfs_objects) do + true | true | false | 1 + true | false | false | 1 + false | true | false | 2 + false | false | false | 2 + true | true | true | 1 + true | false | true | 1 + false | true | true | 2 + false | false | true | 2 + end + + with_them do + let(:secondary) do + node = build(:geo_node, sync_object_storage: object_storage_sync_enabled) + + if selective_sync_enabled + node.selective_sync_type = 'namespaces' + node.namespaces = [group] + end + + node.save! + node + end + + before do + stub_current_geo_node(secondary) + stub_lfs_object_storage(uploader: LfsObjectUploader) if lfs_object_object_storage_enabled + + lfs_object_1 = create(:lfs_object, :with_file) + lfs_object_2 = create(:lfs_object, :with_file) + create(:lfs_objects_project, lfs_object: lfs_object_1, project: project) + create(:lfs_objects_project, lfs_object: lfs_object_2, project: another_project) + end + + it 'returns the proper number of LFS objects' do + expect(described_class.replicables_for_current_secondary(1..described_class.last.id).count).to eq(synced_lfs_objects) + end + end + end +end diff --git a/ee/spec/models/geo/lfs_object_registry_spec.rb b/ee/spec/models/geo/lfs_object_registry_spec.rb index 200d831504a201..0b64d225d4b943 100644 --- a/ee/spec/models/geo/lfs_object_registry_spec.rb +++ b/ee/spec/models/geo/lfs_object_registry_spec.rb @@ -10,4 +10,5 @@ end include_examples 'a Geo framework registry' + include_examples 'a Geo verifiable registry' end diff --git a/ee/spec/replicators/geo/lfs_object_replicator_spec.rb b/ee/spec/replicators/geo/lfs_object_replicator_spec.rb index 7448211a943c14..0d2daa79b6b74b 100644 --- a/ee/spec/replicators/geo/lfs_object_replicator_spec.rb +++ b/ee/spec/replicators/geo/lfs_object_replicator_spec.rb @@ -5,5 +5,6 @@ RSpec.describe Geo::LfsObjectReplicator do let(:model_record) { build(:lfs_object, :with_file) } - it_behaves_like 'a blob replicator' + include_examples 'a blob replicator' + include_examples 'a verifiable replicator' end diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index acb1533f935ed2..c4c34b1a5ba0d1 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -287,6 +287,7 @@ ldap_group_links: :gitlab_main lfs_file_locks: :gitlab_main lfs_objects: :gitlab_main lfs_objects_projects: :gitlab_main +lfs_object_states: :gitlab_main licenses: :gitlab_main lists: :gitlab_main list_user_preferences: :gitlab_main diff --git a/spec/features/projects/issues/design_management/user_uploads_designs_spec.rb b/spec/features/projects/issues/design_management/user_uploads_designs_spec.rb index e885c0c44131d7..211576a93f38a1 100644 --- a/spec/features/projects/issues/design_management/user_uploads_designs_spec.rb +++ b/spec/features/projects/issues/design_management/user_uploads_designs_spec.rb @@ -10,6 +10,9 @@ let(:issue) { create(:issue, project: project) } before do + # Cause of raising query limiting threshold https://gitlab.com/gitlab-org/gitlab/-/issues/347334 + stub_const("Gitlab::QueryLimiting::Transaction::THRESHOLD", 102) + sign_in(user) enable_design_management(feature_enabled) visit project_issue_path(project, issue) -- GitLab