diff --git a/db/migrate/20211119111006_create_job_artifact_states.rb b/db/migrate/20211119111006_create_job_artifact_states.rb new file mode 100644 index 0000000000000000000000000000000000000000..44dffed58ee61b7b3d8129abbd384b5e02ae304c --- /dev/null +++ b/db/migrate/20211119111006_create_job_artifact_states.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +class CreateJobArtifactStates < Gitlab::Database::Migration[1.0] + VERIFICATION_STATE_INDEX_NAME = "index_job_artifact_states_on_verification_state" + PENDING_VERIFICATION_INDEX_NAME = "index_job_artifact_states_pending_verification" + FAILED_VERIFICATION_INDEX_NAME = "index_job_artifact_states_failed_verification" + NEEDS_VERIFICATION_INDEX_NAME = "index_job_artifact_states_needs_verification" + + enable_lock_retries! + + def up + create_table :ci_job_artifact_states, id: false do |t| + t.datetime_with_timezone :verification_started_at + t.datetime_with_timezone :verification_retry_at + t.datetime_with_timezone :verified_at + t.references :job_artifact, primary_key: true, null: false, foreign_key: { on_delete: :cascade, to_table: :ci_job_artifacts } + t.integer :verification_state, default: 0, limit: 2, null: false + t.integer :verification_retry_count, limit: 2 + t.binary :verification_checksum, using: 'verification_checksum::bytea' + t.text :verification_failure, limit: 255 + + t.index :verification_state, name: VERIFICATION_STATE_INDEX_NAME + t.index :verified_at, where: "(verification_state = 0)", order: { verified_at: 'ASC NULLS FIRST' }, name: PENDING_VERIFICATION_INDEX_NAME + t.index :verification_retry_at, where: "(verification_state = 3)", order: { verification_retry_at: 'ASC NULLS FIRST' }, name: FAILED_VERIFICATION_INDEX_NAME + t.index :verification_state, where: "(verification_state = 0 OR verification_state = 3)", name: NEEDS_VERIFICATION_INDEX_NAME + end + end + + def down + drop_table :ci_job_artifact_states + end +end diff --git a/db/schema_migrations/20211119111006 b/db/schema_migrations/20211119111006 new file mode 100644 index 0000000000000000000000000000000000000000..ebb8e460452dbdc23ed90888f5611fe689c30614 --- /dev/null +++ b/db/schema_migrations/20211119111006 @@ -0,0 +1 @@ +d618c28360f7716807e9727566019e269963d85164cf2f306ec9692d3b037802 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index a61a4ef4bd50f57af1a7400f4821d4dd5f414d32..407ff9bc3899d67f14e20ec938ccf18eb7e5775a 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -11863,6 +11863,27 @@ CREATE SEQUENCE ci_instance_variables_id_seq ALTER SEQUENCE ci_instance_variables_id_seq OWNED BY ci_instance_variables.id; +CREATE TABLE ci_job_artifact_states ( + verification_started_at timestamp with time zone, + verification_retry_at timestamp with time zone, + verified_at timestamp with time zone, + job_artifact_id bigint NOT NULL, + verification_state smallint DEFAULT 0 NOT NULL, + verification_retry_count smallint, + verification_checksum bytea, + verification_failure text, + CONSTRAINT check_df832b66ea CHECK ((char_length(verification_failure) <= 255)) +); + +CREATE SEQUENCE ci_job_artifact_states_job_artifact_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + +ALTER SEQUENCE ci_job_artifact_states_job_artifact_id_seq OWNED BY ci_job_artifact_states.job_artifact_id; + CREATE TABLE ci_job_artifacts ( project_id integer NOT NULL, file_type integer NOT NULL, @@ -21661,6 +21682,8 @@ ALTER TABLE ONLY ci_group_variables ALTER COLUMN id SET DEFAULT nextval('ci_grou ALTER TABLE ONLY ci_instance_variables ALTER COLUMN id SET DEFAULT nextval('ci_instance_variables_id_seq'::regclass); +ALTER TABLE ONLY ci_job_artifact_states ALTER COLUMN job_artifact_id SET DEFAULT nextval('ci_job_artifact_states_job_artifact_id_seq'::regclass); + ALTER TABLE ONLY ci_job_artifacts ALTER COLUMN id SET DEFAULT nextval('ci_job_artifacts_id_seq'::regclass); ALTER TABLE ONLY ci_job_token_project_scope_links ALTER COLUMN id SET DEFAULT nextval('ci_job_token_project_scope_links_id_seq'::regclass); @@ -23138,6 +23161,9 @@ ALTER TABLE ONLY ci_group_variables ALTER TABLE ONLY ci_instance_variables ADD CONSTRAINT ci_instance_variables_pkey PRIMARY KEY (id); +ALTER TABLE ONLY ci_job_artifact_states + ADD CONSTRAINT ci_job_artifact_states_pkey PRIMARY KEY (job_artifact_id); + ALTER TABLE ONLY ci_job_artifacts ADD CONSTRAINT ci_job_artifacts_pkey PRIMARY KEY (id); @@ -25767,6 +25793,8 @@ CREATE UNIQUE INDEX index_ci_group_variables_on_group_id_and_key_and_environment CREATE UNIQUE INDEX index_ci_instance_variables_on_key ON ci_instance_variables USING btree (key); +CREATE INDEX index_ci_job_artifact_states_on_job_artifact_id ON ci_job_artifact_states USING btree (job_artifact_id); + CREATE INDEX index_ci_job_artifacts_for_terraform_reports ON ci_job_artifacts USING btree (project_id, id) WHERE (file_type = 18); CREATE INDEX index_ci_job_artifacts_id_for_terraform_reports ON ci_job_artifacts USING btree (id) WHERE (file_type = 18); @@ -26755,6 +26783,14 @@ CREATE INDEX index_jira_imports_on_user_id ON jira_imports USING btree (user_id) CREATE INDEX index_jira_tracker_data_on_service_id ON jira_tracker_data USING btree (service_id); +CREATE INDEX index_job_artifact_states_failed_verification ON ci_job_artifact_states USING btree (verification_retry_at NULLS FIRST) WHERE (verification_state = 3); + +CREATE INDEX index_job_artifact_states_needs_verification ON ci_job_artifact_states USING btree (verification_state) WHERE ((verification_state = 0) OR (verification_state = 3)); + +CREATE INDEX index_job_artifact_states_on_verification_state ON ci_job_artifact_states USING btree (verification_state); + +CREATE INDEX index_job_artifact_states_pending_verification ON ci_job_artifact_states USING btree (verified_at NULLS FIRST) WHERE (verification_state = 0); + CREATE INDEX index_keys_on_expires_at_and_id ON keys USING btree (date(timezone('UTC'::text, expires_at)), id) WHERE (expiry_notification_delivered_at IS NULL); CREATE UNIQUE INDEX index_keys_on_fingerprint ON keys USING btree (fingerprint); @@ -30968,6 +31004,9 @@ ALTER TABLE ONLY application_settings ALTER TABLE ONLY clusters_kubernetes_namespaces ADD CONSTRAINT fk_rails_7e7688ecaf FOREIGN KEY (cluster_id) REFERENCES clusters(id) ON DELETE CASCADE; +ALTER TABLE ONLY ci_job_artifact_states + ADD CONSTRAINT fk_rails_80a9cba3b2 FOREIGN KEY (job_artifact_id) REFERENCES ci_job_artifacts(id) ON DELETE CASCADE; + ALTER TABLE ONLY approval_merge_request_rules_users ADD CONSTRAINT fk_rails_80e6801803 FOREIGN KEY (approval_merge_request_rule_id) REFERENCES approval_merge_request_rules(id) ON DELETE CASCADE; diff --git a/doc/administration/geo/replication/datatypes.md b/doc/administration/geo/replication/datatypes.md index 0fa08dcc9e01a6f268799648948bc0b8904a8172..5beb2479c57a4a134b832a066766bc9c2fe0c624 100644 --- a/doc/administration/geo/replication/datatypes.md +++ b/doc/administration/geo/replication/datatypes.md @@ -39,7 +39,7 @@ verification methods: | Blobs | User uploads _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | | Blobs | LFS objects _(file system)_ | Geo with API | SHA256 checksum | | Blobs | LFS objects _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | -| Blobs | CI job artifacts _(file system)_ | Geo with API | _Not implemented_ | +| Blobs | CI job artifacts _(file system)_ | Geo with API | SHA256 checksum | | Blobs | CI job artifacts _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | | Blobs | Archived CI build traces _(file system)_ | Geo with API | _Not implemented_ | | Blobs | Archived CI build traces _(object storage)_ | Geo with API/Managed (*2*) | _Not implemented_ | diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index d94c3cdefbf74251af50b409a0115384f5fdab6d..a1bfae678c7c820b1696fe77b21badb8d77eaba8 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -270,6 +270,16 @@ configuration option in `gitlab.yml`. These metrics are served from the | `geo_pages_deployments_verification_total` | Gauge | 14.6 | Number of pages deployments verifications tried on secondary | `url` | | `geo_pages_deployments_verified` | Gauge | 14.6 | Number of pages deployments verified on secondary | `url` | | `geo_pages_deployments_verification_failed` | Gauge | 14.6 | Number of pages deployments verifications failed on secondary | `url` | +| `geo_job_artifacts` | Gauge | 14.8 | Number of job artifacts on primary | `url` | +| `geo_job_artifacts_checksum_total` | Gauge | 14.8 | Number of job artifacts tried to checksum on primary | `url` | +| `geo_job_artifacts_checksummed` | Gauge | 14.8 | Number of job artifacts successfully checksummed on primary | `url` | +| `geo_job_artifacts_checksum_failed` | Gauge | 14.8 | Number of job artifacts failed to calculate the checksum on primary | `url` | +| `geo_job_artifacts_synced` | Gauge | 14.8 | Number of syncable job artifacts synced on secondary | `url` | +| `geo_job_artifacts_failed` | Gauge | 14.8 | Number of syncable job artifacts failed to sync on secondary | `url` | +| `geo_job_artifacts_registry` | Gauge | 14.8 | Number of job artifacts in the registry | `url` | +| `geo_job_artifacts_verification_total` | Gauge | 14.8 | Number of job artifacts verifications tried on secondary | `url` | +| `geo_job_artifacts_verified` | Gauge | 14.8 | Number of job artifacts verified on secondary | `url` | +| `geo_job_artifacts_verification_failed` | Gauge | 14.8 | Number of job artifacts verifications failed on secondary | `url` | | `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` | | `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` | | `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` | diff --git a/doc/api/geo_nodes.md b/doc/api/geo_nodes.md index fe1674649b62d9888042ba39aec43885c24f3d09..a152c443902a1090f3a1ca61b56356894eb4a84b 100644 --- a/doc/api/geo_nodes.md +++ b/doc/api/geo_nodes.md @@ -467,6 +467,19 @@ Example response: "uploads_verified_count": null, "uploads_verification_failed_count": null, "uploads_verified_in_percentage": "0.00%", + "job_artifacts_count": 5, + "job_artifacts_checksum_total_count": 5, + "job_artifacts_checksummed_count": 5, + "job_artifacts_checksum_failed_count": 0, + "job_artifacts_synced_count": 5, + "job_artifacts_failed_count": 0, + "job_artifacts_registry_count": 5, + "job_artifacts_verification_total_count": 5, + "job_artifacts_verified_count": 5, + "job_artifacts_verification_failed_count": 0, + "job_artifacts_synced_in_percentage": "100.00%", + "job_artifacts_verified_in_percentage": "100.00%", + "job_artifacts_synced_missing_on_primary_count": 0, }, { "geo_node_id": 2, @@ -623,6 +636,19 @@ Example response: "uploads_verified_count": null, "uploads_verification_failed_count": null, "uploads_verified_in_percentage": "0.00%", + "job_artifacts_count": 5, + "job_artifacts_checksum_total_count": 5, + "job_artifacts_checksummed_count": 5, + "job_artifacts_checksum_failed_count": 0, + "job_artifacts_synced_count": 5, + "job_artifacts_failed_count": 0, + "job_artifacts_registry_count": 5, + "job_artifacts_verification_total_count": 5, + "job_artifacts_verified_count": 5, + "job_artifacts_verification_failed_count": 0, + "job_artifacts_synced_in_percentage": "100.00%", + "job_artifacts_verified_in_percentage": "100.00%", + "job_artifacts_synced_missing_on_primary_count": 0, } ] ``` @@ -776,6 +802,19 @@ Example response: "uploads_verified_count": null, "uploads_verification_failed_count": null, "uploads_verified_in_percentage": "0.00%", + "job_artifacts_count": 5, + "job_artifacts_checksum_total_count": 5, + "job_artifacts_checksummed_count": 5, + "job_artifacts_checksum_failed_count": 0, + "job_artifacts_synced_count": 5, + "job_artifacts_failed_count": 0, + "job_artifacts_registry_count": 5, + "job_artifacts_verification_total_count": 5, + "job_artifacts_verified_count": 5, + "job_artifacts_verification_failed_count": 0, + "job_artifacts_synced_in_percentage": "100.00%", + "job_artifacts_verified_in_percentage": "100.00%", + "job_artifacts_synced_missing_on_primary_count": 0, } ``` diff --git a/doc/api/graphql/reference/index.md b/doc/api/graphql/reference/index.md index 11509e9f8a43a61572eedcf67e095a66c89c678f..702116f7f8f33c9b323a6e1693b81f66bb3f29ec 100644 --- a/doc/api/graphql/reference/index.md +++ b/doc/api/graphql/reference/index.md @@ -6803,6 +6803,29 @@ The edge type for [`JiraProject`](#jiraproject). | `cursor` | [`String!`](#string) | A cursor for use in pagination. | | `node` | [`JiraProject`](#jiraproject) | The item at the end of the edge. | +#### `JobArtifactRegistryConnection` + +The connection type for [`JobArtifactRegistry`](#jobartifactregistry). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `edges` | [`[JobArtifactRegistryEdge]`](#jobartifactregistryedge) | A list of edges. | +| `nodes` | [`[JobArtifactRegistry]`](#jobartifactregistry) | A list of nodes. | +| `pageInfo` | [`PageInfo!`](#pageinfo) | Information to aid in pagination. | + +#### `JobArtifactRegistryEdge` + +The edge type for [`JobArtifactRegistry`](#jobartifactregistry). + +##### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `cursor` | [`String!`](#string) | A cursor for use in pagination. | +| `node` | [`JobArtifactRegistry`](#jobartifactregistry) | The item at the end of the edge. | + #### `JobNeedUnionConnection` The connection type for [`JobNeedUnion`](#jobneedunion). @@ -10780,6 +10803,22 @@ four standard [pagination arguments](#connection-pagination-arguments): | ---- | ---- | ----------- | | `ids` | [`[ID!]`](#id) | Filters registries by their ID. | +##### `GeoNode.jobArtifactRegistries` + +Find Job Artifact registries on this Geo node Available only when feature flag `geo_job_artifact_replication` is enabled. This flag is disabled by default, because the feature is experimental and is subject to change without notice. + +Returns [`JobArtifactRegistryConnection`](#jobartifactregistryconnection). + +This field returns a [connection](#connections). It accepts the +four standard [pagination arguments](#connection-pagination-arguments): +`before: String`, `after: String`, `first: Int`, `last: Int`. + +###### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `ids` | [`[ID!]`](#id) | Filters registries by their ID. | + ##### `GeoNode.lfsObjectRegistries` Find LFS object registries on this Geo node. @@ -11970,6 +12009,23 @@ four standard [pagination arguments](#connection-pagination-arguments): | `jiraDisplayName` | [`String!`](#string) | Display name of the Jira user. | | `jiraEmail` | [`String`](#string) | Email of the Jira user, returned only for users with public emails. | +### `JobArtifactRegistry` + +Represents the Geo replication and verification state of a job_artifact. + +#### Fields + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `artifactId` | [`ID!`](#id) | ID of the Job Artifact. | +| `createdAt` | [`Time`](#time) | Timestamp when the JobArtifactRegistry was created. | +| `id` | [`ID!`](#id) | ID of the JobArtifactRegistry. | +| `lastSyncFailure` | [`String`](#string) | Error message during sync of the JobArtifactRegistry. | +| `lastSyncedAt` | [`Time`](#time) | Timestamp of the most recent successful sync of the JobArtifactRegistry. | +| `retryAt` | [`Time`](#time) | Timestamp after which the JobArtifactRegistry should be resynced. | +| `retryCount` | [`Int`](#int) | Number of consecutive failed sync attempts of the JobArtifactRegistry. | +| `state` | [`RegistryState`](#registrystate) | Sync state of the JobArtifactRegistry. | + ### `JobPermissions` #### Fields diff --git a/ee/app/finders/geo/job_artifact_legacy_registry_finder.rb b/ee/app/finders/geo/job_artifact_legacy_registry_finder.rb new file mode 100644 index 0000000000000000000000000000000000000000..7b158e3e00857718b9ab48ae5c08d814b499242e --- /dev/null +++ b/ee/app/finders/geo/job_artifact_legacy_registry_finder.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +module Geo + class JobArtifactLegacyRegistryFinder < FileRegistryFinder + def registry_class + Geo::JobArtifactRegistry + end + end +end diff --git a/ee/app/finders/geo/job_artifact_registry_finder.rb b/ee/app/finders/geo/job_artifact_registry_finder.rb index bcdefdcd973b0882d5c5478a9581145c01acc97b..f96f56afc84aa4be0a38e66559ae4093274dc48e 100644 --- a/ee/app/finders/geo/job_artifact_registry_finder.rb +++ b/ee/app/finders/geo/job_artifact_registry_finder.rb @@ -2,8 +2,6 @@ module Geo class JobArtifactRegistryFinder < FileRegistryFinder - def registry_class - Geo::JobArtifactRegistry - end + include FrameworkRegistryFinder end end diff --git a/ee/app/graphql/resolvers/geo/job_artifact_registries_resolver.rb b/ee/app/graphql/resolvers/geo/job_artifact_registries_resolver.rb new file mode 100644 index 0000000000000000000000000000000000000000..92f9e77e2aa69f5ed18c686ade1e75ec382095e7 --- /dev/null +++ b/ee/app/graphql/resolvers/geo/job_artifact_registries_resolver.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Resolvers + module Geo + class JobArtifactRegistriesResolver < BaseResolver + type ::Types::Geo::GeoNodeType.connection_type, null: true + + include RegistriesResolver + end + end +end diff --git a/ee/app/graphql/types/geo/geo_node_type.rb b/ee/app/graphql/types/geo/geo_node_type.rb index bcd5fbe38e665a9c3045582709e116db738375db..995ddb527733da8456c5fa26b13ee4b83ff7fc2c 100644 --- a/ee/app/graphql/types/geo/geo_node_type.rb +++ b/ee/app/graphql/types/geo/geo_node_type.rb @@ -58,6 +58,11 @@ class GeoNodeType < BaseObject null: true, resolver: ::Resolvers::Geo::UploadRegistriesResolver, description: 'Find Upload registries on this Geo node' + field :job_artifact_registries, ::Types::Geo::JobArtifactRegistryType.connection_type, + null: true, + resolver: ::Resolvers::Geo::JobArtifactRegistriesResolver, + description: 'Find Job Artifact registries on this Geo node', + feature_flag: :geo_job_artifact_replication end end end diff --git a/ee/app/graphql/types/geo/job_artifact_registry_type.rb b/ee/app/graphql/types/geo/job_artifact_registry_type.rb new file mode 100644 index 0000000000000000000000000000000000000000..48753de303bc31dbda4187d32a16b16b0e07b74e --- /dev/null +++ b/ee/app/graphql/types/geo/job_artifact_registry_type.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Types + module Geo + # rubocop:disable Graphql/AuthorizeTypes because it is included + class JobArtifactRegistryType < BaseObject + include ::Types::Geo::RegistryType + + graphql_name 'JobArtifactRegistry' + description 'Represents the Geo replication and verification state of a job_artifact.' + + field :artifact_id, GraphQL::Types::ID, null: false, description: 'ID of the Job Artifact.' + end + end +end diff --git a/ee/app/helpers/ee/geo_helper.rb b/ee/app/helpers/ee/geo_helper.rb index 7c3f821c314a4f7f5a56bb6ad79d01ad24db6ece..106db6b592dd422814ea0d826d9805bf16270724 100644 --- a/ee/app/helpers/ee/geo_helper.rb +++ b/ee/app/helpers/ee/geo_helper.rb @@ -197,14 +197,6 @@ def replicable_types name: 'wiki', name_plural: 'wikis' }, - { - data_type: 'blob', - data_type_title: _('File'), - title: _('Job artifact'), - title_plural: _('Job artifacts'), - name: 'job_artifact', - name_plural: 'job_artifacts' - }, { data_type: 'blob', data_type_title: _('File'), @@ -224,6 +216,17 @@ def replicable_types } ] + if ::Geo::JobArtifactReplicator.disabled? + replicable_types.insert(2, { + data_type: 'blob', + data_type_title: _('File'), + title: _('Job artifact'), + title_plural: _('Job artifacts'), + name: 'job_artifact', + name_plural: 'job_artifacts' + }) + end + # Adds all the SSF Data Types automatically enabled_replicator_classes.each do |replicator_class| replicable_types.push( diff --git a/ee/app/models/ee/ci/job_artifact.rb b/ee/app/models/ee/ci/job_artifact.rb index 554f5ebd5a00041f5933af489c1ae8952d969217..e0723a176f8d5da19eaf9509191d715f82df5040 100644 --- a/ee/app/models/ee/ci/job_artifact.rb +++ b/ee/app/models/ee/ci/job_artifact.rb @@ -12,6 +12,13 @@ module Ci::JobArtifact SECURITY_REPORT_FILE_TYPES = %w[sast secret_detection dependency_scanning container_scanning cluster_image_scanning dast coverage_fuzzing api_fuzzing].freeze prepended do + include ::Geo::ReplicableModel + include ::Geo::VerifiableModel + + with_replicator ::Geo::JobArtifactReplicator + + has_one :job_artifact_state, autosave: false, inverse_of: :job_artifact, class_name: '::Geo::JobArtifactState' + # After destroy callbacks are often skipped because of FastDestroyAll. # All destroy callbacks should be implemented in `Ci::JobArtifacts::DestroyBatchService` # See https://gitlab.com/gitlab-org/gitlab/-/issues/297472 @@ -66,7 +73,26 @@ module Ci::JobArtifact with_file_types(API_FUZZING_REPORT_TYPES) end + scope :with_files_stored_locally, -> { where(file_store: ::ObjectStorage::Store::LOCAL) } + scope :with_files_stored_remotely, -> { where(file_store: ::ObjectStorage::Store::REMOTE) } + scope :with_verification_state, ->(state) { joins(:job_artifact_state).where(verification_arel_table[:verification_state].eq(verification_state_value(state))) } + scope :checksummed, -> { joins(:job_artifact_state).where.not(verification_arel_table[:verification_checksum].eq(nil)) } + scope :not_checksummed, -> { joins(:job_artifact_state).where(verification_arel_table[:verification_checksum].eq(nil)) } + + scope :available_verifiables, -> { joins(:job_artifact_state) } + delegate :validate_schema?, to: :job + + delegate :verification_retry_at, :verification_retry_at=, + :verified_at, :verified_at=, + :verification_checksum, :verification_checksum=, + :verification_failure, :verification_failure=, + :verification_retry_count, :verification_retry_count=, + :verification_state=, :verification_state, + :verification_started_at=, :verification_started_at, + to: :job_artifact_state + + after_save :save_verification_details end class_methods do @@ -79,6 +105,19 @@ def associated_file_types_for(file_type) super end + + override :verification_state_table_class + def verification_state_table_class + ::Geo::JobArtifactState + end + end + + def job_artifact_state + super || build_job_artifact_state + end + + def verification_state_object + job_artifact_state end def log_geo_deleted_event diff --git a/ee/app/models/geo/job_artifact_registry.rb b/ee/app/models/geo/job_artifact_registry.rb index 0f39fdd10684e443e7e9697a3f2685fbfe1d18ad..1c131d8b6e37f8681d114b26a43d4eddfe4a8a45 100644 --- a/ee/app/models/geo/job_artifact_registry.rb +++ b/ee/app/models/geo/job_artifact_registry.rb @@ -2,14 +2,18 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry include Geo::Syncable + include ::Geo::ReplicableRegistry + include ::Geo::VerifiableRegistry MODEL_CLASS = ::Ci::JobArtifact MODEL_FOREIGN_KEY = :artifact_id + belongs_to :job_artifact, class_name: 'Ci::JobArtifact', foreign_key: :artifact_id + # When false, RegistryConsistencyService will frequently check the end of the # table to quickly handle new replicables. def self.has_create_events? - false + ::Geo::JobArtifactReplicator.enabled? end # TODO: remove once `success` column has a default value set @@ -31,4 +35,42 @@ def self.delete_for_model_ids(artifact_ids) def self.delete_worker_class ::Geo::FileRegistryRemovalWorker end + + # TODO Remove this when enabling geo_job_artifact_replication by default + override :registry_consistency_worker_enabled? + def self.registry_consistency_worker_enabled? + true + end + + def self.failed + if ::Geo::JobArtifactReplicator.enabled? + with_state(:failed) + else + where(success: false).where.not(retry_count: nil) + end + end + + def self.never_attempted_sync + if ::Geo::JobArtifactReplicator.enabled? + pending.where(last_synced_at: nil) + else + where(success: false, retry_count: nil) + end + end + + def self.retry_due + if ::Geo::JobArtifactReplicator.enabled? + where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) + else + where('retry_at is NULL OR retry_at < ?', Time.current) + end + end + + def self.synced + if ::Geo::JobArtifactReplicator.enabled? + with_state(:synced).or(where(success: true)) + else + where(success: true) + end + end end diff --git a/ee/app/models/geo/job_artifact_state.rb b/ee/app/models/geo/job_artifact_state.rb new file mode 100644 index 0000000000000000000000000000000000000000..f6bb86d4b8361004e8dee8264f73ad8883d6067a --- /dev/null +++ b/ee/app/models/geo/job_artifact_state.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module Geo + class JobArtifactState < Ci::ApplicationRecord + include EachBatch + + self.primary_key = :job_artifact_id + + belongs_to :job_artifact, inverse_of: :job_artifact_state, class_name: 'Ci::JobArtifact' + end +end diff --git a/ee/app/models/geo_node_status.rb b/ee/app/models/geo_node_status.rb index ea75e39830e0e15131562a5d65179297aae440ee..a3e101211a75f3a65b220da35544145d82a3cd1f 100644 --- a/ee/app/models/geo_node_status.rb +++ b/ee/app/models/geo_node_status.rb @@ -40,8 +40,8 @@ def self.status_fields_for(replicable_class) "#{replicable_class.replicable_name_plural}_checksummed_count".to_sym => "Number of #{replicable_class.replicable_title_plural} checksummed on the primary", "#{replicable_class.replicable_name_plural}_checksum_failed_count".to_sym => "Number of #{replicable_class.replicable_title_plural} failed to checksum on primary", "#{replicable_class.replicable_name_plural}_synced_count".to_sym => "Number of #{replicable_class.replicable_title_plural} in the registry", - "#{replicable_class.replicable_name_plural}_failed_count".to_sym => "Number of #{replicable_class.replicable_title_plural} synced on secondary", - "#{replicable_class.replicable_name_plural}_registry_count".to_sym => "Number of #{replicable_class.replicable_title_plural} failed to sync on secondary", + "#{replicable_class.replicable_name_plural}_failed_count".to_sym => "Number of #{replicable_class.replicable_title_plural} failed on secondary", + "#{replicable_class.replicable_name_plural}_registry_count".to_sym => "Number of #{replicable_class.replicable_title_plural} synced to sync on secondary", "#{replicable_class.replicable_name_plural}_verification_total_count".to_sym => "Number of #{replicable_class.replicable_title_plural} available to verify on secondary", "#{replicable_class.replicable_name_plural}_verified_count".to_sym => "Number of #{replicable_class.replicable_title_plural} verified on the secondary", "#{replicable_class.replicable_name_plural}_verification_failed_count".to_sym => "Number of #{replicable_class.replicable_title_plural} failed to verify on secondary" @@ -67,9 +67,6 @@ def self.usage_data_fields wikis_synced_count wikis_failed_count job_artifacts_replication_enabled - job_artifacts_count - job_artifacts_synced_count - job_artifacts_failed_count repositories_verified_count repositories_verification_failed_count repositories_verification_total_count @@ -127,10 +124,6 @@ def self.replicator_class_prometheus_metrics wikis_verification_failed_count: 'Number of wikis failed to verify on secondary', wikis_checksum_mismatch_count: 'Number of wikis that checksum mismatch on secondary', job_artifacts_replication_enabled: 'Boolean denoting if replication is enabled for Job Artifacts', - job_artifacts_count: 'Total number of syncable job artifacts available on primary', - job_artifacts_synced_count: 'Number of syncable job artifacts synced on secondary', - job_artifacts_failed_count: 'Number of syncable job artifacts failed to sync on secondary', - job_artifacts_registry_count: 'Number of job artifacts in the registry', job_artifacts_synced_missing_on_primary_count: 'Number of job artifacts marked as synced due to the file missing on the primary', replication_slots_count: 'Total number of replication slots on the primary', replication_slots_used_count: 'Number of replication slots in use on the primary', @@ -377,7 +370,6 @@ def self.add_attr_in_percentage_for_replicable_classes attr_in_percentage :wikis_synced, :wikis_synced_count, :wikis_count attr_in_percentage :wikis_checksummed, :wikis_checksummed_count, :wikis_count attr_in_percentage :wikis_verified, :wikis_verified_count, :wikis_count - attr_in_percentage :job_artifacts_synced, :job_artifacts_synced_count, :job_artifacts_count attr_in_percentage :replication_slots_used, :replication_slots_used_count, :replication_slots_count attr_in_percentage :container_repositories_synced, :container_repositories_synced_count, :container_repositories_count attr_in_percentage :design_repositories_synced, :design_repositories_synced_count, :design_repositories_count @@ -468,6 +460,7 @@ def load_repositories_data def load_job_artifacts_data return unless job_artifacts_replication_enabled + return if ::Geo::JobArtifactReplicator.enabled? self.job_artifacts_count = job_artifacts_finder.registry_count self.job_artifacts_synced_count = job_artifacts_finder.synced_count @@ -576,7 +569,7 @@ def primary_storage_digest end def job_artifacts_finder - @job_artifacts_finder ||= Geo::JobArtifactRegistryFinder.new + @job_artifacts_finder ||= Geo::JobArtifactLegacyRegistryFinder.new end def container_registry_finder diff --git a/ee/app/replicators/geo/job_artifact_replicator.rb b/ee/app/replicators/geo/job_artifact_replicator.rb new file mode 100644 index 0000000000000000000000000000000000000000..626933cbc1d6bcf2d947932b44854492f91a97ef --- /dev/null +++ b/ee/app/replicators/geo/job_artifact_replicator.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Geo + class JobArtifactReplicator < Gitlab::Geo::Replicator + include ::Geo::BlobReplicatorStrategy + extend ::Gitlab::Utils::Override + + def self.model + ::Ci::JobArtifact + end + + def carrierwave_uploader + model_record.file + end + + # The feature flag follows the format `geo_#{replicable_name}_replication`, + # so here it would be `geo_job_artifact_replication` + def self.replication_enabled_by_default? + false + end + + override :verification_feature_flag_enabled? + def self.verification_feature_flag_enabled? + # We are adding verification at the same time as replication, so we + # don't need to toggle verification separately from replication. When + # the replication feature flag is off, then verification is also off + # (see `VerifiableReplicator.verification_enabled?`) + true + end + end +end diff --git a/ee/app/services/geo/file_registry_removal_service.rb b/ee/app/services/geo/file_registry_removal_service.rb index a726e5da958845a4d36a575503584de35af8af37..24ffb61d7d5f70f8ae392277c8f711b5e0bc39a6 100644 --- a/ee/app/services/geo/file_registry_removal_service.rb +++ b/ee/app/services/geo/file_registry_removal_service.rb @@ -51,7 +51,7 @@ def execute # rubocop: disable CodeReuse/ActiveRecord def file_registry strong_memoize(:file_registry) do - if job_artifact? + if job_artifact? && ::Geo::JobArtifactReplicator.disabled? ::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id) elsif replicator replicator.registry diff --git a/ee/app/workers/geo/file_download_dispatch_worker.rb b/ee/app/workers/geo/file_download_dispatch_worker.rb index 3bc7d1ace16b83528cb9e92931245f4133e56621..f433e9cc561c4101b7c47df79da7142c9ea523b4 100644 --- a/ee/app/workers/geo/file_download_dispatch_worker.rb +++ b/ee/app/workers/geo/file_download_dispatch_worker.rb @@ -1,12 +1,24 @@ # frozen_string_literal: true module Geo + # + # This Worker is deprecated and it only handles the Job Artifacts now + # class FileDownloadDispatchWorker < Geo::Scheduler::Secondary::SchedulerWorker # rubocop:disable Scalability/IdempotentWorker # rubocop:disable Scalability/CronWorkerContext # This worker does not perform work scoped to a context include CronjobQueue # rubocop:enable Scalability/CronWorkerContext + def perform + if ::Geo::JobArtifactReplicator.enabled? + log_info('JobArtifact replication is handled by Geo self service framework') + return + end + + super + end + private # Cannot utilise backoff because there are no events currently being @@ -26,6 +38,8 @@ def max_capacity end def schedule_job(object_type, object_db_id) + return if ::Geo::JobArtifactReplicator.enabled? + job_id = FileDownloadWorker.with_status.perform_async(object_type.to_s, object_db_id) { id: object_db_id, type: object_type, job_id: job_id } if job_id diff --git a/ee/app/workers/geo/file_download_dispatch_worker/job_artifact_job_finder.rb b/ee/app/workers/geo/file_download_dispatch_worker/job_artifact_job_finder.rb index 9db0feaf433096ee79875a737e7089991c59fdd8..cdc6b816a232be9fcd13ce7695919a5ef8bdc411 100644 --- a/ee/app/workers/geo/file_download_dispatch_worker/job_artifact_job_finder.rb +++ b/ee/app/workers/geo/file_download_dispatch_worker/job_artifact_job_finder.rb @@ -8,7 +8,7 @@ class JobArtifactJobFinder < JobFinder # rubocop:disable Scalability/IdempotentW FILE_SERVICE_OBJECT_TYPE = :job_artifact def registry_finder - @registry_finder ||= Geo::JobArtifactRegistryFinder.new + @registry_finder ||= Geo::JobArtifactLegacyRegistryFinder.new end end end diff --git a/ee/config/feature_flags/development/geo_job_artifact_replication.yml b/ee/config/feature_flags/development/geo_job_artifact_replication.yml new file mode 100644 index 0000000000000000000000000000000000000000..ba1d1d824d9abb33a9264d0821285c6a3b2fa7d6 --- /dev/null +++ b/ee/config/feature_flags/development/geo_job_artifact_replication.yml @@ -0,0 +1,8 @@ +--- +name: geo_job_artifact_replication +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/issues/327400 +rollout_issue_url: +milestone: '14.8' +type: development +group: group::geo +default_enabled: false diff --git a/ee/db/geo/migrate/20210111911002_prepare_job_artifact_registry_for_ssf.rb b/ee/db/geo/migrate/20210111911002_prepare_job_artifact_registry_for_ssf.rb new file mode 100644 index 0000000000000000000000000000000000000000..0b64f1fc839565c18682819d72d277c38e60d767 --- /dev/null +++ b/ee/db/geo/migrate/20210111911002_prepare_job_artifact_registry_for_ssf.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +class PrepareJobArtifactRegistryForSsf < Gitlab::Database::Migration[1.0] + def change + change_column_default :job_artifact_registry, :retry_count, from: nil, to: 0 + add_column :job_artifact_registry, :last_synced_at, :datetime_with_timezone + add_column :job_artifact_registry, :verified_at, :datetime_with_timezone + add_column :job_artifact_registry, :verification_started_at, :datetime_with_timezone + add_column :job_artifact_registry, :verification_retry_at, :datetime_with_timezone + add_column :job_artifact_registry, :state, :integer, null: false, limit: 2, default: 0 + add_column :job_artifact_registry, :verification_state, :integer, default: 0, null: false, limit: 2 + add_column :job_artifact_registry, :verification_retry_count, :integer, default: 0, limit: 2, null: false + add_column :job_artifact_registry, :verification_checksum, :binary + add_column :job_artifact_registry, :verification_checksum_mismatched, :binary + add_column :job_artifact_registry, :checksum_mismatch, :boolean, default: false, null: false + add_column :job_artifact_registry, :verification_failure, :string, limit: 255 # rubocop:disable Migration/PreventStrings see https://gitlab.com/gitlab-org/gitlab/-/issues/323806 + add_column :job_artifact_registry, :last_sync_failure, :string, limit: 255 # rubocop:disable Migration/PreventStrings see https://gitlab.com/gitlab-org/gitlab/-/issues/323806 + end +end diff --git a/ee/db/geo/migrate/20210111912220_prepare_job_artifact_registry_for_ssf_indecies.rb b/ee/db/geo/migrate/20210111912220_prepare_job_artifact_registry_for_ssf_indecies.rb new file mode 100644 index 0000000000000000000000000000000000000000..7b8aa419a3b6d9bb86a57ba7f095e658455196f7 --- /dev/null +++ b/ee/db/geo/migrate/20210111912220_prepare_job_artifact_registry_for_ssf_indecies.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class PrepareJobArtifactRegistryForSsfIndecies < Gitlab::Database::Migration[1.0] + disable_ddl_transaction! + + def up + add_concurrent_index :job_artifact_registry, :verification_retry_at, name: :job_artifact_registry_failed_verification, order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 3))" + add_concurrent_index :job_artifact_registry, :verification_state, name: :job_artifact_registry_needs_verification, where: "((state = 2) AND (verification_state = ANY (ARRAY[0, 3])))" + add_concurrent_index :job_artifact_registry, :verified_at, name: :job_artifact_registry_pending_verification, order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 0))" + end + + def down + remove_concurrent_index :job_artifact_registry, :verification_retry_at, name: :job_artifact_registry_failed_verification + remove_concurrent_index :job_artifact_registry, :verification_state, name: :job_artifact_registry_needs_verification + remove_concurrent_index :job_artifact_registry, :verified_at, name: :job_artifact_registry_pending_verification + end +end diff --git a/ee/db/geo/post_migrate/20220202101354_migrate_job_artifact_registry.rb b/ee/db/geo/post_migrate/20220202101354_migrate_job_artifact_registry.rb new file mode 100644 index 0000000000000000000000000000000000000000..b07ca92f406ad2c6d0df84f47e4899459d8fafe0 --- /dev/null +++ b/ee/db/geo/post_migrate/20220202101354_migrate_job_artifact_registry.rb @@ -0,0 +1,23 @@ +# frozen_string_literal: true + +class MigrateJobArtifactRegistry < Gitlab::Database::Migration[1.0] + MIGRATION = 'MigrateJobArtifactRegistryToSsf' + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 5_000 + + disable_ddl_transaction! + + class JobArtifactRegistry < Geo::TrackingBase + include EachBatch + + self.table_name = 'job_artifact_registry' + end + + def up + queue_background_migration_jobs_by_range_at_intervals(JobArtifactRegistry, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE) + end + + def down + # no-op + end +end diff --git a/ee/db/geo/schema_migrations/20210111911002 b/ee/db/geo/schema_migrations/20210111911002 new file mode 100644 index 0000000000000000000000000000000000000000..e12048886c292af7a74292a7ed66c4c32ce30c95 --- /dev/null +++ b/ee/db/geo/schema_migrations/20210111911002 @@ -0,0 +1 @@ +418ae4a74ff25c09629196916400eeebe5795d1c82918d09aaaa040d228ecae8 \ No newline at end of file diff --git a/ee/db/geo/schema_migrations/20210111912220 b/ee/db/geo/schema_migrations/20210111912220 new file mode 100644 index 0000000000000000000000000000000000000000..243d7e41596f31762f8b0aaa4b28b0eb2d9db28c --- /dev/null +++ b/ee/db/geo/schema_migrations/20210111912220 @@ -0,0 +1 @@ +0eaee99b45bf4f98933b1a04cf92207ccadef29d89388049efe44a19a0c34c4a \ No newline at end of file diff --git a/ee/db/geo/schema_migrations/20220202101354 b/ee/db/geo/schema_migrations/20220202101354 new file mode 100644 index 0000000000000000000000000000000000000000..3ddf11af20107c9af7c41a96146411757db15beb --- /dev/null +++ b/ee/db/geo/schema_migrations/20220202101354 @@ -0,0 +1 @@ +ec84e9ecbed7fdd59541ebe7c374abba3d4e657bb6cd9cab1920421e8b0e349a \ No newline at end of file diff --git a/ee/db/geo/structure.sql b/ee/db/geo/structure.sql index 93dd31730c3bfdc7265d1e02dc730a892869ea5c..2f282457a588082d601cc4c734ec9f3864348be0 100644 --- a/ee/db/geo/structure.sql +++ b/ee/db/geo/structure.sql @@ -123,10 +123,22 @@ CREATE TABLE job_artifact_registry ( retry_at timestamp with time zone, bytes bigint, artifact_id integer, - retry_count integer, + retry_count integer DEFAULT 0, success boolean, sha256 character varying, - missing_on_primary boolean DEFAULT false NOT NULL + missing_on_primary boolean DEFAULT false NOT NULL, + state smallint DEFAULT 0 NOT NULL, + last_synced_at timestamp with time zone, + last_sync_failure character varying(255), + verified_at timestamp with time zone, + verification_started_at timestamp with time zone, + verification_retry_at timestamp with time zone, + verification_state smallint DEFAULT 0 NOT NULL, + verification_retry_count smallint DEFAULT 0 NOT NULL, + verification_checksum bytea, + verification_checksum_mismatched bytea, + checksum_mismatch boolean DEFAULT false NOT NULL, + verification_failure character varying(255) ); CREATE SEQUENCE job_artifact_registry_id_seq @@ -622,6 +634,12 @@ CREATE INDEX lfs_object_registry_needs_verification ON lfs_object_registry USING CREATE INDEX lfs_object_registry_pending_verification ON lfs_object_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); +CREATE INDEX job_artifact_registry_failed_verification ON job_artifact_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); + +CREATE INDEX job_artifact_registry_needs_verification ON job_artifact_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); + +CREATE INDEX job_artifact_registry_pending_verification ON job_artifact_registry USING btree (verified_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 0)); + CREATE INDEX merge_request_diff_registry_failed_verification ON merge_request_diff_registry USING btree (verification_retry_at NULLS FIRST) WHERE ((state = 2) AND (verification_state = 3)); CREATE INDEX merge_request_diff_registry_needs_verification ON merge_request_diff_registry USING btree (verification_state) WHERE ((state = 2) AND (verification_state = ANY (ARRAY[0, 3]))); diff --git a/ee/lib/ee/api/entities/geo_node_status.rb b/ee/lib/ee/api/entities/geo_node_status.rb index caf6def3692046bc2d245b40489ac2943a081047..4f40bf24e050480ac3ae5492085d7068e85c4a67 100644 --- a/ee/lib/ee/api/entities/geo_node_status.rb +++ b/ee/lib/ee/api/entities/geo_node_status.rb @@ -35,7 +35,7 @@ class GeoNodeStatus < Grape::Entity expose :db_replication_lag_seconds - expose :job_artifacts_replication_enabled + expose :job_artifacts_replication_enabled, if: -> (*) { ::Geo::JobArtifactReplicator.disabled? } expose :container_repositories_replication_enabled expose :design_repositories_replication_enabled expose :repositories_replication_enabled diff --git a/ee/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb b/ee/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb new file mode 100644 index 0000000000000000000000000000000000000000..a663e2fbeeaec18bf0e125e593a5dd1b73d73e27 --- /dev/null +++ b/ee/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module EE + module Gitlab + module BackgroundMigration + module MigrateJobArtifactRegistryToSsf + class JobArtifactRegistry < Geo::BaseRegistry + self.table_name = 'job_artifact_registry' + end + + def perform(start_id, end_id) + JobArtifactRegistry.where(id: start_id..end_id, success: true).update_all(state: 2) + end + end + end + end +end diff --git a/ee/lib/ee/gitlab/cache.rb b/ee/lib/ee/gitlab/cache.rb index d357eee7e6ce9094a3e5a91bc984cdd20bfd7efc..3dd22ae076056dc1922461c3133413b6712a918c 100644 --- a/ee/lib/ee/gitlab/cache.rb +++ b/ee/lib/ee/gitlab/cache.rb @@ -17,7 +17,7 @@ def delete(key) end def delete_on_geo_secondaries(key) - Geo::CacheInvalidationEventStore.new(key).create! + ::Geo::CacheInvalidationEventStore.new(key).create! end end end diff --git a/ee/lib/gitlab/geo.rb b/ee/lib/gitlab/geo.rb index adb23fad8f5d990f50fbabb99ddd564a4cf6cafb..f8b44fa6e819a55aa39075ae75f49bba04f2aefe 100644 --- a/ee/lib/gitlab/geo.rb +++ b/ee/lib/gitlab/geo.rb @@ -28,7 +28,8 @@ module Geo ::Geo::GroupWikiRepositoryReplicator, ::Geo::PipelineArtifactReplicator, ::Geo::PagesDeploymentReplicator, - ::Geo::UploadReplicator + ::Geo::UploadReplicator, + ::Geo::JobArtifactReplicator ].freeze def self.current_node diff --git a/ee/lib/gitlab/geo/geo_node_status_check.rb b/ee/lib/gitlab/geo/geo_node_status_check.rb index a030647ea399a71b59a38d1542df80035f9d209c..704eca60ce7b0249bd172dea62c96bf10407f43f 100644 --- a/ee/lib/gitlab/geo/geo_node_status_check.rb +++ b/ee/lib/gitlab/geo/geo_node_status_check.rb @@ -250,6 +250,8 @@ def print_verified_wikis end def print_ci_job_artifacts_status + return if ::Geo::JobArtifactReplicator.enabled? + print 'CI job artifacts: '.rjust(GEO_STATUS_COLUMN_WIDTH) show_failed_value(current_node_status.job_artifacts_failed_count) print "#{current_node_status.job_artifacts_synced_count}/#{current_node_status.job_artifacts_count} " diff --git a/ee/spec/factories/ci/job_artifacts.rb b/ee/spec/factories/ci/job_artifacts.rb index d14252ac8eb4e890712df96555833535ebb1048a..13411861d8aa828c9727fd31965e2f744c030620 100644 --- a/ee/spec/factories/ci/job_artifacts.rb +++ b/ee/spec/factories/ci/job_artifacts.rb @@ -12,6 +12,18 @@ end end + trait :verification_succeeded do + common_security_report # with file + verification_checksum { 'abc' } + verification_state { Ci::JobArtifact.verification_state_value(:verification_succeeded) } + end + + trait :verification_failed do + common_security_report # with file + verification_failure { 'Could not calculate the checksum' } + verification_state { Ci::JobArtifact.verification_state_value(:verification_failed) } + end + trait :sast_with_vulnerability_flags do file_type { :sast } file_format { :raw } diff --git a/ee/spec/factories/geo/job_artifact_registry.rb b/ee/spec/factories/geo/job_artifact_registry.rb index a9f51531fdd691f071b5c0e4a173369ad92d7654..1faf992505f53b3343ed217138f1408d41fef603 100644 --- a/ee/spec/factories/geo/job_artifact_registry.rb +++ b/ee/spec/factories/geo/job_artifact_registry.rb @@ -2,6 +2,41 @@ FactoryBot.define do factory :geo_job_artifact_registry, class: 'Geo::JobArtifactRegistry' do + association :job_artifact, factory: [:ci_job_artifact, :with_file] + state { Geo::JobArtifactRegistry.state_value(:pending) } + + trait :synced do + state { Geo::JobArtifactRegistry.state_value(:synced) } + last_synced_at { 5.days.ago } + end + + trait :failed do + state { Geo::JobArtifactRegistry.state_value(:failed) } + last_synced_at { 1.day.ago } + retry_count { 2 } + last_sync_failure { 'Random error' } + end + + trait :started do + state { Geo::JobArtifactRegistry.state_value(:started) } + last_synced_at { 1.day.ago } + retry_count { 0 } + end + + trait :verification_succeeded do + verification_checksum { 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' } + verification_state { Geo::JobArtifactRegistry.verification_state_value(:verification_succeeded) } + verified_at { 5.days.ago } + end + + trait :orphan do + after(:create) do |registry, _| + Ci::JobArtifact.find(registry.artifact_id).delete + end + end + end + + factory :geo_job_artifact_registry_legacy, class: 'Geo::JobArtifactRegistry' do sequence(:artifact_id) success { true } diff --git a/ee/spec/factories/geo/job_artifact_states.rb b/ee/spec/factories/geo/job_artifact_states.rb new file mode 100644 index 0000000000000000000000000000000000000000..5045c79d1dbaf92f1734b01d31e2b6f875241a08 --- /dev/null +++ b/ee/spec/factories/geo/job_artifact_states.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +FactoryBot.define do + factory :geo_job_artifact_state, class: 'Geo::JobArtifactState' do + job_artifact + + trait(:checksummed) do + verification_checksum { 'abc' } + end + + trait(:checksum_failure) do + verification_failure { 'Could not calculate the checksum' } + end + end +end diff --git a/ee/spec/factories/geo_node_statuses.rb b/ee/spec/factories/geo_node_statuses.rb index e44b8e50b4a5cf6a6109745f2e3ae6685dc468aa..8df3ee27c4d9344e873cb29fe49776cfffb318b5 100644 --- a/ee/spec/factories/geo_node_statuses.rb +++ b/ee/spec/factories/geo_node_statuses.rb @@ -7,9 +7,6 @@ trait :healthy do status_message { nil } - job_artifacts_count { 580 } - job_artifacts_failed_count { 3 } - job_artifacts_synced_count { 577 } job_artifacts_synced_missing_on_primary_count { 91 } container_repositories_count { 400 } container_repositories_registry_count { 203 } @@ -65,7 +62,6 @@ end trait :replicated_and_verified do - job_artifacts_failed_count { 0 } container_repositories_failed_count { 0 } design_repositories_failed_count { 0 } repositories_failed_count { 0 } @@ -85,14 +81,12 @@ wikis_checksum_total_count { 10 } wikis_verified_count { 10 } wikis_verification_total_count { 10 } - job_artifacts_synced_count { 10 } replication_slots_used_count { 10 } container_repositories_synced_count { 10 } design_repositories_synced_count { 10 } repositories_count { 10 } wikis_count { 10 } - job_artifacts_count { 10 } replication_slots_count { 10 } container_repositories_count { 10 } design_repositories_count { 10 } diff --git a/ee/spec/finders/geo/job_artifact_legacy_registry_finder_spec.rb b/ee/spec/finders/geo/job_artifact_legacy_registry_finder_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..eff0bb33d36c5b9038b0185b9b618c6e790a8b01 --- /dev/null +++ b/ee/spec/finders/geo/job_artifact_legacy_registry_finder_spec.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Geo::JobArtifactLegacyRegistryFinder, :geo do + it_behaves_like 'a file registry finder' do + before do + stub_artifacts_object_storage + + stub_feature_flags(geo_job_artifact_replication: false) + end + + let_it_be(:project) { create(:project) } + + let_it_be(:replicable_1) { create(:ci_job_artifact, project: project) } + let_it_be(:replicable_2) { create(:ci_job_artifact, project: project) } + let_it_be(:replicable_3) { create(:ci_job_artifact, project: project) } + let_it_be(:replicable_4) { create(:ci_job_artifact, project: project) } + let_it_be(:replicable_5) { create(:ci_job_artifact, project: project) } + let!(:replicable_6) { create(:ci_job_artifact, :remote_store, project: project) } + let!(:replicable_7) { create(:ci_job_artifact, :remote_store, project: project) } + let!(:replicable_8) { create(:ci_job_artifact, :remote_store, project: project) } + + let_it_be(:registry_1) { create(:geo_job_artifact_registry_legacy, :failed, artifact_id: replicable_1.id) } + let_it_be(:registry_2) { create(:geo_job_artifact_registry_legacy, artifact_id: replicable_2.id, missing_on_primary: true) } + let_it_be(:registry_3) { create(:geo_job_artifact_registry_legacy, :never_synced, artifact_id: replicable_3.id) } + let_it_be(:registry_4) { create(:geo_job_artifact_registry_legacy, :failed, artifact_id: replicable_4.id) } + let_it_be(:registry_5) { create(:geo_job_artifact_registry_legacy, artifact_id: replicable_5.id, missing_on_primary: true, retry_at: 1.day.ago) } + let!(:registry_6) { create(:geo_job_artifact_registry_legacy, :failed, artifact_id: replicable_6.id) } + let!(:registry_7) { create(:geo_job_artifact_registry_legacy, :failed, artifact_id: replicable_7.id, missing_on_primary: true) } + let!(:registry_8) { create(:geo_job_artifact_registry_legacy, :never_synced, artifact_id: replicable_8.id) } + end +end diff --git a/ee/spec/finders/geo/job_artifact_registry_finder_spec.rb b/ee/spec/finders/geo/job_artifact_registry_finder_spec.rb deleted file mode 100644 index c499c57dfd644e98123134b91efbed471a582e3f..0000000000000000000000000000000000000000 --- a/ee/spec/finders/geo/job_artifact_registry_finder_spec.rb +++ /dev/null @@ -1,31 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Geo::JobArtifactRegistryFinder, :geo do - it_behaves_like 'a file registry finder' do - before do - stub_artifacts_object_storage - end - - let_it_be(:project) { create(:project) } - - let_it_be(:replicable_1) { create(:ci_job_artifact, project: project) } - let_it_be(:replicable_2) { create(:ci_job_artifact, project: project) } - let_it_be(:replicable_3) { create(:ci_job_artifact, project: project) } - let_it_be(:replicable_4) { create(:ci_job_artifact, project: project) } - let_it_be(:replicable_5) { create(:ci_job_artifact, project: project) } - let!(:replicable_6) { create(:ci_job_artifact, :remote_store, project: project) } - let!(:replicable_7) { create(:ci_job_artifact, :remote_store, project: project) } - let!(:replicable_8) { create(:ci_job_artifact, :remote_store, project: project) } - - let_it_be(:registry_1) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_1.id) } - let_it_be(:registry_2) { create(:geo_job_artifact_registry, artifact_id: replicable_2.id, missing_on_primary: true) } - let_it_be(:registry_3) { create(:geo_job_artifact_registry, :never_synced, artifact_id: replicable_3.id) } - let_it_be(:registry_4) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_4.id) } - let_it_be(:registry_5) { create(:geo_job_artifact_registry, artifact_id: replicable_5.id, missing_on_primary: true, retry_at: 1.day.ago) } - let!(:registry_6) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_6.id) } - let!(:registry_7) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_7.id, missing_on_primary: true) } - let!(:registry_8) { create(:geo_job_artifact_registry, :never_synced, artifact_id: replicable_8.id) } - end -end diff --git a/ee/spec/fixtures/api/schemas/public_api/v4/geo_node_status.json b/ee/spec/fixtures/api/schemas/public_api/v4/geo_node_status.json index d7584ba085c17c4cdf7fee5c040f0cf9acdf885d..c1448f63c9533bdc13f90465da312305c139768a 100644 --- a/ee/spec/fixtures/api/schemas/public_api/v4/geo_node_status.json +++ b/ee/spec/fixtures/api/schemas/public_api/v4/geo_node_status.json @@ -22,6 +22,14 @@ "job_artifacts_failed_count", "job_artifacts_synced_count", "job_artifacts_synced_missing_on_primary_count", + "job_artifacts_checksum_total_count", + "job_artifacts_checksummed_count", + "job_artifacts_checksum_failed_count", + "job_artifacts_registry_count", + "job_artifacts_verification_total_count", + "job_artifacts_verified_count", + "job_artifacts_verification_failed_count", + "job_artifacts_verified_in_percentage", "db_replication_lag_seconds", "container_repositories_replication_enabled", "container_repositories_count", @@ -205,6 +213,14 @@ "job_artifacts_synced_count": { "type": ["integer", "null"] }, "job_artifacts_synced_missing_on_primary_count": { "type": ["integer", "null"] }, "job_artifacts_synced_in_percentage": { "type": "string" }, + "job_artifacts_checksum_total_count": { "type": ["integer", "null"] }, + "job_artifacts_checksummed_count": { "type": ["integer", "null"] }, + "job_artifacts_checksum_failed_count": { "type": ["integer", "null"] }, + "job_artifacts_registry_count": { "type": ["integer", "null"] }, + "job_artifacts_verification_total_count": { "type": ["integer", "null"] }, + "job_artifacts_verified_count": { "type": ["integer", "null"] }, + "job_artifacts_verification_failed_count": { "type": ["integer", "null"] }, + "job_artifacts_verified_in_percentage": { "type": "string" }, "container_repositories_replication_enabled": { "type": ["boolean", "null"] }, "container_repositories_count": { "type": "integer" }, "container_repositories_failed_count": { "type": ["integer", "null"] }, diff --git a/ee/spec/graphql/resolvers/geo/job_artifact_registries_resolver_spec.rb b/ee/spec/graphql/resolvers/geo/job_artifact_registries_resolver_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..fd6f96c69f16a8f3dab12a79acd91d307c126577 --- /dev/null +++ b/ee/spec/graphql/resolvers/geo/job_artifact_registries_resolver_spec.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Resolvers::Geo::JobArtifactRegistriesResolver do + it_behaves_like 'a Geo registries resolver', :geo_job_artifact_registry +end diff --git a/ee/spec/graphql/types/geo/geo_node_type_spec.rb b/ee/spec/graphql/types/geo/geo_node_type_spec.rb index 87fd551e60b15bf5d086da61816d356283d80e0e..6d9becaa901c083d8bc4da165fb80c6c2bfabcca 100644 --- a/ee/spec/graphql/types/geo/geo_node_type_spec.rb +++ b/ee/spec/graphql/types/geo/geo_node_type_spec.rb @@ -15,7 +15,7 @@ package_file_registries snippet_repository_registries terraform_state_version_registries group_wiki_repository_registries pages_deployment_registries lfs_object_registries pipeline_artifact_registries - upload_registries + upload_registries job_artifact_registries ] expect(described_class).to have_graphql_fields(*expected_fields) diff --git a/ee/spec/graphql/types/geo/job_artifact_registry_type_spec.rb b/ee/spec/graphql/types/geo/job_artifact_registry_type_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..c150c0729733bcd3eea666c02aa8b2786f47b931 --- /dev/null +++ b/ee/spec/graphql/types/geo/job_artifact_registry_type_spec.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GitlabSchema.types['JobArtifactRegistry'] do + it_behaves_like 'a Geo registry type' + + it 'has the expected fields (other than those included in RegistryType)' do + expected_fields = %i[artifact_id] + + expect(described_class).to have_graphql_fields(*expected_fields).at_least + end +end diff --git a/ee/spec/lib/ee/api/entities/geo_node_status_spec.rb b/ee/spec/lib/ee/api/entities/geo_node_status_spec.rb index 4547e669e3470cab1b2a1df1deee3cea647bd7b2..067461bbcc480bd3a03935962737fddd0b52dc05 100644 --- a/ee/spec/lib/ee/api/entities/geo_node_status_spec.rb +++ b/ee/spec/lib/ee/api/entities/geo_node_status_spec.rb @@ -57,7 +57,7 @@ describe '#job_artifacts_synced_in_percentage' do it 'formats as percentage' do - geo_node_status.assign_attributes(job_artifacts_count: 256, + geo_node_status.assign_attributes(job_artifacts_registry_count: 256, job_artifacts_failed_count: 12, job_artifacts_synced_count: 123) diff --git a/ee/spec/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf_spec.rb b/ee/spec/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..3de40eb803d7b40332f542916fc1d63a8fbd4fc5 --- /dev/null +++ b/ee/spec/lib/ee/gitlab/background_migration/migrate_job_artifact_registry_to_ssf_spec.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::MigrateJobArtifactRegistryToSsf, :geo do + let(:registry) { table(:job_artifact_registry) } + + let!(:registry1) { registry.create!(artifact_id: 1, success: true, state: 0)} + let!(:registry2) { registry.create!(artifact_id: 2, success: true, state: 0)} + let!(:registry3) { registry.create!(artifact_id: 3, success: true, state: 0)} + + subject do + described_class.new.perform(registry1.id, registry3.id) + end + + it 'updates registries' do + subject + + expect(registry.where(state: 2).count).to eq 3 + end +end diff --git a/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb b/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb index 8fa2ae8c0f98c575e865e33a940df345f6a3543b..4121df8d29eeced277a668e809e517da32d70a24 100644 --- a/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb +++ b/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb @@ -14,8 +14,8 @@ let(:max_artifact_id) { Ci::JobArtifact.maximum(:id).to_i } let(:orphan_id_1) { max_artifact_id + 1 } let(:orphan_id_2) { max_artifact_id + 2 } - let!(:orphan_registry_1) { create(:geo_job_artifact_registry, artifact_id: orphan_id_1) } - let!(:orphan_registry_2) { create(:geo_job_artifact_registry, artifact_id: orphan_id_2) } + let!(:orphan_registry_1) { create(:geo_job_artifact_registry_legacy, artifact_id: orphan_id_1) } + let!(:orphan_registry_2) { create(:geo_job_artifact_registry_legacy, artifact_id: orphan_id_2) } before do stub_secondary_node @@ -35,8 +35,8 @@ context 'with dry run' do it 'does not remove registries' do - create(:geo_job_artifact_registry, :with_artifact, artifact_type: :archive) - create(:geo_job_artifact_registry, :orphan, artifact_type: :archive) + create(:geo_job_artifact_registry_legacy, :with_artifact, artifact_type: :archive) + create(:geo_job_artifact_registry_legacy, :orphan, artifact_type: :archive) expect { batch.clean! }.not_to change { Geo::JobArtifactRegistry.count } expect(batch.geo_registries_count).to eq(2) diff --git a/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_spec.rb b/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_spec.rb index 7749826e1607185e2820f1f783e19a0a01f78d06..49742eeba3ce8cb1f69ef4ac17adc1fcdd2343ef 100644 --- a/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_spec.rb +++ b/ee/spec/lib/ee/gitlab/cleanup/orphan_job_artifact_files_spec.rb @@ -34,7 +34,7 @@ it 'accumulates the number of cleaned Geo registries' do stub_const("#{described_class.name}::BATCH_SIZE", 2) - create_list(:geo_job_artifact_registry, 3, :orphan, artifact_type: :archive) + create_list(:geo_job_artifact_registry, 3, :orphan) create(:ci_job_artifact, :archive).delete cleanup.run! diff --git a/ee/spec/lib/gitlab/geo/geo_node_status_check_spec.rb b/ee/spec/lib/gitlab/geo/geo_node_status_check_spec.rb index a56eab0c573373379b462f5da8723b3b26117c74..40f72d9f8eb1df7a88f428ed77b4034f203bbaeb 100644 --- a/ee/spec/lib/gitlab/geo/geo_node_status_check_spec.rb +++ b/ee/spec/lib/gitlab/geo/geo_node_status_check_spec.rb @@ -14,6 +14,8 @@ describe '#replication_verification_complete?' do before do allow(Gitlab.config.geo.registry_replication).to receive(:enabled).and_return(true) + + stub_feature_flags(geo_job_artifact_replication: false) end context 'with legacy replication' do diff --git a/ee/spec/lib/gitlab/geo/log_cursor/events/job_artifact_deleted_event_spec.rb b/ee/spec/lib/gitlab/geo/log_cursor/events/job_artifact_deleted_event_spec.rb index c3b8a0516ea285b4fd004ef6e7fc96e2f1f492a8..d9955c1d4cf94874147231f6291f753436de0b61 100644 --- a/ee/spec/lib/gitlab/geo/log_cursor/events/job_artifact_deleted_event_spec.rb +++ b/ee/spec/lib/gitlab/geo/log_cursor/events/job_artifact_deleted_event_spec.rb @@ -11,6 +11,10 @@ subject { described_class.new(job_artifact_deleted_event, Time.now, logger) } + before do + stub_feature_flags(geo_job_artifact_replication: false) + end + around do |example| Sidekiq::Testing.inline! { example.run } end @@ -24,7 +28,7 @@ describe '#process' do context 'with a tracking database entry' do before do - create(:geo_job_artifact_registry, artifact_id: job_artifact.id) + create(:geo_job_artifact_registry_legacy, artifact_id: job_artifact.id) end context 'with a file' do diff --git a/ee/spec/migrations/geo/migrate_job_artifact_registry_spec.rb b/ee/spec/migrations/geo/migrate_job_artifact_registry_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..31432ec249a45842ef2cd1d8b81df684bc618d14 --- /dev/null +++ b/ee/spec/migrations/geo/migrate_job_artifact_registry_spec.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require 'spec_helper' + +require_migration! + +RSpec.describe MigrateJobArtifactRegistry do + let(:migration_name) { 'MigrateJobArtifactRegistryToSsf' } + + let(:registry) { table(:job_artifact_registry) } + + let!(:registry1) { registry.create!(artifact_id: 1, success: true, state: 0)} + let!(:registry2) { registry.create!(artifact_id: 2, success: true, state: 0) } + let!(:registry3) { registry.create!(artifact_id: 3, success: true, state: 0) } + let!(:registry4) { registry.create!(artifact_id: 4, success: true, state: 0) } + + before do + stub_const("#{described_class.name}::BATCH_SIZE", 2) + end + + it 'correctly schedules background migrations' do + Sidekiq::Testing.fake! do + freeze_time do + migrate! + + expect(BackgroundMigrationWorker.jobs.size).to eq(2) + expect(described_class::MIGRATION).to be_scheduled_migration_with_multiple_args(registry1.id, registry2.id) + expect(described_class::MIGRATION).to be_scheduled_migration_with_multiple_args(registry3.id, registry4.id) + end + end + end +end diff --git a/ee/spec/models/ee/ci/job_artifact_spec.rb b/ee/spec/models/ee/ci/job_artifact_spec.rb index 5efc5ffeb3d54fafa632b415953c0dac7a9c1117..9f0abdb3e5a878e688535a0ec33162ba17910019 100644 --- a/ee/spec/models/ee/ci/job_artifact_spec.rb +++ b/ee/spec/models/ee/ci/job_artifact_spec.rb @@ -6,6 +6,15 @@ using RSpec::Parameterized::TableSyntax include EE::GeoHelpers + include_examples 'a replicable model with a separate table for verification state' do + before do + stub_artifacts_object_storage + end + + let(:verifiable_model_record) { build(:ci_job_artifact) } # add extra params if needed to make sure the record is included in `available_verifiables` + let(:unverifiable_model_record) { build(:ci_job_artifact, :remote_store) } # add extra params if needed to make sure the record is NOT included in `available_verifiables` + end + it { is_expected.to delegate_method(:validate_schema?).to(:job) } describe '#destroy' do @@ -17,6 +26,8 @@ end it 'creates a JobArtifactDeletedEvent' do + stub_feature_flags(geo_job_artifact_replication: false) + job_artifact = create(:ee_ci_job_artifact, :archive) expect { job_artifact.destroy! }.to change { Geo::JobArtifactDeletedEvent.count }.by(1) diff --git a/ee/spec/models/geo/job_artifact_registry_spec.rb b/ee/spec/models/geo/job_artifact_registry_spec.rb index 3a433feee27b191deb736c3c7bbf4898ddf17d61..e7e6e0557a00b8283fadf3c0329c06fbb59fe8b7 100644 --- a/ee/spec/models/geo/job_artifact_registry_spec.rb +++ b/ee/spec/models/geo/job_artifact_registry_spec.rb @@ -2,208 +2,223 @@ require 'spec_helper' -RSpec.describe Geo::JobArtifactRegistry, :geo do +RSpec.describe Geo::JobArtifactRegistry, :geo, type: :model do include EE::GeoHelpers - it_behaves_like 'a BulkInsertSafe model', Geo::JobArtifactRegistry do - let(:valid_items_for_bulk_insertion) { build_list(:geo_job_artifact_registry, 10) } - let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined + let_it_be(:registry) { create(:geo_job_artifact_registry) } + + specify 'factory is valid' do + expect(registry).to be_valid end - describe '.insert_for_model_ids' do - it 'returns an array with the primary key values for all inserted records' do - ids = described_class.insert_for_model_ids([1]) + include_examples 'a Geo framework registry' + include_examples 'a Geo verifiable registry' - expect(ids).to contain_exactly(a_kind_of(Integer)) + context "deprecated registry functionality" do + before do + stub_feature_flags(geo_job_artifact_replication: false) end - it 'defaults success column to false for all inserted records' do - ids = described_class.insert_for_model_ids([1]) - - expect(described_class.where(id: ids).pluck(:success)).to eq([false]) + it_behaves_like 'a BulkInsertSafe model', Geo::JobArtifactRegistry do + let(:valid_items_for_bulk_insertion) { build_list(:geo_job_artifact_registry, 10) } + let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined end - end - describe '.find_registry_differences' do - let_it_be(:secondary) { create(:geo_node) } + describe '.insert_for_model_ids' do + it 'returns an array with the primary key values for all inserted records' do + ids = described_class.insert_for_model_ids([1]) - before do - stub_current_geo_node(secondary) - stub_artifacts_object_storage - end - - let_it_be(:synced_group) { create(:group) } - let_it_be(:nested_group_1) { create(:group, parent: synced_group) } - let_it_be(:synced_project) { create(:project, group: synced_group) } - let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) } - let_it_be(:unsynced_project) { create(:project) } - let_it_be(:project_broken_storage) { create(:project, :broken_storage) } - - let_it_be(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) } - let_it_be(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) } - let_it_be(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) } - let_it_be(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) } - let_it_be(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) } - let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) } - let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) } - let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) } - - # Expired job artifacts used to be excluded, but are now included - let_it_be(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) } - - context 'untracked IDs' do - before do - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) - create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id) - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id) + expect(ids).to contain_exactly(a_kind_of(Integer)) end - it 'includes job artifact IDs without an entry on the tracking database' do - untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) + it 'defaults success column to false for all inserted records' do + ids = described_class.insert_for_model_ids([1]) - expect(untracked_ids).to match_array( - [ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id, - ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id, - ci_job_artifact_6.id]) + expect(described_class.where(id: ids).pluck(:success)).to eq([false]) end + end - it 'excludes job artifacts outside the ID range' do - untracked_ids, _ = described_class.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id) + describe '.find_registry_differences' do + let_it_be(:secondary) { create(:geo_node) } - expect(untracked_ids).to match_array( - [ci_job_artifact_5.id, ci_job_artifact_remote_1.id, - ci_job_artifact_remote_2.id, ci_job_artifact_6.id]) + before do + stub_current_geo_node(secondary) + stub_artifacts_object_storage end - context 'with selective sync by namespace' do - let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } + let_it_be(:synced_group) { create(:group) } + let_it_be(:nested_group_1) { create(:group, parent: synced_group) } + let_it_be(:synced_project) { create(:project, group: synced_group) } + let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) } + let_it_be(:unsynced_project) { create(:project) } + let_it_be(:project_broken_storage) { create(:project, :broken_storage) } + + let_it_be(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) } + let_it_be(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) } + let_it_be(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) } + let_it_be(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) } + let_it_be(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) } + let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) } + let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) } + let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) } + + # Expired job artifacts used to be excluded, but are now included + let_it_be(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) } + + context 'untracked IDs' do + before do + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_1.id) + create(:geo_job_artifact_registry_legacy, :failed, artifact_id: ci_job_artifact_3.id) + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_4.id) + end - it 'excludes job artifact IDs that are not in selectively synced projects' do + it 'includes job artifact IDs without an entry on the tracking database' do untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) - expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_6.id]) + expect(untracked_ids).to match_array( + [ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id, + ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id, + ci_job_artifact_6.id]) end - end - context 'with selective sync by shard' do - let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } + it 'excludes job artifacts outside the ID range' do + untracked_ids, _ = described_class.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id) - it 'excludes job artifact IDs that are not in selectively synced projects' do - untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) + expect(untracked_ids).to match_array( + [ci_job_artifact_5.id, ci_job_artifact_remote_1.id, + ci_job_artifact_remote_2.id, ci_job_artifact_6.id]) + end - expect(untracked_ids).to match_array([ci_job_artifact_5.id]) + context 'with selective sync by namespace' do + let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } + + it 'excludes job artifact IDs that are not in selectively synced projects' do + untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) + + expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_6.id]) + end end - end - context 'with object storage sync disabled' do - let(:secondary) { create(:geo_node, :local_storage_only) } + context 'with selective sync by shard' do + let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } - it 'excludes job artifacts in object storage' do - untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) + it 'excludes job artifact IDs that are not in selectively synced projects' do + untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) - expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_6.id]) + expect(untracked_ids).to match_array([ci_job_artifact_5.id]) + end + end + + context 'with object storage sync disabled' do + let(:secondary) { create(:geo_node, :local_storage_only) } + + it 'excludes job artifacts in object storage' do + untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) + + expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_6.id]) + end end end - end - context 'unused tracked IDs' do - context 'with an orphaned registry' do - let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) } + context 'unused tracked IDs' do + context 'with an orphaned registry' do + let!(:orphaned) { create(:geo_job_artifact_registry_legacy, artifact_id: non_existing_record_id) } - it 'includes tracked IDs that do not exist in the model table' do - range = non_existing_record_id..non_existing_record_id + it 'includes tracked IDs that do not exist in the model table' do + range = non_existing_record_id..non_existing_record_id - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to match_array([non_existing_record_id]) - end + expect(unused_tracked_ids).to match_array([non_existing_record_id]) + end - it 'excludes IDs outside the ID range' do - range = 1..1000 + it 'excludes IDs outside the ID range' do + range = 1..1000 - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to be_empty + expect(unused_tracked_ids).to be_empty + end end - end - context 'with selective sync by namespace' do - let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } + context 'with selective sync by namespace' do + let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } - context 'with a tracked job artifact' do - let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) } - let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id } + context 'with a tracked job artifact' do + let!(:registry_entry) { create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_1.id) } + let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id } - context 'excluded from selective sync' do - it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id) + context 'excluded from selective sync' do + it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_4.id) - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id]) + expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id]) + end end - end - context 'included in selective sync' do - it 'excludes tracked job artifact IDs that are in selectively synced projects' do - _, unused_tracked_ids = described_class.find_registry_differences(range) + context 'included in selective sync' do + it 'excludes tracked job artifact IDs that are in selectively synced projects' do + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to be_empty + expect(unused_tracked_ids).to be_empty + end end end end - end - context 'with selective sync by shard' do - let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } + context 'with selective sync by shard' do + let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } - context 'with a tracked job artifact' do - let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) } - let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id } + context 'with a tracked job artifact' do + let!(:registry_entry) { create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_5.id) } + let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id } - context 'excluded from selective sync' do - it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) + context 'excluded from selective sync' do + it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_1.id) - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id]) + expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id]) + end end - end - context 'included in selective sync' do - it 'excludes tracked job artifact IDs that are in selectively synced shards' do - _, unused_tracked_ids = described_class.find_registry_differences(range) + context 'included in selective sync' do + it 'excludes tracked job artifact IDs that are in selectively synced shards' do + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to be_empty + expect(unused_tracked_ids).to be_empty + end end end end - end - context 'with object storage sync disabled' do - let(:secondary) { create(:geo_node, :local_storage_only) } + context 'with object storage sync disabled' do + let(:secondary) { create(:geo_node, :local_storage_only) } - context 'with a tracked job artifact' do - context 'in object storage' do - it 'includes tracked job artifact IDs that are in object storage' do - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id) - range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id + context 'with a tracked job artifact' do + context 'in object storage' do + it 'includes tracked job artifact IDs that are in object storage' do + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_remote_1.id) + range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id]) + expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id]) + end end - end - context 'not in object storage' do - it 'excludes tracked job artifact IDs that are not in object storage' do - create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) - range = ci_job_artifact_1.id..ci_job_artifact_1.id + context 'not in object storage' do + it 'excludes tracked job artifact IDs that are not in object storage' do + create(:geo_job_artifact_registry_legacy, artifact_id: ci_job_artifact_1.id) + range = ci_job_artifact_1.id..ci_job_artifact_1.id - _, unused_tracked_ids = described_class.find_registry_differences(range) + _, unused_tracked_ids = described_class.find_registry_differences(range) - expect(unused_tracked_ids).to be_empty + expect(unused_tracked_ids).to be_empty + end end end end diff --git a/ee/spec/models/geo_node_status_spec.rb b/ee/spec/models/geo_node_status_spec.rb index 3391a1871200249b65753ee47ca2e159d283cb93..3e9d282b62f46b75e39137b4db62c1d0e08fa623 100644 --- a/ee/spec/models/geo_node_status_spec.rb +++ b/ee/spec/models/geo_node_status_spec.rb @@ -164,9 +164,9 @@ it 'counts synced job artifacts' do # These should be ignored create(:geo_upload_registry) - create(:geo_job_artifact_registry, :with_artifact, success: false) + create(:geo_job_artifact_registry_legacy, :with_artifact, success: false) - create(:geo_job_artifact_registry, :with_artifact, success: true) + create(:geo_job_artifact_registry_legacy, :with_artifact, success: true) expect(subject.job_artifacts_synced_count).to eq(1) end @@ -174,11 +174,13 @@ describe '#job_artifacts_synced_missing_on_primary_count' do it 'counts job artifacts marked as synced due to file missing on the primary' do + stub_feature_flags(geo_job_artifact_replication: false) + # These should be ignored create(:geo_upload_registry, missing_on_primary: true) - create(:geo_job_artifact_registry, :with_artifact, success: true) + create(:geo_job_artifact_registry_legacy, :with_artifact, success: true) - create(:geo_job_artifact_registry, :with_artifact, success: true, missing_on_primary: true) + create(:geo_job_artifact_registry_legacy, :with_artifact, success: true, missing_on_primary: true) expect(subject.job_artifacts_synced_missing_on_primary_count).to eq(1) end @@ -186,10 +188,12 @@ describe '#job_artifacts_failed_count' do it 'counts failed job artifacts' do + stub_feature_flags(geo_job_artifact_replication: false) + # These should be ignored create(:geo_upload_registry, :failed) - create(:geo_job_artifact_registry, :with_artifact, success: true) - create(:geo_job_artifact_registry, :with_artifact, :failed) + create(:geo_job_artifact_registry_legacy, :with_artifact, success: true) + create(:geo_job_artifact_registry_legacy, :with_artifact, :failed) expect(subject.job_artifacts_failed_count).to eq(1) end @@ -202,7 +206,7 @@ build = create(:ci_build, project: project) job_artifact = create(:ci_job_artifact, job: build) - create(:geo_job_artifact_registry, success: index.even?, artifact_id: job_artifact.id) + create(:geo_job_artifact_registry_legacy, success: index.even?, artifact_id: job_artifact.id) end end @@ -1080,6 +1084,7 @@ Geo::GroupWikiRepositoryReplicator | :group_wiki_repository | :geo_group_wiki_repository_registry Geo::PagesDeploymentReplicator | :pages_deployment | :geo_pages_deployment_registry Geo::UploadReplicator | :upload | :geo_upload_registry + Geo::JobArtifactReplicator | :ci_job_artifact | :geo_job_artifact_registry end with_them do diff --git a/ee/spec/replicators/geo/job_artifact_replicator_spec.rb b/ee/spec/replicators/geo/job_artifact_replicator_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..48e9e6078ef7209dbc1c5e567d783524ed0fbfb2 --- /dev/null +++ b/ee/spec/replicators/geo/job_artifact_replicator_spec.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Geo::JobArtifactReplicator do + let(:model_record) { create(:ci_job_artifact, :with_file) } + + include_examples 'a blob replicator' + include_examples 'a verifiable replicator' +end diff --git a/ee/spec/requests/api/graphql/geo/registries_spec.rb b/ee/spec/requests/api/graphql/geo/registries_spec.rb index eeae20add83675b45c7420bd796003ca684becea..7c4cdd0680e6d5dfe54931b2bf27274f2be7b234 100644 --- a/ee/spec/requests/api/graphql/geo/registries_spec.rb +++ b/ee/spec/requests/api/graphql/geo/registries_spec.rb @@ -58,4 +58,11 @@ registry_factory: :geo_upload_registry, registry_foreign_key_field_name: 'fileId' } + + it_behaves_like 'gets registries for', { + field_name: 'jobArtifactRegistries', + registry_class_name: 'JobArtifactRegistry', + registry_factory: :geo_job_artifact_registry, + registry_foreign_key_field_name: 'artifactId' + } end diff --git a/ee/spec/services/geo/file_download_service_spec.rb b/ee/spec/services/geo/file_download_service_spec.rb index f3b5be7429b4629a5a129678ef0fc099b28dd9b2..6cabf95029c61c7bc32e56e245ba75659f083bfc 100644 --- a/ee/spec/services/geo/file_download_service_spec.rb +++ b/ee/spec/services/geo/file_download_service_spec.rb @@ -11,6 +11,7 @@ before do stub_current_geo_node(secondary) + stub_feature_flags(geo_job_artifact_replication: false) end describe '#downloader' do @@ -28,7 +29,7 @@ context 'with job_artifacts' do let!(:geo_job_artifact_registry) do - create(:geo_job_artifact_registry, success: false, retry_count: 31, artifact_id: file.id) + create(:geo_job_artifact_registry_legacy, success: false, retry_count: 31, artifact_id: file.id) end let(:file) { create(:ci_job_artifact) } @@ -181,7 +182,7 @@ context 'for a registered file that failed to sync' do let!(:geo_job_artifact_registry) do - create(:geo_job_artifact_registry, success: false, artifact_id: file.id, retry_count: 3, retry_at: 1.hour.ago) + create(:geo_job_artifact_registry_legacy, success: false, artifact_id: file.id, retry_count: 3, retry_at: 1.hour.ago) end context 'when the file is successfully downloaded' do diff --git a/ee/spec/services/geo/file_registry_removal_service_spec.rb b/ee/spec/services/geo/file_registry_removal_service_spec.rb index 59e780728ecc545cb8c92b81460affcbe5d1660f..799fe891c9c5146268009f2323c1f2d9d4badd1b 100644 --- a/ee/spec/services/geo/file_registry_removal_service_spec.rb +++ b/ee/spec/services/geo/file_registry_removal_service_spec.rb @@ -59,9 +59,13 @@ end let!(:job_artifact) { create(:ci_job_artifact, :archive) } - let!(:registry) { create(:geo_job_artifact_registry, artifact_id: job_artifact.id) } + let!(:registry) { create(:geo_job_artifact_registry_legacy, artifact_id: job_artifact.id) } let!(:file_path) { job_artifact.file.path } + before do + stub_feature_flags(geo_job_artifact_replication: false) + end + it_behaves_like 'removes artifact' context 'migrated to object storage' do diff --git a/ee/spec/services/geo/registry_consistency_service_spec.rb b/ee/spec/services/geo/registry_consistency_service_spec.rb index f97494415162102f6688b8f53c54a5b12910817b..d182ee2bb1d625f16ea94c291eb980b48734a8c2 100644 --- a/ee/spec/services/geo/registry_consistency_service_spec.rb +++ b/ee/spec/services/geo/registry_consistency_service_spec.rb @@ -20,7 +20,8 @@ def model_class_factory_name(registry_class) Geo::DesignRegistry => :project_with_design, Geo::MergeRequestDiffRegistry => :external_merge_request_diff, Geo::PackageFileRegistry => :package_file, - Geo::UploadRegistry => :upload + Geo::UploadRegistry => :upload, + Geo::JobArtifactRegistry => :ci_job_artifact }.fetch(registry_class, default_factory_name) end diff --git a/ee/spec/tasks/geo_rake_spec.rb b/ee/spec/tasks/geo_rake_spec.rb index 40d002bc6fed984c7dc449bf7db2aebc2e857878..9662fc14d8f03de697e5149c2b23d746a5dcc6c7 100644 --- a/ee/spec/tasks/geo_rake_spec.rb +++ b/ee/spec/tasks/geo_rake_spec.rb @@ -205,6 +205,8 @@ context 'with SSF LFS replication eneabled' do it 'prints messages for all the checks' do + stub_feature_flags(geo_job_artifact_replication: false) + checks.each do |text| expect { run_rake_task('geo:status') }.to output(text).to_stdout end diff --git a/ee/spec/uploaders/every_gitlab_uploader_spec.rb b/ee/spec/uploaders/every_gitlab_uploader_spec.rb index 3942afe0d6327338ee65a6b1480aa56f7492df8a..b1d7ae90533fe9fb5b6dbb2b3a5c653b71c60beb 100644 --- a/ee/spec/uploaders/every_gitlab_uploader_spec.rb +++ b/ee/spec/uploaders/every_gitlab_uploader_spec.rb @@ -30,6 +30,8 @@ # When this test starts failing means that we have migrated Geo's handling of uploads to the # SSF, and we can remove the tests for the file retriever and downloader classes. it 'has some uploads to be migrated' do + stub_feature_flags(geo_job_artifact_replication: false) + expect(object_types - replicable_names).not_to be_empty end end @@ -84,8 +86,10 @@ def known_unimplemented_uploader?(uploader) end def handled_by_ssf?(uploader) + return true if uploads?(uploader) + replicable_name = replicable_name_for(uploader) - replicable_names.include?(replicable_name) || uploads?(uploader) + replicable_names.include?(replicable_name) && ::Gitlab::Geo::Replicator.for_replicable_name(replicable_name).enabled? end def uploads?(uploader) @@ -108,7 +112,11 @@ def object_type_for(uploader) end def replicable_names - @replicable_names ||= replicators.map(&:replicable_name) + @replicable_names ||= begin + replicators + .map(&:replicable_name) + .select {|replicable_name| ::Gitlab::Geo::Replicator.for_replicable_name(replicable_name).enabled? } + end end def replicable_name_for(uploader) diff --git a/ee/spec/workers/geo/file_download_dispatch_worker_spec.rb b/ee/spec/workers/geo/file_download_dispatch_worker_spec.rb index b1329e6931589b34bec7ed15644346e80e588a6c..48fc43312826f54965be25ca2497903cb50dfa25 100644 --- a/ee/spec/workers/geo/file_download_dispatch_worker_spec.rb +++ b/ee/spec/workers/geo/file_download_dispatch_worker_spec.rb @@ -19,6 +19,7 @@ WebMock.stub_request(:get, /primary-geo-node/).to_return(status: 200, body: "", headers: {}) allow(Geo::FileDownloadWorker).to receive(:with_status).and_return(Geo::FileDownloadWorker) + stub_feature_flags(geo_job_artifact_replication: false) end it 'does not schedule anything when tracking database is not configured' do @@ -48,7 +49,7 @@ context 'with job artifacts' do it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do - registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :never_synced) expect(Geo::FileDownloadWorker).to receive(:perform_async) .with('job_artifact', registry.artifact_id).once.and_return(spy) @@ -57,7 +58,7 @@ end it 'performs Geo::FileDownloadWorker for failed-sync job artifacts' do - registry = create(:geo_job_artifact_registry, :with_artifact, :failed) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :failed) expect(Geo::FileDownloadWorker).to receive(:perform_async) .with('job_artifact', registry.artifact_id).once.and_return(spy) @@ -66,7 +67,7 @@ end it 'does not perform Geo::FileDownloadWorker for synced job artifacts' do - registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 1234, success: true) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, bytes: 1234, success: true) expect(Geo::FileDownloadWorker).not_to receive(:perform_async) .with('job_artifact', registry.artifact_id) @@ -75,7 +76,7 @@ end it 'does not perform Geo::FileDownloadWorker for synced job artifacts even with 0 bytes downloaded' do - registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 0, success: true) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, bytes: 0, success: true) expect(Geo::FileDownloadWorker).not_to receive(:perform_async) .with('job_artifact', registry.artifact_id) @@ -84,7 +85,7 @@ end it 'does not retry failed artifacts when retry_at is tomorrow' do - registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.tomorrow) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :failed, retry_at: Date.tomorrow) expect(Geo::FileDownloadWorker).not_to receive(:perform_async) .with('job_artifact', registry.artifact_id) @@ -93,7 +94,7 @@ end it 'retries failed artifacts when retry_at is in the past' do - registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.yesterday) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :failed, retry_at: Date.yesterday) expect(Geo::FileDownloadWorker).to receive(:perform_async) .with('job_artifact', registry.artifact_id).once.and_return(spy) @@ -103,10 +104,10 @@ context 'with files missing on the primary that are marked as synced' do let!(:artifact_file_missing_on_primary) { create(:ci_job_artifact) } - let!(:artifact_registry) { create(:geo_job_artifact_registry, artifact_id: artifact_file_missing_on_primary.id, bytes: 1234, success: true, missing_on_primary: true) } + let!(:artifact_registry) { create(:geo_job_artifact_registry_legacy, artifact_id: artifact_file_missing_on_primary.id, bytes: 1234, success: true, missing_on_primary: true) } it 'retries the files if there is spare capacity' do - registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :never_synced) expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id) expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id) @@ -131,7 +132,7 @@ end it 'does not retry those files if there is no spare capacity' do - registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :never_synced) expect(subject).to receive(:db_retrieve_batch_size).and_return(1).twice expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id) @@ -140,7 +141,7 @@ end it 'does not retry those files if they are already scheduled' do - registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) + registry = create(:geo_job_artifact_registry_legacy, :with_artifact, :never_synced) scheduled_jobs = [{ type: 'job_artifact', id: artifact_file_missing_on_primary.id, job_id: 'foo' }] expect(subject).to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(1) @@ -171,7 +172,7 @@ result_object = double(:result, success: true, bytes_downloaded: 100, primary_missing_file: false) allow_any_instance_of(::Gitlab::Geo::Replication::BaseTransfer).to receive(:download_from_primary).and_return(result_object) - create_list(:geo_job_artifact_registry, 6, :with_artifact, :never_synced) + create_list(:geo_job_artifact_registry_legacy, 6, :with_artifact, :never_synced) expect(Geo::FileDownloadWorker).to receive(:perform_async).exactly(6).times.and_call_original # For 10 downloads, we expect four database reloads: diff --git a/ee/spec/workers/geo/secondary/registry_consistency_worker_spec.rb b/ee/spec/workers/geo/secondary/registry_consistency_worker_spec.rb index 8ec7eba8fb35a50fa024af8666330612d65f2ea5..94db718fda30c343bfaa1e4d9963e27e887b01c4 100644 --- a/ee/spec/workers/geo/secondary/registry_consistency_worker_spec.rb +++ b/ee/spec/workers/geo/secondary/registry_consistency_worker_spec.rb @@ -100,6 +100,7 @@ expect(Geo::TerraformStateVersionRegistry.where(terraform_state_version_id: terraform_state_version.id).count).to eq(0) expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(0) expect(Geo::PagesDeploymentRegistry.where(pages_deployment: pages_deployment.id).count).to eq(0) + expect(Geo::JobArtifactRegistry.where(job_artifact: job_artifact.id).count).to eq(0) subject.perform @@ -114,6 +115,7 @@ expect(Geo::TerraformStateVersionRegistry.where(terraform_state_version_id: terraform_state_version.id).count).to eq(1) expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(1) expect(Geo::PagesDeploymentRegistry.where(pages_deployment: pages_deployment.id).count).to eq(1) + expect(Geo::JobArtifactRegistry.where(job_artifact: job_artifact.id).count).to eq(1) end context 'when the current Geo node is disabled or primary' do diff --git a/lib/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb b/lib/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb new file mode 100644 index 0000000000000000000000000000000000000000..bcc02c1dbf2549ca1d5a7b5a749ebe654828a8be --- /dev/null +++ b/lib/gitlab/background_migration/migrate_job_artifact_registry_to_ssf.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # rubocop: disable Style/Documentation + class MigrateJobArtifactRegistryToSsf + def perform(*job_artifact_ids) + end + end + end +end + +Gitlab::BackgroundMigration::MigrateJobArtifactRegistryToSsf.prepend_mod_with('Gitlab::BackgroundMigration::MigrateJobArtifactRegistryToSsf') diff --git a/lib/gitlab/database/gitlab_schemas.yml b/lib/gitlab/database/gitlab_schemas.yml index e064f7ede5c6a2b86c570cb6d49840f0ca2c9507..93cd75ce5a7a036ada899d3567076a589e0d5b34 100644 --- a/lib/gitlab/database/gitlab_schemas.yml +++ b/lib/gitlab/database/gitlab_schemas.yml @@ -85,6 +85,7 @@ ci_instance_variables: :gitlab_ci ci_job_artifacts: :gitlab_ci ci_job_token_project_scope_links: :gitlab_ci ci_job_variables: :gitlab_ci +ci_job_artifact_states: :gitlab_ci ci_minutes_additional_packs: :gitlab_ci ci_namespace_monthly_usages: :gitlab_ci ci_namespace_mirrors: :gitlab_ci