From 76c5a0a308604fe4a946345977e1f304076f3940 Mon Sep 17 00:00:00 2001 From: Eugenia Grieff Date: Mon, 31 Jan 2022 19:50:36 +0000 Subject: [PATCH] Delete invalid epic_issue records migration This batched background migration finds and deletes records which are invalid because the issue's project does not belong to the epic's group or its descendants. The query for ancestors uses traversal_ids. Changelog: other --- ...dule_delete_invalid_epic_issues_revised.rb | 27 +++ db/schema_migrations/20220128103042 | 1 + .../delete_invalid_epic_issues.rb | 118 +++++++++++ .../delete_invalid_epic_issues_spec.rb | 186 ++++++++++++++++++ ...delete_invalid_epic_issues_revised_spec.rb | 30 +++ .../delete_invalid_epic_issues.rb | 14 ++ 6 files changed, 376 insertions(+) create mode 100644 db/post_migrate/20220128103042_schedule_delete_invalid_epic_issues_revised.rb create mode 100644 db/schema_migrations/20220128103042 create mode 100644 ee/lib/ee/gitlab/background_migration/delete_invalid_epic_issues.rb create mode 100644 ee/spec/lib/ee/gitlab/background_migration/delete_invalid_epic_issues_spec.rb create mode 100644 ee/spec/migrations/schedule_delete_invalid_epic_issues_revised_spec.rb create mode 100644 lib/gitlab/background_migration/delete_invalid_epic_issues.rb diff --git a/db/post_migrate/20220128103042_schedule_delete_invalid_epic_issues_revised.rb b/db/post_migrate/20220128103042_schedule_delete_invalid_epic_issues_revised.rb new file mode 100644 index 00000000000000..642bf012ce0edd --- /dev/null +++ b/db/post_migrate/20220128103042_schedule_delete_invalid_epic_issues_revised.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +class ScheduleDeleteInvalidEpicIssuesRevised < Gitlab::Database::Migration[1.0] + disable_ddl_transaction! + + MIGRATION = 'DeleteInvalidEpicIssues' + INTERVAL = 2.minutes + BATCH_SIZE = 1_000 + MAX_BATCH_SIZE = 2_000 + SUB_BATCH_SIZE = 50 + + def up + queue_batched_background_migration( + MIGRATION, + :epics, + :id, + job_interval: INTERVAL, + batch_size: BATCH_SIZE, + max_batch_size: MAX_BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :epics, :id, []) + end +end diff --git a/db/schema_migrations/20220128103042 b/db/schema_migrations/20220128103042 new file mode 100644 index 00000000000000..de1e50b66bfed8 --- /dev/null +++ b/db/schema_migrations/20220128103042 @@ -0,0 +1 @@ +57813d4c107938d8e58d6223719c2c67206172342b52655ca4a068c845edeb3a \ No newline at end of file diff --git a/ee/lib/ee/gitlab/background_migration/delete_invalid_epic_issues.rb b/ee/lib/ee/gitlab/background_migration/delete_invalid_epic_issues.rb new file mode 100644 index 00000000000000..8ecaec0e2df755 --- /dev/null +++ b/ee/lib/ee/gitlab/background_migration/delete_invalid_epic_issues.rb @@ -0,0 +1,118 @@ +# frozen_string_literal: true + +module EE + module Gitlab + module BackgroundMigration + # Class that removes invalid records from `epic_issues` table. + # For more information check: https://gitlab.com/gitlab-org/gitlab/-/issues/339514 + module DeleteInvalidEpicIssues + extend ::Gitlab::Utils::Override + class Namespace < ::ApplicationRecord + self.table_name = 'namespaces' + self.inheritance_column = :_type_disabled + + has_many :epics + has_many :projects + + SELF_AND_DESCENDANTS = <<~SQL + SELECT namespaces.id + FROM namespaces + WHERE namespaces.type = 'Group' AND (traversal_ids @> ('{%{id}}')) + SQL + + def self_with_descendants_ids + formatted_query = format(SELF_AND_DESCENDANTS, id: self.id) + + namespaces = ::ApplicationRecord.connection.execute(formatted_query) + namespaces.to_a.collect { |h| h['id'] } + end + end + + class Project < ::ApplicationRecord + self.table_name = 'projects' + + has_many :issues + belongs_to :group + end + + class Epic < ::ApplicationRecord + include EachBatch + + self.table_name = 'epics' + + has_many :epic_issues + belongs_to :group + end + + class Issue < ::ApplicationRecord + self.table_name = 'issues' + + has_many :epic_issues + belongs_to :project + end + + class EpicIssue < ::ApplicationRecord + self.table_name = 'epic_issues' + + belongs_to :epic + belongs_to :issue + end + + override :perform + def perform + batch_relation = Epic.where(id: start_id..end_id).order(:group_id) + + find_and_delete_invalid_records(batch_relation, sub_batch_size, pause_ms) + end + + private + + # rubocop:disable Layout/LineLength + def find_and_delete_invalid_records(epics, sub_batch_size, pause) + epics.each_batch(of: sub_batch_size) do |sub_batch| + to_be_deleted = [] + batch = sub_batch.joins(epic_issues: { issue: :project }) + .select('epics.group_id as group_id, epic_issues.id as epic_issue_id, projects.namespace_id as issue_namespace_id') + + batch.group_by(&:group_id).each do |group_id, group_epics| + group_hierarchy_ids = Namespace.find_by(id: group_id)&.self_with_descendants_ids + next if group_hierarchy_ids.blank? + + group_epics.each do |epic| + next if group_hierarchy_ids.include?(epic.issue_namespace_id) + + to_be_deleted << epic.epic_issue_id + end + end + + delete_records(to_be_deleted) + pause = 0 if pause < 0 + sleep(pause * 0.001) + end + end + # rubocop:enable Layout/LineLength + + def delete_records(records) + return unless records.present? + + to_delete = EpicIssue.where(id: records) + log_info('Removing EpicIssue records', deleted_count: to_delete.size, data: to_delete.map(&:attributes)) + + batch_metrics.time_operation(:delete_all) { to_delete.delete_all } + end + + def logger + @logger ||= ::Gitlab::BackgroundMigration::Logger.build + end + + def log_info(message, **extra) + logger.info(migrator: 'DeleteInvalidEpicIssues', message: message, **extra) + end + + def batch_metrics + @batch_metrics ||= ::Gitlab::Database::BackgroundMigration::BatchMetrics.new + end + end + end + end +end diff --git a/ee/spec/lib/ee/gitlab/background_migration/delete_invalid_epic_issues_spec.rb b/ee/spec/lib/ee/gitlab/background_migration/delete_invalid_epic_issues_spec.rb new file mode 100644 index 00000000000000..13855fc1bdef01 --- /dev/null +++ b/ee/spec/lib/ee/gitlab/background_migration/delete_invalid_epic_issues_spec.rb @@ -0,0 +1,186 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::DeleteInvalidEpicIssues do + # rubocop:disable RSpec/MultipleMemoizedHelpers + let!(:users) { table(:users) } + let!(:namespaces) { table(:namespaces) } + let!(:projects) { table(:projects) } + let!(:epics) { table(:epics) } + let!(:issues) { table(:issues) } + let!(:epic_issues) { table(:epic_issues) } + + let!(:user) { users.create!(name: 'test', email: 'test@example.com', projects_limit: 5) } + + let!(:root_group) do + namespaces.create!(id: 1, name: 'root-group', path: 'root-group', + type: 'Group', traversal_ids: [1]) + end + + let!(:group) do + namespaces.create!(id: 2, name: 'group', path: 'group', parent_id: root_group.id, + type: 'Group', traversal_ids: [1, 2]) + end + + let!(:sub_group) do + namespaces.create!(id: 3, name: 'subgroup', path: 'subgroup', parent_id: group.id, + type: 'Group', traversal_ids: [1, 2, 3]) + end + + let!(:other_group) do + namespaces.create!(id: 4, name: 'other group', path: 'other-group', + type: 'Group', traversal_ids: [4]) + end + + let!(:project_root) do + projects.create!(namespace_id: root_group.id, project_namespace_id: root_group.id, + name: 'root group project', path: 'root-group-project') + end + + let!(:project) do + projects.create!(namespace_id: group.id, project_namespace_id: group.id, + name: 'group project', path: 'group-project') + end + + let!(:project_sub) do + projects.create!(namespace_id: sub_group.id, project_namespace_id: sub_group.id, + name: 'subgroup project', path: 'subgroup-project') + end + + let!(:project_other) do + projects.create!(namespace_id: other_group.id, project_namespace_id: other_group.id, + name: 'other group project', path: 'other-group-project') + end + + describe '#perform' do + let(:batch_table) { :epics } + let(:batch_column) { :id } + let(:sub_batch_size) { 100 } + let(:pause_ms) { 0 } + let(:connection) { ApplicationRecord.connection } + let!(:epic_before) do + epics.create!(iid: 1, title: 'epic-before', title_html: 'epic before', + group_id: group.id, author_id: user.id) + end + + let!(:epic) do + epics.create!(iid: 2, title: 'group-epic', title_html: 'group-epic', + group_id: group.id, author_id: user.id) + end + + let!(:epic_root) do + epics.create!(iid: 3, title: 'root-group-epic', title_html: 'root-group-epic', + group_id: root_group.id, author_id: user.id) + end + + let!(:epic_sub) do + epics.create!(iid: 4, title: 'subgroup-epic', title_html: 'subgroup-epic', + group_id: sub_group.id, author_id: user.id) + end + + let!(:epic_other) do + epics.create!(iid: 5, title: 'other-group-epic', title_html: 'other-group-epic', + group_id: other_group.id, author_id: user.id) + end + + let!(:epic_last) do + epics.create!(iid: 6, title: 'epic_last', title_html: 'epic_last', + group_id: group.id, author_id: user.id) + end + + let!(:issue) { issues.create!(iid: 1, project_id: project.id, title: 'issue 1', author_id: user.id) } + let!(:issue2) { issues.create!(iid: 2, project_id: project.id, title: 'issue 2', author_id: user.id) } + let!(:issue3) { issues.create!(iid: 3, project_id: project.id, title: 'issue 3', author_id: user.id) } + let!(:issue4) { issues.create!(iid: 4, project_id: project.id, title: 'issue 4', author_id: user.id) } + let!(:issue5) { issues.create!(iid: 5, project_id: project.id, title: 'issue 5', author_id: user.id) } + let!(:issue_root) { issues.create!(iid: 6, project_id: project_root.id, title: 'issue_root', author_id: user.id) } + let!(:issue_sub) { issues.create!(iid: 7, project_id: project_sub.id, title: 'issue_sub', author_id: user.id) } + let!(:issue_sub2) { issues.create!(iid: 8, project_id: project_sub.id, title: 'issue_sub 2', author_id: user.id) } + let!(:issue_sub3) { issues.create!(iid: 9, project_id: project_sub.id, title: 'issue_sub 3', author_id: user.id) } + let!(:issue_other) { issues.create!(iid: 10, project_id: project_other.id, title: 'other', author_id: user.id) } + let!(:issue_other_2) { issues.create!(iid: 11, project_id: project_other.id, title: 'other 2', author_id: user.id) } + let!(:issue_other_3) { issues.create!(iid: 12, project_id: project_other.id, title: 'other 3', author_id: user.id) } + let!(:issue_other_4) { issues.create!(iid: 13, project_id: project_other.id, title: 'other 4', author_id: user.id) } + + let!(:valid_and_invalid_epic_issues) do + invalid_epic_issues = [] + valid_epic_issues = [] + + # group's issues linked to epic in group's ancestor + valid_epic_issues << epic_issues.create!(issue_id: issue4.id, epic_id: epic_root.id) + valid_epic_issues << epic_issues.create!(issue_id: issue_sub2.id, epic_id: epic.id) + valid_epic_issues << epic_issues.create!(issue_id: issue_sub3.id, epic_id: epic_root.id) + + # issues and epics in the same group + valid_epic_issues << epic_issues.create!(issue_id: issue.id, epic_id: epic.id) + valid_epic_issues << epic_issues.create!(issue_id: issue3.id, epic_id: epic_last.id) + valid_epic_issues << epic_issues.create!(issue_id: issue_sub.id, epic_id: epic_sub.id) + valid_epic_issues << epic_issues.create!(issue_id: issue_other_2.id, epic_id: epic_other.id) + + # invalid link that is not deleted because epic_before is not included in epics batch + valid_epic_issues << epic_issues.create!(issue_id: issue_other.id, epic_id: epic_before.id) + + # group's issues linked to epic in group's descendant + invalid_epic_issues << epic_issues.create!(issue_id: issue2.id, epic_id: epic_sub.id) + invalid_epic_issues << epic_issues.create!(issue_id: issue_root.id, epic_id: epic_sub.id) + + # issues linked to epics in a different group hierarchy + invalid_epic_issues << epic_issues.create!(issue_id: issue_other_4.id, epic_id: epic_root.id) + invalid_epic_issues << epic_issues.create!(issue_id: issue_other_3.id, epic_id: epic_sub.id) + invalid_epic_issues << epic_issues.create!(issue_id: issue5.id, epic_id: epic_other.id) + + { valid: valid_epic_issues, invalid: invalid_epic_issues } + end + + let(:valid_epic_issues) { valid_and_invalid_epic_issues[:valid] } + let(:invalid_epic_issues) { valid_and_invalid_epic_issues[:invalid] } + + let(:migration) do + described_class.new( + start_id: epic.id, end_id: epic_last.id, batch_table: batch_table, batch_column: batch_column, + sub_batch_size: sub_batch_size, pause_ms: pause_ms, connection: connection + ) + end + + subject { migration.perform } + + it 'removes invalid epic issues' do + expect { subject }.to change { epic_issues.count }.from(13).to(8) + + expect(epic_issues.all).to match_array(valid_epic_issues) + end + + it 'logs deleted records' do + expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance| + expect(instance).to receive(:info).once.with(message: 'Removing EpicIssue records', + migrator: 'DeleteInvalidEpicIssues', + data: invalid_epic_issues.map(&:attributes), + deleted_count: invalid_epic_issues.size) + end + + subject + end + + it 'prevents N+1 queries' do + control = ActiveRecord::QueryRecorder.new(skip_cached: false) do + subject + end + + # recreate deleted records + epic_issues.create!(issue_id: issue2.id, epic_id: epic_sub.id) + epic_issues.create!(issue_id: issue_root.id, epic_id: epic_sub.id) + epic_issues.create!(issue_id: issue_other_4.id, epic_id: epic_root.id) + epic_issues.create!(issue_id: issue_other_3.id, epic_id: epic_sub.id) + epic_issues.create!(issue_id: issue5.id, epic_id: epic_other.id) + + # create new records to delete + issue6 = issues.create!(iid: 14, project_id: project.id, title: 'issue 6', author_id: user.id) + issue7 = issues.create!(iid: 15, project_id: project.id, title: 'issue 7', author_id: user.id) + epic_issues.create!(issue_id: issue6.id, epic_id: epic_other.id) + epic_issues.create!(issue_id: issue7.id, epic_id: epic_other.id) + + expect { subject }.not_to exceed_all_query_limit(control) + end + end +end diff --git a/ee/spec/migrations/schedule_delete_invalid_epic_issues_revised_spec.rb b/ee/spec/migrations/schedule_delete_invalid_epic_issues_revised_spec.rb new file mode 100644 index 00000000000000..e6b84024f56f55 --- /dev/null +++ b/ee/spec/migrations/schedule_delete_invalid_epic_issues_revised_spec.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe ScheduleDeleteInvalidEpicIssuesRevised do + let(:migration) { described_class::MIGRATION } + + describe '#up' do + it 'schedules background jobs for each batch of epics' do + migrate! + + expect(migration).to have_scheduled_batched_migration( + table_name: :epics, + column_name: :id, + interval: described_class::INTERVAL, + batch_size: described_class::BATCH_SIZE + ) + end + end + + describe '#down' do + it 'deletes all batched migration records' do + migrate! + schema_migrate_down! + + expect(migration).not_to have_scheduled_batched_migration + end + end +end diff --git a/lib/gitlab/background_migration/delete_invalid_epic_issues.rb b/lib/gitlab/background_migration/delete_invalid_epic_issues.rb new file mode 100644 index 00000000000000..3af59ab4931cc0 --- /dev/null +++ b/lib/gitlab/background_migration/delete_invalid_epic_issues.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + # rubocop: disable Style/Documentation + class DeleteInvalidEpicIssues < BatchedMigrationJob + def perform + end + end + end +end + +# rubocop:disable Layout/LineLength +Gitlab::BackgroundMigration::DeleteInvalidEpicIssues.prepend_mod_with('Gitlab::BackgroundMigration::DeleteInvalidEpicIssues') -- GitLab