diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb index ab0a4fd62891a020b9f26173f0a9a60172062c55..01c369f23d212f4d351f27c65ad399af7e060f2a 100644 --- a/app/models/ci/pipeline.rb +++ b/app/models/ci/pipeline.rb @@ -26,7 +26,7 @@ class Pipeline < ApplicationRecord belongs_to :merge_request, class_name: 'MergeRequest' belongs_to :external_pull_request - has_internal_id :iid, scope: :project, presence: false, ensure_if: -> { !importing? }, init: ->(s) do + has_internal_id :iid, scope: :project, presence: false, track_if: -> { !importing? }, ensure_if: -> { !importing? }, init: ->(s) do s&.project&.all_pipelines&.maximum(:iid) || s&.project&.all_pipelines&.count end diff --git a/app/models/concerns/atomic_internal_id.rb b/app/models/concerns/atomic_internal_id.rb index 64df265dc25303ea8bd2df2b3155571353c41f84..3e9b084e784a67ccd436eb2d30f7edf28342554f 100644 --- a/app/models/concerns/atomic_internal_id.rb +++ b/app/models/concerns/atomic_internal_id.rb @@ -27,13 +27,13 @@ module AtomicInternalId extend ActiveSupport::Concern class_methods do - def has_internal_id(column, scope:, init:, ensure_if: nil, presence: true) # rubocop:disable Naming/PredicateName + def has_internal_id(column, scope:, init:, ensure_if: nil, track_if: nil, presence: true) # rubocop:disable Naming/PredicateName # We require init here to retain the ability to recalculate in the absence of a - # InternaLId record (we may delete records in `internal_ids` for example). + # InternalId record (we may delete records in `internal_ids` for example). raise "has_internal_id requires a init block, none given." unless init raise "has_internal_id needs to be defined on association." unless self.reflect_on_association(scope) - before_validation :"track_#{scope}_#{column}!", on: :create + before_validation :"track_#{scope}_#{column}!", on: :create, if: track_if before_validation :"ensure_#{scope}_#{column}!", on: :create, if: ensure_if validates column, presence: presence diff --git a/app/models/deployment.rb b/app/models/deployment.rb index 5a22a6ada9d8d8ba2564557b5d79854f0c00d9f4..74cc7f935800691be91d35942a0d91a3ea9f8f34 100644 --- a/app/models/deployment.rb +++ b/app/models/deployment.rb @@ -5,6 +5,7 @@ class Deployment < ApplicationRecord include IidRoutes include AfterCommitQueue include UpdatedAtFilterable + include Importable include Gitlab::Utils::StrongMemoize belongs_to :project, required: true @@ -17,7 +18,7 @@ class Deployment < ApplicationRecord has_many :merge_requests, through: :deployment_merge_requests - has_internal_id :iid, scope: :project, init: ->(s) do + has_internal_id :iid, scope: :project, track_if: -> { !importing? }, init: ->(s) do Deployment.where(project: s.project).maximum(:iid) if s&.project end diff --git a/app/models/issue.rb b/app/models/issue.rb index da6450c6092ac803d5762889002c69e35d47ed8d..bf6002781627e134944eebf18e334285a40eb0f7 100644 --- a/app/models/issue.rb +++ b/app/models/issue.rb @@ -31,7 +31,7 @@ class Issue < ApplicationRecord belongs_to :duplicated_to, class_name: 'Issue' belongs_to :closed_by, class_name: 'User' - has_internal_id :iid, scope: :project, init: ->(s) { s&.project&.issues&.maximum(:iid) } + has_internal_id :iid, scope: :project, track_if: -> { !importing? }, init: ->(s) { s&.project&.issues&.maximum(:iid) } has_many :issue_milestones has_many :milestones, through: :issue_milestones @@ -78,8 +78,8 @@ class Issue < ApplicationRecord ignore_column :state, remove_with: '12.7', remove_after: '2019-12-22' - after_commit :expire_etag_cache - after_save :ensure_metrics, unless: :imported? + after_commit :expire_etag_cache, unless: :importing? + after_save :ensure_metrics, unless: :importing? attr_spammable :title, spam_title: true attr_spammable :description, spam_description: true diff --git a/app/models/merge_request.rb b/app/models/merge_request.rb index 4eb9c8706d363e4bc3b732ec7ad958defcfedafc..f05ff2d131cdc12d6738f12f4cf40cc43a36f49b 100644 --- a/app/models/merge_request.rb +++ b/app/models/merge_request.rb @@ -31,7 +31,7 @@ class MergeRequest < ApplicationRecord belongs_to :source_project, class_name: "Project" belongs_to :merge_user, class_name: "User" - has_internal_id :iid, scope: :target_project, init: ->(s) { s&.target_project&.merge_requests&.maximum(:iid) } + has_internal_id :iid, scope: :target_project, track_if: -> { !importing? }, init: ->(s) { s&.target_project&.merge_requests&.maximum(:iid) } has_many :merge_request_diffs @@ -97,8 +97,8 @@ def merge_request_diff after_create :ensure_merge_request_diff after_update :clear_memoized_shas after_update :reload_diff_if_branch_changed - after_save :ensure_metrics - after_commit :expire_etag_cache + after_save :ensure_metrics, unless: :importing? + after_commit :expire_etag_cache, unless: :importing? # When this attribute is true some MR validation is ignored # It allows us to close or modify broken merge requests diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb index 71a344e69e3560858284edbc2870fece2e7db172..fa633a1a7257643edf446e00c14f446b4660b3db 100644 --- a/app/models/merge_request_diff.rb +++ b/app/models/merge_request_diff.rb @@ -138,7 +138,7 @@ def self.ids_for_external_storage_migration(limit:) # All diff information is collected from repository after object is created. # It allows you to override variables like head_commit_sha before getting diff. after_create :save_git_content, unless: :importing? - after_create_commit :set_as_latest_diff + after_create_commit :set_as_latest_diff, unless: :importing? after_save :update_external_diff_store, if: -> { !importing? && saved_change_to_external_diff? } diff --git a/app/models/milestone.rb b/app/models/milestone.rb index 920c28aeceb89e12b1e37882330f68511be765d5..5da92fc4bc547fb59af1bd819749593c16e6e729 100644 --- a/app/models/milestone.rb +++ b/app/models/milestone.rb @@ -17,6 +17,7 @@ class Milestone < ApplicationRecord include StripAttribute include Milestoneish include FromUnion + include Importable include Gitlab::SQL::Pattern prepend_if_ee('::EE::Milestone') # rubocop: disable Cop/InjectEnterpriseEditionModule @@ -30,8 +31,8 @@ class Milestone < ApplicationRecord has_many :milestone_releases has_many :releases, through: :milestone_releases - has_internal_id :iid, scope: :project, init: ->(s) { s&.project&.milestones&.maximum(:iid) } - has_internal_id :iid, scope: :group, init: ->(s) { s&.group&.milestones&.maximum(:iid) } + has_internal_id :iid, scope: :project, track_if: -> { !importing? }, init: ->(s) { s&.project&.milestones&.maximum(:iid) } + has_internal_id :iid, scope: :group, track_if: -> { !importing? }, init: ->(s) { s&.group&.milestones&.maximum(:iid) } has_many :issues has_many :labels, -> { distinct.reorder('labels.title') }, through: :issues diff --git a/app/models/release.rb b/app/models/release.rb index 4fac64689ab1e357c3a641c90cdcb0574c870c87..e125c825f0ecc0e7418b1ef315ab4a54d87e270a 100644 --- a/app/models/release.rb +++ b/app/models/release.rb @@ -3,6 +3,7 @@ class Release < ApplicationRecord include Presentable include CacheMarkdownField + include Importable include Gitlab::Utils::StrongMemoize cache_markdown_field :description @@ -33,8 +34,8 @@ class Release < ApplicationRecord delegate :repository, to: :project - after_commit :create_evidence!, on: :create - after_commit :notify_new_release, on: :create + after_commit :create_evidence!, on: :create, unless: :importing? + after_commit :notify_new_release, on: :create, unless: :importing? MAX_NUMBER_TO_DISPLAY = 3 diff --git a/changelogs/unreleased/mark-some-as-not-required-during-import.yml b/changelogs/unreleased/mark-some-as-not-required-during-import.yml new file mode 100644 index 0000000000000000000000000000000000000000..1d8449728babce6c6ad69e06555ad3a32bed5ed2 --- /dev/null +++ b/changelogs/unreleased/mark-some-as-not-required-during-import.yml @@ -0,0 +1,5 @@ +--- +title: Add `importing?` to disable some callbacks +merge_request: +author: +type: performance diff --git a/tmp/ar_bulk_insert.rb b/tmp/ar_bulk_insert.rb new file mode 100644 index 0000000000000000000000000000000000000000..ec260dc44e5205f125f3b7ac6b82b53ebf7dd6de --- /dev/null +++ b/tmp/ar_bulk_insert.rb @@ -0,0 +1,197 @@ +def bulk_insert_subject_key(subject) + "bulk_insert::#{subject.object_id}" +end + +def bulk_insert_process_collection_item(collection) + data = collection&.shift + return unless data + + # assign the key context with the collection + key = bulk_insert_subject_key(data[:record]) + + #puts "Processing item: #{key}" + Thread.current[key] = data[:collection] + + begin + saved = data[:association].insert_record(data[:record], data[:validate]) + #puts "Processed item: #{key} => #{saved}" + raise ActiveRecord::Rollback if !saved && !data[:validate] + true + ensure + Thread.current[key] = nil + end +end + +def bulk_insert_next_for_subject(subject) + key = bulk_insert_subject_key(subject) + bulk_insert_process_collection_item(Thread.current[key]) +end + +MAX_BULK_INSERT = 100 + +def bulk_insert_flush_table(table_name, with_ids) + key = "bulk_insert_#{table_name}" + return unless Thread.current[key].present? + + collection = Thread.current[key] + + puts "Doing bulk insert for #{table_name} with #{collection.count} items" + + ids = Gitlab::Database.bulk_insert(table_name, collection.pluck(:attr), return_ids: with_ids) + + collection.each.with_index do |record, index| + record[:call].call(ids[index]) + end if with_ids + +ensure + Thread.current[key] = nil +end + +def bulk_insert_add_table(table_name, attributes, &blk) + key = "bulk_insert_#{table_name}" + Thread.current[key] ||= [] + Thread.current[key] << { attr: attributes, call: blk } + + #puts "Added bulk insert item to #{table_name}, total: #{Thread.current[key].count}" + Thread.current[key].count >= MAX_BULK_INSERT +end + +def bulk_insert_subject_with_values(subject, values) + table_name = subject.class.table_name + + #puts "Inserting #{table_name} => #{values}" + + id = nil + + needs_flush = bulk_insert_add_table(table_name, values) do |received_id| + id = received_id + #puts "Received ID: #{id}" + end + + if needs_flush || !bulk_insert_next_for_subject(subject) + bulk_insert_flush_table(table_name, subject.class.primary_key.present?) + end + + #puts "Done #{table_name} => #{values}" + + id +end + +def bulk_insert_all_records(association, records, validate) + collection = [] + + records.each do |record| + collection << { + collection: collection, + association: association, + record: record, + validate: validate + } + end + + return if collection.empty? + + puts "Gathered all bulk inserts: #{association.reflection.class_name} => #{collection.count}" + + while bulk_insert_process_collection_item(collection) + end +end + +module ActiveRecordPersistence + extend ActiveSupport::Concern + + def _create_record(*) + Thread.current[:insert_record_current] = self + super + end + + class_methods do + def _insert_record(values) # :nodoc: + primary_key_value = nil + + if primary_key && Hash === values + primary_key_value = values[primary_key] + + if !primary_key_value && prefetch_primary_key? + primary_key_value = next_sequence_value + values[primary_key] = primary_key_value + end + end + + # we need to merge with the defaults, it is not always given... + casted_values = columns_hash.map do |key, column_def| + next if key == primary_key + + [key, values.include?(key) ? type_caster.type_cast_for_database(key, values[key]) : column_def.default] + end.compact.to_h + + bulk_insert_subject_with_values(Thread.current[:insert_record_current], casted_values) + end + end +end + +module ActiveRecordAutoSave + def save_collection_association(reflection) + # puts "save_collection_association: #{reflection.name}" + + association = association_instance_get(reflection.name) + return unless association + + autosave = reflection.options[:autosave] + + # reconstruct the scope now that we know the owner's id + association.reset_scope + + if records = associated_records_to_validate_or_save(association, @new_record_before_save, autosave) + if autosave + records_to_destroy = records.select(&:marked_for_destruction?) + records_to_destroy.each { |record| association.destroy(record) } + records -= records_to_destroy + end + + bulk_insert_collection = [] + + records.each do |record| + next if record.destroyed? + + saved = true + + if autosave != false && (@new_record_before_save || record.new_record?) + if autosave + bulk_insert_collection << { + collection: bulk_insert_collection, + association: association, + record: record, + validate: false + } + else + unless reflection.nested? + bulk_insert_collection << { + collection: bulk_insert_collection, + association: association, + record: record, + validate: true + } + end + end + elsif autosave + saved = record.save(validate: false) + end + + raise ActiveRecord::Rollback unless saved + end + + return if bulk_insert_collection.empty? + + puts "Gathered all bulk inserts: #{reflection.class_name} => #{bulk_insert_collection.count}" + + while bulk_insert_process_collection_item(bulk_insert_collection) + end + end + end +end + +ActiveRecord::Base.prepend(ActiveRecordAutoSave) +ActiveRecord::Base.prepend(ActiveRecordPersistence) + +puts "Loaded MyCollectionAssociation" diff --git a/tmp/restore-project-tree.rb b/tmp/restore-project-tree.rb new file mode 100644 index 0000000000000000000000000000000000000000..a298a768455956ce28028b64a861f11466bf5206 --- /dev/null +++ b/tmp/restore-project-tree.rb @@ -0,0 +1,29 @@ +shared_class = Struct.new(:export_path) do + def error(message) + raise message + end +end + +load 'tmp/ar_bulk_insert.rb' + +ActiveRecord::Base.logger = Logger.new(STDOUT) +RequestStore.begin! +RequestStore.clear! + +project = Project.find(38) +project.issues.all.delete_all +project.merge_requests.all.delete_all +project.ci_pipelines.delete_all + +# shared = shared_class.new('./exports/single-relation') +shared = shared_class.new('./tmp/exports/gitlabhq') + +result = Benchmark.measure do + Gitlab::ImportExport::ProjectTreeRestorer.new( + user: User.first, + shared: shared, + project: project + ).restore +end + +pp result