# File: spec/lib/gitlab/import_export/import_export_equivalence_spec.rb (new file)
# frozen_string_literal: true

require 'spec_helper'

# Round-trip check for the exported project meta-data tree: importing a tree
# and exporting it again should give back the tree we started with.
#
# The equivalence is deliberately loose. The comparison has to ignore:
#
# - row IDs and foreign key IDs
# - some timestamps
# - randomly generated fields like tokens
#
# because these are expected to differ between import/export cycles.
describe Gitlab::ImportExport do
  include ImportExport::CommonUtil
  include ConfigurationHelper
  include ImportExport::ProjectTreeExpectations

  let(:json_fixture) { 'complex' }

  it 'yields the initial tree when importing and exporting it again' do
    admin = create(:user, :admin)
    project = create(:project, creator: admin)

    # Pass 1: derive the actual test fixture from the seed fixture, so that
    # fields missing from the seed (and defaulted during import) are already
    # present in the tree we compare against. Ideally the fixtures would be
    # realistic exports and this step would not be needed.
    seeded = restore_then_save_project(
      project,
      import_path: seed_fixture_path,
      export_path: test_fixture_path
    )
    expect(seeded).to be true

    # Pass 2: import the generated fixture and export it once more. Whatever
    # still differs in the JSON after this cycle counts as a test failure.
    round_tripped = restore_then_save_project(
      project,
      import_path: test_fixture_path,
      export_path: test_tmp_path
    )
    expect(round_tripped).to be true

    imported_tree, exported_tree = [test_fixture_path, test_tmp_path].map do |dir|
      JSON.parse(File.read("#{dir}/project.json"))
    end

    assert_relations_match(imported_tree, exported_tree)
  end

  private

  # Directory holding the checked-in seed fixture.
  def seed_fixture_path
    "#{fixtures_path}/#{json_fixture}"
  end

  # Directory receiving the fixture generated by the first import/export pass.
  def test_fixture_path
    "#{test_tmp_path}/#{json_fixture}"
  end
end

# File: spec/support/import_export/common_util.rb (helpers appended to the existing module)
#
# NOTE(review): only the tail of this file is visible in the patch. The helpers
# below are added right after `setup_import_export_config(name, prefix = nil)`,
# whose last visible statement stubs `Gitlab::ImportExport#export_path`. The
# enclosing namespace is assumed to be ImportExport::CommonUtil (that is what the
# spec above includes) - confirm against the full file.
module ImportExport
  module CommonUtil
    # Root directory of the import/export fixtures.
    def fixtures_path
      "spec/fixtures/lib/gitlab/import_export"
    end

    # Scratch directory used for trees written during tests.
    def test_tmp_path
      "tmp/tests/gitlab-test/import_export"
    end

    # Imports the tree found at +import_path+ into +project+, then exports the
    # project to +export_path+. The export only runs when the import result is
    # truthy; the value of the last step that ran is returned.
    def restore_then_save_project(project, import_path:, export_path:)
      restorer = get_project_restorer(project, import_path)
      saver = get_project_saver(project, export_path)

      restored = restorer.restore
      restored && saver.save
    end

    # Builds a tree restorer acting as the project's creator and reading from +import_path+.
    def get_project_restorer(project, import_path)
      shared_env = get_shared_env(path: import_path)

      Gitlab::ImportExport::ProjectTreeRestorer.new(
        user: project.creator,
        shared: shared_env,
        project: project
      )
    end

    # Builds a tree saver acting as the project's creator and writing to +export_path+.
    def get_project_saver(project, export_path)
      shared_env = get_shared_env(path: export_path)

      Gitlab::ImportExport::ProjectTreeSaver.new(
        project: project,
        current_user: project.creator,
        shared: shared_env
      )
    end

    # Returns a verified double of Gitlab::ImportExport::Shared whose
    # +export_path+ is stubbed to return +path+.
    def get_shared_env(path:)
      shared = instance_double(Gitlab::ImportExport::Shared)
      allow(shared).to receive(:export_path).and_return(path)
      shared
    end
  end
end
# File: spec/support/import_export/project_tree_expectations.rb (new file)
# frozen_string_literal: true

module ImportExport
  # RSpec helpers for checking that two project trees (parsed export JSON) are
  # equivalent once values known to vary between import/export cycles have
  # been normalized away.
  module ProjectTreeExpectations
    # Normalizes both trees, compares them recursively and fails the current
    # example with one message per difference found.
    #
    # @param imported_hash [Hash] the tree that was imported
    # @param exported_hash [Hash] the tree obtained from the subsequent export
    def assert_relations_match(imported_hash, exported_hash)
      normalized_imported_hash = normalize_elements(imported_hash)
      normalized_exported_hash = normalize_elements(exported_hash)

      # this is for sanity checking, to make sure we didn't accidentally pass the test
      # because we essentially ignored everything
      stats = {
        hashes: 0,
        arrays: {
          direct: 0,
          pairwise: 0,
          fuzzy: 0
        },
        values: 0
      }

      failures = match_recursively(normalized_imported_hash, normalized_exported_hash, stats)

      puts "Elements checked:\n#{stats.pretty_inspect}"

      expect(failures).to be_empty, failures.join("\n\n")
    end

    private

    # Dispatches on the node types: hash pairs and array pairs are descended
    # into, anything else (including mixed types) is compared as a plain value.
    #
    # @param stats [Hash] counters updated in place (see assert_relations_match)
    # @param location_stack [Array] keys leading to the current node, for messages
    # @param failures [Array<String>] accumulator, also the return value
    # @return [Array<String>] all failure messages collected so far
    def match_recursively(left_node, right_node, stats, location_stack = [], failures = [])
      if left_node.is_a?(Hash) && right_node.is_a?(Hash)
        match_hashes(left_node, right_node, stats, location_stack, failures)
      elsif left_node.is_a?(Array) && right_node.is_a?(Array)
        match_arrays(left_node, right_node, stats, location_stack, failures)
      else
        stats[:values] += 1
        if left_node != right_node
          failures << failure_message("Value mismatch", location_stack, left_node, right_node)
        end
      end

      failures
    end

    # Reports differing key sets, then compares the values of every key
    # present on the left (a key missing on the right is compared against nil).
    def match_hashes(left_node, right_node, stats, location_stack, failures)
      stats[:hashes] += 1
      left_keys = left_node.keys.to_set
      right_keys = right_node.keys.to_set

      if left_keys != right_keys
        failures << failure_message("Hash keys mismatch", location_stack, left_keys, right_keys)
      end

      left_node.each do |key, left_value|
        location_stack << key
        match_recursively(left_value, right_node[key], stats, location_stack, failures)
        location_stack.pop
      end
    end

    # Compares two arrays using the cheapest strategy that applies.
    def match_arrays(left_node, right_node, stats, location_stack, failures)
      has_simple_elements = left_node.none? { |el| el.is_a?(Enumerable) }

      if has_simple_elements
        # for simple types, we can do a direct order-less set comparison; this
        # branch must be taken whether or not the sets differ, otherwise equal
        # but reordered arrays would fall through to the in-order comparison
        stats[:arrays][:direct] += 1
        if left_node.to_set != right_node.to_set
          failures << failure_message("Elements mismatch", location_stack, left_node, right_node)
        end
      elsif left_node.size == right_node.size
        # if both arrays have the same number of complex elements, we can compare pair-wise in-order
        stats[:arrays][:pairwise] += 1
        left_node.zip(right_node).each do |left_entry, right_entry|
          match_recursively(left_entry, right_entry, stats, location_stack, failures)
        end
      else
        # otherwise we have to fall back to a best-effort match by probing into the right array;
        # this means we will not account for elements that exist on the right, but not on the left
        stats[:arrays][:fuzzy] += 1
        left_node.each do |left_entry|
          right_entry = right_node.find { |el| el == left_entry }
          match_recursively(left_entry, right_entry, stats, location_stack, failures)
        end
      end
    end

    # Formats one difference, prefixed with the key path at which it was found
    # ("root" when the difference is at the top level).
    def failure_message(what, location_stack, left_value, right_value)
      where =
        if location_stack.empty?
          "root"
        else
          location_stack.map { |loc| loc.to_sym.inspect }.join(' -> ')
        end

      ">> [#{where}] #{what}\n\n#{left_value.pretty_inspect}\nNOT EQUAL TO\n\n#{right_value.pretty_inspect}"
    end

    # Helper that traverses a project tree and normalizes data that we know
    # to vary in the process of importing (such as list order or row IDs).
    # Values of ignored keys are replaced by the :ignored marker; the keys
    # themselves are kept so that key-set comparison still works.
    def normalize_elements(elem)
      case elem
      when Hash
        elem.map do |key, value|
          if ignore_key?(key, value)
            [key, :ignored]
          else
            [key, normalize_elements(value)]
          end
        end.to_h
      when Array
        elem.map { |a| normalize_elements(a) }
      else
        elem
      end
    end

    # We currently need to ignore certain entries when checking for equivalence because
    # we know them to change between imports/exports either by design or because of bugs;
    # this helper filters out these problematic nodes. The value is currently unused.
    def ignore_key?(key, _value)
      id?(key) || # IDs are known to be replaced during imports
        key == 'updated_at' || # these get changed frequently during imports
        key == 'next_run_at' || # these values change based on wall clock
        key == 'notes' # the importer attaches an extra "by user XYZ" at the end of a note
    end

    # True for primary key and foreign key columns ('id' or anything ending in '_id').
    def id?(key)
      key == 'id' || key.end_with?('_id')
    end
  end
end