From 86b2e59f8d5966f6d79dd86bf6ae94b547727f66 Mon Sep 17 00:00:00 2001 From: Valery Sizov Date: Tue, 29 Jan 2019 11:28:10 +0200 Subject: [PATCH 1/3] Slower sync when backoff time is being respected If there is nothing to sync we can pick jobs that scheduled for a future sync --- ee/app/finders/geo/project_registry_finder.rb | 2 +- ee/app/finders/geo/registry_finder.rb | 4 +- ee/app/models/geo/project_registry.rb | 4 +- .../geo/repository_shard_sync_worker.rb | 7 +-- ...c-when-backoff-time-is-being-respected.yml | 5 ++ ee/lib/gitlab/geo.rb | 4 ++ ee/spec/factories/geo/project_registry.rb | 5 ++ .../geo/project_registry_finder_spec.rb | 30 ++++++++++ ee/spec/models/geo/project_registry_spec.rb | 2 +- .../geo/repository_shard_sync_worker_spec.rb | 58 ++++++++++++++++--- 10 files changed, 102 insertions(+), 19 deletions(-) create mode 100644 ee/changelogs/unreleased/5348-slower-sync-when-backoff-time-is-being-respected.yml diff --git a/ee/app/finders/geo/project_registry_finder.rb b/ee/app/finders/geo/project_registry_finder.rb index d250c834a7c540..c3fbb84aadc2f9 100644 --- a/ee/app/finders/geo/project_registry_finder.rb +++ b/ee/app/finders/geo/project_registry_finder.rb @@ -206,7 +206,7 @@ def fdw_find_synced_wikis def fdw_find_projects_updated_recently Geo::Fdw::Project.joins("INNER JOIN project_registry ON project_registry.project_id = #{fdw_project_table.name}.id") .merge(Geo::ProjectRegistry.dirty) - .merge(Geo::ProjectRegistry.retry_due) + .order('LEAST(project_registry.repository_retry_at, project_registry.wiki_retry_at) ASC NULLS FIRST') end # rubocop: enable CodeReuse/ActiveRecord diff --git a/ee/app/finders/geo/registry_finder.rb b/ee/app/finders/geo/registry_finder.rb index d430a77cc4a3ef..7cfb6898fec6ac 100644 --- a/ee/app/finders/geo/registry_finder.rb +++ b/ee/app/finders/geo/registry_finder.rb @@ -4,8 +4,6 @@ module Geo class RegistryFinder attr_reader :current_node - delegate :selective_sync?, to: :current_node, allow_nil: true - def initialize(current_node: nil) @current_node = current_node end @@ -23,7 +21,7 @@ def aggregate_pushdown_supported? def use_legacy_queries? # Selective project replication adds a wrinkle to FDW # queries, so we fallback to the legacy version for now. - !Gitlab::Geo::Fdw.enabled? || selective_sync? + Gitlab::Geo.legacy_queries? end # rubocop: disable CodeReuse/ActiveRecord diff --git a/ee/app/models/geo/project_registry.rb b/ee/app/models/geo/project_registry.rb index 385915a4c88aa9..d3711eaf04ddac 100644 --- a/ee/app/models/geo/project_registry.rb +++ b/ee/app/models/geo/project_registry.rb @@ -225,7 +225,7 @@ def reset_checksum! def repository_sync_due?(scheduled_time) return true if last_repository_synced_at.nil? return false unless resync_repository? - return false if repository_retry_at && scheduled_time < repository_retry_at + return false if Gitlab::Geo.legacy_queries? && repository_retry_at && scheduled_time < repository_retry_at scheduled_time > last_repository_synced_at end @@ -233,7 +233,7 @@ def repository_sync_due?(scheduled_time) def wiki_sync_due?(scheduled_time) return true if last_wiki_synced_at.nil? return false unless resync_wiki? - return false if wiki_retry_at && scheduled_time < wiki_retry_at + return false if Gitlab::Geo.legacy_queries? && wiki_retry_at && scheduled_time < wiki_retry_at scheduled_time > last_wiki_synced_at end diff --git a/ee/app/workers/geo/repository_shard_sync_worker.rb b/ee/app/workers/geo/repository_shard_sync_worker.rb index c613ac2aa535db..45f95d9e3716bc 100644 --- a/ee/app/workers/geo/repository_shard_sync_worker.rb +++ b/ee/app/workers/geo/repository_shard_sync_worker.rb @@ -67,12 +67,9 @@ def finder def load_pending_resources resources = find_project_ids_not_synced(batch_size: db_retrieve_batch_size) remaining_capacity = db_retrieve_batch_size - resources.size + return resources if remaining_capacity.zero? - if remaining_capacity.zero? - resources - else - resources + find_project_ids_updated_recently(batch_size: remaining_capacity) - end + resources + find_project_ids_updated_recently(batch_size: remaining_capacity) end # rubocop: disable CodeReuse/ActiveRecord diff --git a/ee/changelogs/unreleased/5348-slower-sync-when-backoff-time-is-being-respected.yml b/ee/changelogs/unreleased/5348-slower-sync-when-backoff-time-is-being-respected.yml new file mode 100644 index 00000000000000..402253f3ec2bca --- /dev/null +++ b/ee/changelogs/unreleased/5348-slower-sync-when-backoff-time-is-being-respected.yml @@ -0,0 +1,5 @@ +--- +title: "[Geo] Fix: Slower sync when backoff time is being respected" +merge_request: 9553 +author: +type: changed diff --git a/ee/lib/gitlab/geo.rb b/ee/lib/gitlab/geo.rb index ee36a6e0b1a713..c7cc7e8187ff70 100644 --- a/ee/lib/gitlab/geo.rb +++ b/ee/lib/gitlab/geo.rb @@ -132,5 +132,9 @@ def self.repository_verification_enabled? true end end + + def self.legacy_queries? + !Gitlab::Geo::Fdw.enabled? || current_node.selective_sync? + end end end diff --git a/ee/spec/factories/geo/project_registry.rb b/ee/spec/factories/geo/project_registry.rb index cb0cb4f382d883..c65b144c1b8449 100644 --- a/ee/spec/factories/geo/project_registry.rb +++ b/ee/spec/factories/geo/project_registry.rb @@ -23,6 +23,11 @@ resync_wiki true end + trait :future_sync do + repository_retry_at { 2.days.from_now } + wiki_retry_at { 2.days.from_now } + end + trait :synced do last_repository_synced_at { 5.days.ago } last_repository_successful_sync_at { 5.days.ago } diff --git a/ee/spec/finders/geo/project_registry_finder_spec.rb b/ee/spec/finders/geo/project_registry_finder_spec.rb index 1b949dd3edbb1a..a7eee943ea90c1 100644 --- a/ee/spec/finders/geo/project_registry_finder_spec.rb +++ b/ee/spec/finders/geo/project_registry_finder_spec.rb @@ -644,6 +644,22 @@ include_examples 'finds all the things' do let(:method_prefix) { 'fdw' } end + + describe '#find_projects_updated_recently' do + it 'returns all dirty projects (including projects with future *_retry_at) when *_retry_at nulls come first' do + project_repo_backoff = create(:project) + project_wiki_backoff = create(:project) + create(:geo_project_registry, :repository_dirty, project: project_repo_backoff, repository_retry_at: 1.day.from_now) + create(:geo_project_registry, :wiki_dirty, project: project_wiki_backoff, wiki_retry_at: 2.days.from_now) + create(:geo_project_registry, :repository_dirty, project: project_repository_dirty) + create(:geo_project_registry, :wiki_dirty, project: project_wiki_dirty) + + projects = subject.find_projects_updated_recently(batch_size: 10).pluck(:id) + + expected_order = [project_repository_dirty.id, project_wiki_dirty.id, project_repo_backoff.id, project_wiki_backoff.id] + expect(projects).to eq(expected_order) + end + end end context 'Legacy' do @@ -656,5 +672,19 @@ include_examples 'finds all the things' do let(:method_prefix) { 'legacy' } end + + describe '#find_projects_updated_recently' do + it 'returns dirty projects (not including projects with future *_retry_at)' do + create(:geo_project_registry, :repository_dirty, :future_sync) + create(:geo_project_registry, :wiki_dirty, :future_sync) + create(:geo_project_registry, :repository_dirty, project: project_repository_dirty) + create(:geo_project_registry, :wiki_dirty, project: project_wiki_dirty) + + projects = subject.find_projects_updated_recently(batch_size: 10).pluck(:id) + + expected_projects = [project_repository_dirty.id, project_wiki_dirty.id] + expect(projects).to eq(expected_projects) + end + end end end diff --git a/ee/spec/models/geo/project_registry_spec.rb b/ee/spec/models/geo/project_registry_spec.rb index f9b36867db89a4..ca7d7d7eb40848 100644 --- a/ee/spec/models/geo/project_registry_spec.rb +++ b/ee/spec/models/geo/project_registry_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Geo::ProjectRegistry do +describe Geo::ProjectRegistry, :geo do using RSpec::Parameterized::TableSyntax set(:project) { create(:project, description: 'kitten mittens') } diff --git a/ee/spec/workers/geo/repository_shard_sync_worker_spec.rb b/ee/spec/workers/geo/repository_shard_sync_worker_spec.rb index 79f10cb45dbec8..25a53443cb169a 100644 --- a/ee/spec/workers/geo/repository_shard_sync_worker_spec.rb +++ b/ee/spec/workers/geo/repository_shard_sync_worker_spec.rb @@ -10,9 +10,14 @@ let!(:secondary) { create(:geo_node) } let(:shard_name) { Gitlab.config.repositories.storages.keys.first } + let(:project) { create(:project) } before do stub_current_geo_node(secondary) + + stub_exclusive_lease(renew: true) + + Gitlab::ShardHealthCache.update([shard_name]) end shared_examples '#perform' do |skip_tests| @@ -25,12 +30,6 @@ skip('FDW is not configured') if skip_tests end - before do - stub_exclusive_lease(renew: true) - - Gitlab::ShardHealthCache.update([shard_name]) - end - it 'performs Geo::ProjectSyncWorker for each project' do expect(Geo::ProjectSyncWorker).to receive(:perform_async).twice.and_return(spy) @@ -105,9 +104,15 @@ end context 'multiple shards' do - it 'uses two loops to schedule jobs' do + it 'divides repos_max_capacity between all the healthy shards' do expect(subject).to receive(:schedule_jobs).twice.and_call_original + allow(Geo::ProjectSyncWorker).to receive(:perform_async) do |project_id| + Geo::ProjectRegistry.create!(project_id: project_id, resync_repository: false, resync_wiki: false) + + 'random-job-id-hash' + end + Gitlab::ShardHealthCache.update([shard_name, 'shard2', 'shard3', 'shard4', 'shard5']) secondary.update!(repos_max_capacity: 5) @@ -294,6 +299,24 @@ describe 'when PostgreSQL FDW is available', :geo do # Skip if FDW isn't activated on this database it_behaves_like '#perform', Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled? + + context 'scheduled for future' do + it 'performs Geo::ProjectSyncWorker for repo that is scheduled for future sync' do + create(:geo_project_registry, :synced, :repository_dirty, :future_sync, project: project) + + expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project.id, { sync_repository: true, sync_wiki: false }) + + subject.perform(shard_name) + end + + it 'performs Geo::ProjectSyncWorker for wiki that is scheduled for future sync' do + create(:geo_project_registry, :synced, :wiki_dirty, :future_sync, project: project) + + expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project.id, { sync_repository: false, sync_wiki: true }) + + subject.perform(shard_name) + end + end end describe 'when PostgreSQL FDW is not enabled', :geo do @@ -302,5 +325,26 @@ end it_behaves_like '#perform', false + + context 'legacy behaviour' do + context 'scheduled for future' do + it 'does not perform Geo::ProjectSyncWorker for repo and wiki that are scheduled for future sync' do + create(:geo_project_registry, :synced, :repository_dirty, :future_sync) + create(:geo_project_registry, :synced, :wiki_dirty, :future_sync) + + expect(Geo::ProjectSyncWorker).not_to receive(:perform_async) + + subject.perform(shard_name) + end + + it 'performs Geo::ProjectSyncWorker for repo that is scheduled for future sync but sets sync_repository flag to false' do + create(:geo_project_registry, :synced, :repository_dirty, repository_retry_at: 2.days.from_now, project: project) + + expect(Geo::ProjectSyncWorker).to receive(:perform_async).with(project.id, { sync_repository: false, sync_wiki: false }) + + subject.perform(shard_name) + end + end + end end end -- GitLab From 4caf7f1cfae3f0db5a2fa89c1a7683a5b0182384 Mon Sep 17 00:00:00 2001 From: Michael Kozono Date: Mon, 4 Mar 2019 13:25:37 -0800 Subject: [PATCH 2/3] Add back delegate --- ee/app/finders/geo/registry_finder.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ee/app/finders/geo/registry_finder.rb b/ee/app/finders/geo/registry_finder.rb index 7cfb6898fec6ac..a870a571005a5a 100644 --- a/ee/app/finders/geo/registry_finder.rb +++ b/ee/app/finders/geo/registry_finder.rb @@ -4,6 +4,8 @@ module Geo class RegistryFinder attr_reader :current_node + delegate :selective_sync?, to: :current_node, allow_nil: true + def initialize(current_node: nil) @current_node = current_node end -- GitLab From 812965b5a9b657ff48215e9e633e204a35d0a433 Mon Sep 17 00:00:00 2001 From: Valery Sizov Date: Tue, 5 Mar 2019 11:41:09 +0200 Subject: [PATCH 3/3] Fix legacy_queries? method And tests that failed because of it --- ee/lib/gitlab/geo.rb | 2 +- ee/spec/models/geo/project_registry_spec.rb | 58 ++++++++++++--------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/ee/lib/gitlab/geo.rb b/ee/lib/gitlab/geo.rb index c7cc7e8187ff70..d6117c4e615a9c 100644 --- a/ee/lib/gitlab/geo.rb +++ b/ee/lib/gitlab/geo.rb @@ -134,7 +134,7 @@ def self.repository_verification_enabled? end def self.legacy_queries? - !Gitlab::Geo::Fdw.enabled? || current_node.selective_sync? + !Gitlab::Geo::Fdw.enabled? || current_node.try(:selective_sync?) end end end diff --git a/ee/spec/models/geo/project_registry_spec.rb b/ee/spec/models/geo/project_registry_spec.rb index ca7d7d7eb40848..2279a796f1f24c 100644 --- a/ee/spec/models/geo/project_registry_spec.rb +++ b/ee/spec/models/geo/project_registry_spec.rb @@ -209,25 +209,26 @@ end describe '#repository_sync_due?' do - where(:last_synced_at, :resync, :retry_at, :expected) do + where(:is_legacy, :last_synced_at, :resync, :retry_at, :expected) do now = Time.now past = now - 1.year future = now + 1.year - nil | false | nil | true - nil | true | nil | true - nil | true | past | true - nil | true | future | true + false | nil | false | nil | true + false | nil | true | nil | true + false | nil | true | past | true + false | nil | true | future | true - past | false | nil | false - past | true | nil | true - past | true | past | true - past | true | future | false + false | past | false | nil | false + false | past | true | nil | true + false | past | true | past | true + false | past | true | future | true + true | past | true | future | false - future | false | nil | false - future | true | nil | false - future | true | past | false - future | true | future | false + false | future | false | nil | false + false | future | true | nil | false + false | future | true | past | false + false | future | true | future | false end with_them do @@ -237,6 +238,8 @@ resync_repository: resync, repository_retry_at: retry_at ) + + allow(Gitlab::Geo).to receive(:legacy_queries?).and_return(is_legacy) end it { expect(registry.repository_sync_due?(Time.now)).to eq(expected) } @@ -244,25 +247,26 @@ end describe '#wiki_sync_due?' do - where(:last_synced_at, :resync, :retry_at, :expected) do + where(:is_legacy, :last_synced_at, :resync, :retry_at, :expected) do now = Time.now past = now - 1.year future = now + 1.year - nil | false | nil | true - nil | true | nil | true - nil | true | past | true - nil | true | future | true + false | nil | false | nil | true + false | nil | true | nil | true + false | nil | true | past | true + false | nil | true | future | true - past | false | nil | false - past | true | nil | true - past | true | past | true - past | true | future | false + false | past | false | nil | false + false | past | true | nil | true + false | past | true | past | true + false | past | true | future | true + true | past | true | future | false - future | false | nil | false - future | true | nil | false - future | true | past | false - future | true | future | false + false | future | false | nil | false + false | future | true | nil | false + false | future | true | past | false + false | future | true | future | false end with_them do @@ -272,6 +276,8 @@ resync_wiki: resync, wiki_retry_at: retry_at ) + + allow(Gitlab::Geo).to receive(:legacy_queries?).and_return(is_legacy) end it { expect(registry.wiki_sync_due?(Time.now)).to eq(expected) } -- GitLab