From b9aa07da519b5b3b9c803380999b20c6a57aa9f7 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 15:35:16 +0100 Subject: [PATCH 01/13] Add columns and indices on web hooks This adds three new columns on web-hooks: The two columns are - recent_failures: a counter of recent 4xx level failures - backoff_count: a counter of times we have backed-off - disabled_until: a timestamp for when we can next use the hook One new index is added, to aid finding enabled web-hooks, on `(project_id, recent_failures)`, which is used when selecting active project hooks. --- ...81325_add_failure_tracking_to_web_hooks.rb | 11 ++++++++++ ..._on_web_hook_project_id_recent_failures.rb | 20 +++++++++++++++++++ db/schema_migrations/20210429181325 | 1 + db/schema_migrations/20210504085144 | 1 + db/structure.sql | 7 ++++++- 5 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20210429181325_add_failure_tracking_to_web_hooks.rb create mode 100644 db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb create mode 100644 db/schema_migrations/20210429181325 create mode 100644 db/schema_migrations/20210504085144 diff --git a/db/migrate/20210429181325_add_failure_tracking_to_web_hooks.rb b/db/migrate/20210429181325_add_failure_tracking_to_web_hooks.rb new file mode 100644 index 00000000000000..4a34c2dd3073b2 --- /dev/null +++ b/db/migrate/20210429181325_add_failure_tracking_to_web_hooks.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class AddFailureTrackingToWebHooks < ActiveRecord::Migration[6.0] + def change + change_table(:web_hooks, bulk: true) do |t| + t.integer :recent_failures, null: false, limit: 2, default: 0 + t.integer :backoff_count, null: false, limit: 2, default: 0 + t.column :disabled_until, :timestamptz + end + end +end diff --git a/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb b/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb new file mode 100644 index 00000000000000..db21ebad854bbe --- /dev/null +++ b/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +# See https://docs.gitlab.com/ee/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class AddIndexOnWebHookProjectIdRecentFailures < ActiveRecord::Migration[6.0] + include Gitlab::Database::MigrationHelpers + + INDEX_NAME = 'index_web_hooks_on_project_id_recent_failures' + + disable_ddl_transaction! + + def up + add_concurrent_index(:web_hooks, [:project_id, :recent_failures], name: INDEX_NAME) + end + + def down + remove_concurrent_index_by_name(:web_hooks, INDEX_NAME) + end +end diff --git a/db/schema_migrations/20210429181325 b/db/schema_migrations/20210429181325 new file mode 100644 index 00000000000000..d778566a580342 --- /dev/null +++ b/db/schema_migrations/20210429181325 @@ -0,0 +1 @@ +9674f04640f897928925ff1e23ff6d3ff918627b7c2374713a31071678956614 \ No newline at end of file diff --git a/db/schema_migrations/20210504085144 b/db/schema_migrations/20210504085144 new file mode 100644 index 00000000000000..67abcd8eece055 --- /dev/null +++ b/db/schema_migrations/20210504085144 @@ -0,0 +1 @@ +3cdf8e93c4b80867a5d8e086f3f44eaeb479e875abf16187b94b3f6238faf062 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 5837926893efd2..c0dcc868c566d1 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -19111,7 +19111,10 @@ CREATE TABLE web_hooks ( releases_events boolean DEFAULT false NOT NULL, feature_flag_events boolean DEFAULT false NOT NULL, member_events boolean DEFAULT false NOT NULL, - subgroup_events boolean DEFAULT false NOT NULL + subgroup_events boolean DEFAULT false NOT NULL, + recent_failures smallint DEFAULT 0 NOT NULL, + backoff_count smallint DEFAULT 0 NOT NULL, + disabled_until timestamp with time zone ); CREATE SEQUENCE web_hooks_id_seq @@ -24576,6 +24579,8 @@ CREATE INDEX index_web_hooks_on_group_id ON web_hooks USING btree (group_id) WHE CREATE INDEX index_web_hooks_on_project_id ON web_hooks USING btree (project_id); +CREATE INDEX index_web_hooks_on_project_id_recent_failures ON web_hooks USING btree (project_id, recent_failures); + CREATE INDEX index_web_hooks_on_service_id ON web_hooks USING btree (service_id); CREATE INDEX index_web_hooks_on_type ON web_hooks USING btree (type); -- GitLab From 2da8b852b648b693ec492b2c5472b4b453d1196a Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Thu, 29 Apr 2021 19:29:37 +0100 Subject: [PATCH 02/13] Add WebHook.executable and WebHook#executable? These methods serve to test if a hook should be considered executable or disabled. We set the disabled state in the WebHookService following each request. --- app/models/concerns/triggerable_hooks.rb | 2 +- app/models/hooks/web_hook.rb | 12 ++++- app/services/web_hook_service.rb | 22 ++++++++ spec/models/hooks/web_hook_spec.rb | 57 ++++++++++++++++++++ spec/services/web_hook_service_spec.rb | 67 ++++++++++++++++++++++-- 5 files changed, 154 insertions(+), 6 deletions(-) diff --git a/app/models/concerns/triggerable_hooks.rb b/app/models/concerns/triggerable_hooks.rb index db5df6c2c9f1e1..4d3a74b59fa68d 100644 --- a/app/models/concerns/triggerable_hooks.rb +++ b/app/models/concerns/triggerable_hooks.rb @@ -33,7 +33,7 @@ def hooks_for(trigger) end def select_active(hooks_scope, data) - select do |hook| + executable.select do |hook| ActiveHookFilter.new(hook).matches?(hooks_scope, data) end end diff --git a/app/models/hooks/web_hook.rb b/app/models/hooks/web_hook.rb index dbd5a1b032a6db..9a373b4388accc 100644 --- a/app/models/hooks/web_hook.rb +++ b/app/models/hooks/web_hook.rb @@ -3,6 +3,8 @@ class WebHook < ApplicationRecord include Sortable + FAILURE_THRESHOLD = 3 # three strikes + attr_encrypted :token, mode: :per_attribute_iv, algorithm: 'aes-256-gcm', @@ -21,15 +23,21 @@ class WebHook < ApplicationRecord validates :token, format: { without: /\n/ } validates :push_events_branch_filter, branch_filter: true + scope :executable, -> { where('recent_failures <= ? AND (disabled_until IS NULL OR disabled_until < ?)', FAILURE_THRESHOLD, Time.current) } + + def executable? + recent_failures <= FAILURE_THRESHOLD && (disabled_until.nil? || disabled_until < Time.current) + end + # rubocop: disable CodeReuse/ServiceClass def execute(data, hook_name) - WebHookService.new(self, data, hook_name).execute + WebHookService.new(self, data, hook_name).execute if executable? end # rubocop: enable CodeReuse/ServiceClass # rubocop: disable CodeReuse/ServiceClass def async_execute(data, hook_name) - WebHookService.new(self, data, hook_name).async_execute + WebHookService.new(self, data, hook_name).async_execute if executable? end # rubocop: enable CodeReuse/ServiceClass diff --git a/app/services/web_hook_service.rb b/app/services/web_hook_service.rb index 5a51b42f9f9479..d1121d57577740 100644 --- a/app/services/web_hook_service.rb +++ b/app/services/web_hook_service.rb @@ -6,6 +6,18 @@ class InternalErrorResponse attr_reader :body, :headers, :code + def success? + false + end + + def redirection? + false + end + + def internal_server_error? + true + end + def initialize @headers = Gitlab::HTTP::Response::Headers.new({}) @body = '' @@ -33,6 +45,8 @@ def initialize(hook, data, hook_name) end def execute + return { status: :error, message: 'Hook disabled' } unless hook.executable? + start_time = Gitlab::Metrics::System.monotonic_time response = if parsed_url.userinfo.blank? @@ -104,6 +118,14 @@ def make_request_with_auth end def log_execution(trigger:, url:, request_data:, response:, execution_duration:, error_message: nil) + if response.success? || response.redirection? + hook.update!(recent_failures: 0) + elsif response.internal_server_error? + hook.update!(disabled_until: 1.day.from_now) + else + hook.update!(recent_failures: hook.recent_failures + 1) + end + WebHookLog.create( web_hook: hook, trigger: trigger, diff --git a/spec/models/hooks/web_hook_spec.rb b/spec/models/hooks/web_hook_spec.rb index 413e69fb071ad5..24f71336e4a10e 100644 --- a/spec/models/hooks/web_hook_spec.rb +++ b/spec/models/hooks/web_hook_spec.rb @@ -94,4 +94,61 @@ expect { web_hook.destroy! }.to change(web_hook.web_hook_logs, :count).by(-3) end end + + describe '.executable' do + it 'finds the correct set of project hooks' do + project = create(:project) + + [ + [0, 1.minute.from_now], + [1, 1.minute.from_now], + [3, 1.minute.from_now], + [4, nil], + [4, 1.day.ago], + [4, 1.minute.from_now] + ].map do |(recent_failures, disabled_until)| + create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until) + end + + executables = [ + [0, nil], + [0, 1.day.ago], + [1, nil], + [1, 1.day.ago], + [3, nil], + [3, 1.day.ago] + ].map do |(recent_failures, disabled_until)| + create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until) + end + + expect(described_class.executable).to match_array executables + end + end + + describe '#executable?' do + where(:recent_failures, :disabled_until, :executable) do + [ + [0, nil, true], + [0, 1.day.ago, true], + [0, 1.minute.from_now, false], + [1, nil, true], + [1, 1.day.ago, true], + [1, 1.minute.from_now, false], + [3, nil, true], + [3, 1.day.ago, true], + [3, 1.minute.from_now, false], + [4, nil, false], + [4, 1.day.ago, false], + [4, 1.minute.from_now, false] + ] + end + + with_them do + it 'has the correct state' do + web_hook = create(:project_hook, recent_failures: recent_failures, disabled_until: disabled_until) + + expect(web_hook.executable?).to eq(executable) + end + end + end end diff --git a/spec/services/web_hook_service_spec.rb b/spec/services/web_hook_service_spec.rb index 2fe72ab31c2618..51ae8f4c540545 100644 --- a/spec/services/web_hook_service_spec.rb +++ b/spec/services/web_hook_service_spec.rb @@ -21,6 +21,10 @@ let(:service_instance) { described_class.new(project_hook, data, :push_hooks) } + around do |example| + travel_to(Time.current) { example.run } + end + describe '#initialize' do before do stub_application_setting(setting_name => setting) @@ -120,10 +124,21 @@ expect { service_instance.execute }.to raise_error(StandardError) end + it 'does not execute disabled hooks' do + project_hook.update!(recent_failures: 4) + + expect(service_instance.execute).to eq({ status: :error, message: 'Hook disabled' }) + end + it 'handles exceptions' do - exceptions = [SocketError, OpenSSL::SSL::SSLError, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::EHOSTUNREACH, Net::OpenTimeout, Net::ReadTimeout, Gitlab::HTTP::BlockedUrlError, Gitlab::HTTP::RedirectionTooDeep] + exceptions = [ + SocketError, OpenSSL::SSL::SSLError, Errno::ECONNRESET, Errno::ECONNREFUSED, + Errno::EHOSTUNREACH, Net::OpenTimeout, Net::ReadTimeout, + Gitlab::HTTP::BlockedUrlError, Gitlab::HTTP::RedirectionTooDeep + ] exceptions.each do |exception_class| exception = exception_class.new('Exception message') + project_hook.update!(disabled_until: nil) stub_full_request(project_hook.url, method: :post).to_raise(exception) expect(service_instance.execute).to eq({ status: :error, message: exception.to_s }) @@ -166,10 +181,11 @@ context 'with success' do before do stub_full_request(project_hook.url, method: :post).to_return(status: 200, body: 'Success') - service_instance.execute end it 'log successful execution' do + service_instance.execute + expect(hook_log.trigger).to eq('push_hooks') expect(hook_log.url).to eq(project_hook.url) expect(hook_log.request_headers).to eq(headers) @@ -178,15 +194,52 @@ expect(hook_log.execution_duration).to be > 0 expect(hook_log.internal_error_message).to be_nil end + + it 'does not increment the failure count' do + expect { service_instance.execute }.not_to change(project_hook, :recent_failures) + end + + it 'does not change the disabled_until attribute' do + expect { service_instance.execute }.not_to change(project_hook, :disabled_until) + end + end + + context 'with bad request' do + before do + stub_full_request(project_hook.url, method: :post).to_return(status: 400, body: 'Bad request') + end + + it 'logs failed execution' do + service_instance.execute + + expect(hook_log).to have_attributes( + trigger: eq('push_hooks'), + url: eq(project_hook.url), + request_headers: eq(headers), + response_body: eq('Bad request'), + response_status: eq('400'), + execution_duration: be > 0, + internal_error_message: be_nil + ) + end + + it 'increments the failure count' do + expect { service_instance.execute }.to change(project_hook, :recent_failures).by(1) + end + + it 'does not change the disabled_until attribute' do + expect { service_instance.execute }.not_to change(project_hook, :disabled_until) + end end context 'with exception' do before do stub_full_request(project_hook.url, method: :post).to_raise(SocketError.new('Some HTTP Post error')) - service_instance.execute end it 'log failed execution' do + service_instance.execute + expect(hook_log.trigger).to eq('push_hooks') expect(hook_log.url).to eq(project_hook.url) expect(hook_log.request_headers).to eq(headers) @@ -195,6 +248,14 @@ expect(hook_log.execution_duration).to be > 0 expect(hook_log.internal_error_message).to eq('Some HTTP Post error') end + + it 'does not increment the failure count' do + expect { service_instance.execute }.not_to change(project_hook, :recent_failures) + end + + it 'sets the disabled_until attribute' do + expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now) + end end context 'with unsafe response body' do -- GitLab From d98c9ebc98ef5b1f608bde493826bac186cd6781 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 15:41:44 +0100 Subject: [PATCH 03/13] Add changelog entry --- changelogs/unreleased/ajk-disable-broken-webhooks.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/ajk-disable-broken-webhooks.yml diff --git a/changelogs/unreleased/ajk-disable-broken-webhooks.yml b/changelogs/unreleased/ajk-disable-broken-webhooks.yml new file mode 100644 index 00000000000000..b5a895763664fc --- /dev/null +++ b/changelogs/unreleased/ajk-disable-broken-webhooks.yml @@ -0,0 +1,5 @@ +--- +title: Disable web-hooks that fail repeatedly +merge_request: +author: +type: changed -- GitLab From e1fb3ec90346a843228ec67f9987ce42196fe7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=A4=96=20GitLab=20Bot=20=F0=9F=A4=96?= Date: Mon, 3 May 2021 14:47:26 +0000 Subject: [PATCH 04/13] Set MR in changelog --- changelogs/unreleased/ajk-disable-broken-webhooks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/unreleased/ajk-disable-broken-webhooks.yml b/changelogs/unreleased/ajk-disable-broken-webhooks.yml index b5a895763664fc..5f09552068cac2 100644 --- a/changelogs/unreleased/ajk-disable-broken-webhooks.yml +++ b/changelogs/unreleased/ajk-disable-broken-webhooks.yml @@ -1,5 +1,5 @@ --- title: Disable web-hooks that fail repeatedly -merge_request: +merge_request: 60837 author: type: changed -- GitLab From 1c2f790e4195b6c881e068b2234cba54a752c1dd Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 17:19:34 +0100 Subject: [PATCH 05/13] Make spec more robust by scoping to project Also, freeze the test time so that we don't have to worry about time drift during test runs. --- spec/models/hooks/web_hook_spec.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spec/models/hooks/web_hook_spec.rb b/spec/models/hooks/web_hook_spec.rb index 24f71336e4a10e..afa2078ec7dfda 100644 --- a/spec/models/hooks/web_hook_spec.rb +++ b/spec/models/hooks/web_hook_spec.rb @@ -5,6 +5,10 @@ RSpec.describe WebHook do let(:hook) { build(:project_hook) } + around do |example| + travel_to(Time.current) { example.run } + end + describe 'associations' do it { is_expected.to have_many(:web_hook_logs) } end @@ -121,7 +125,7 @@ create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until) end - expect(described_class.executable).to match_array executables + expect(described_class.where(project_id: project.id).executable).to match_array executables end end -- GitLab From 324ef5304154a944461b22d3002a43e18e5285ce Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 20:44:47 +0100 Subject: [PATCH 06/13] Delegate to super implementation --- app/models/hooks/service_hook.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/models/hooks/service_hook.rb b/app/models/hooks/service_hook.rb index 4caa45a13d4fea..b35542d5b9332c 100644 --- a/app/models/hooks/service_hook.rb +++ b/app/models/hooks/service_hook.rb @@ -6,9 +6,7 @@ class ServiceHook < WebHook belongs_to :service validates :service, presence: true - # rubocop: disable CodeReuse/ServiceClass def execute(data, hook_name = 'service_hook') - WebHookService.new(self, data, hook_name).execute + super(data, hook_name) end - # rubocop: enable CodeReuse/ServiceClass end -- GitLab From 081f38c837f5ae571bd2e59fd5d39cabcc85a3d6 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 20:46:54 +0100 Subject: [PATCH 07/13] Add feature flag definition --- .../development/web_hooks_disable_failed.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 config/feature_flags/development/web_hooks_disable_failed.yml diff --git a/config/feature_flags/development/web_hooks_disable_failed.yml b/config/feature_flags/development/web_hooks_disable_failed.yml new file mode 100644 index 00000000000000..a54034d73e8a2c --- /dev/null +++ b/config/feature_flags/development/web_hooks_disable_failed.yml @@ -0,0 +1,8 @@ +--- +name: web_hooks_disable_failed +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60837 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/329849 +milestone: '13.12' +type: development +group: group::ecosystem +default_enabled: false -- GitLab From c6df195e46b522475b8283c73fed09538b904c2c Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Mon, 3 May 2021 20:52:28 +0100 Subject: [PATCH 08/13] Guard executable predicates with feature flag --- app/models/hooks/project_hook.rb | 4 + app/models/hooks/web_hook.rb | 14 +++- ee/app/models/hooks/group_hook.rb | 4 + spec/models/hooks/web_hook_spec.rb | 121 ++++++++++++++++++++++------- 4 files changed, 116 insertions(+), 27 deletions(-) diff --git a/app/models/hooks/project_hook.rb b/app/models/hooks/project_hook.rb index b625a70b4446d7..83858ee0c77143 100644 --- a/app/models/hooks/project_hook.rb +++ b/app/models/hooks/project_hook.rb @@ -29,6 +29,10 @@ class ProjectHook < WebHook def pluralized_name _('Webhooks') end + + def web_hooks_disable_failed? + Feature.enabled?(:web_hooks_disable_failed, project) + end end ProjectHook.prepend_if_ee('EE::ProjectHook') diff --git a/app/models/hooks/web_hook.rb b/app/models/hooks/web_hook.rb index 9a373b4388accc..3c2aecfcf9521f 100644 --- a/app/models/hooks/web_hook.rb +++ b/app/models/hooks/web_hook.rb @@ -23,9 +23,15 @@ class WebHook < ApplicationRecord validates :token, format: { without: /\n/ } validates :push_events_branch_filter, branch_filter: true - scope :executable, -> { where('recent_failures <= ? AND (disabled_until IS NULL OR disabled_until < ?)', FAILURE_THRESHOLD, Time.current) } + scope :executable, -> do + next all unless Feature.enabled?(:web_hooks_disable_failed) + + where('recent_failures <= ? AND (disabled_until IS NULL OR disabled_until < ?)', FAILURE_THRESHOLD, Time.current) + end def executable? + return true unless web_hooks_disable_failed? + recent_failures <= FAILURE_THRESHOLD && (disabled_until.nil? || disabled_until < Time.current) end @@ -49,4 +55,10 @@ def allow_local_requests? def help_path 'user/project/integrations/webhooks' end + + private + + def web_hooks_disable_failed? + Feature.enabled?(:web_hooks_disable_failed) + end end diff --git a/ee/app/models/hooks/group_hook.rb b/ee/app/models/hooks/group_hook.rb index c87a5bd7540954..f87c537e427223 100644 --- a/ee/app/models/hooks/group_hook.rb +++ b/ee/app/models/hooks/group_hook.rb @@ -32,4 +32,8 @@ class GroupHook < WebHook def pluralized_name _('Group Hooks') end + + def web_hooks_disable_failed? + Feature.enabled?(:web_hooks_disable_failed, group) + end end diff --git a/spec/models/hooks/web_hook_spec.rb b/spec/models/hooks/web_hook_spec.rb index afa2078ec7dfda..52725dee80a488 100644 --- a/spec/models/hooks/web_hook_spec.rb +++ b/spec/models/hooks/web_hook_spec.rb @@ -3,10 +3,14 @@ require 'spec_helper' RSpec.describe WebHook do - let(:hook) { build(:project_hook) } + include AfterNextHelpers + + let_it_be(:project) { create(:project) } + + let(:hook) { build(:project_hook, project: project) } around do |example| - travel_to(Time.current) { example.run } + freeze_time { example.run } end describe 'associations' do @@ -73,18 +77,30 @@ let(:data) { { key: 'value' } } let(:hook_name) { 'project hook' } - before do - expect(WebHookService).to receive(:new).with(hook, data, hook_name).and_call_original + it '#execute' do + expect_next(WebHookService).to receive(:execute) + + hook.execute(data, hook_name) end - it '#execute' do - expect_any_instance_of(WebHookService).to receive(:execute) + it 'does not execute non-executable hooks' do + hook.update!(disabled_until: 1.day.from_now) + + expect(WebHookService).not_to receive(:new) hook.execute(data, hook_name) end it '#async_execute' do - expect_any_instance_of(WebHookService).to receive(:async_execute) + expect_next(WebHookService).to receive(:async_execute) + + hook.async_execute(data, hook_name) + end + + it 'does not async execute non-executable hooks' do + hook.update!(disabled_until: 1.day.from_now) + + expect(WebHookService).not_to receive(:new) hook.async_execute(data, hook_name) end @@ -100,10 +116,9 @@ end describe '.executable' do - it 'finds the correct set of project hooks' do - project = create(:project) - + let(:not_executable) do [ + [0, Time.current], [0, 1.minute.from_now], [1, 1.minute.from_now], [3, 1.minute.from_now], @@ -113,8 +128,10 @@ ].map do |(recent_failures, disabled_until)| create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until) end + end - executables = [ + let(:executables) do + [ [0, nil], [0, 1.day.ago], [1, nil], @@ -124,35 +141,87 @@ ].map do |(recent_failures, disabled_until)| create(:project_hook, project: project, recent_failures: recent_failures, disabled_until: disabled_until) end + end + it 'finds the correct set of project hooks' do expect(described_class.where(project_id: project.id).executable).to match_array executables end + + context 'when the feature flag is not enabled' do + before do + stub_feature_flags(web_hooks_disable_failed: false) + end + + it 'is the same as all' do + expect(described_class.where(project_id: project.id).executable).to match_array(executables + not_executable) + end + end end describe '#executable?' do - where(:recent_failures, :disabled_until, :executable) do + let(:web_hook) { create(:project_hook, project: project) } + + where(:recent_failures, :not_until, :executable) do [ - [0, nil, true], - [0, 1.day.ago, true], - [0, 1.minute.from_now, false], - [1, nil, true], - [1, 1.day.ago, true], - [1, 1.minute.from_now, false], - [3, nil, true], - [3, 1.day.ago, true], - [3, 1.minute.from_now, false], - [4, nil, false], - [4, 1.day.ago, false], - [4, 1.minute.from_now, false] + [0, :not_set, true], + [0, :past, true], + [0, :future, false], + [0, :now, false], + [1, :not_set, true], + [1, :past, true], + [1, :future, false], + [3, :not_set, true], + [3, :past, true], + [3, :future, false], + [4, :not_set, false], + [4, :past, false], + [4, :future, false] ] end with_them do - it 'has the correct state' do - web_hook = create(:project_hook, recent_failures: recent_failures, disabled_until: disabled_until) + # Phasing means we cannot put these values in the where block, + # which is not subject to the frozen time context. + let(:disabled_until) do + case not_until + when :not_set + nil + when :past + 1.minute.ago + when :future + 1.minute.from_now + when :now + Time.current + end + end + before do + web_hook.update!(recent_failures: recent_failures, disabled_until: disabled_until) + end + + it 'has the correct state' do expect(web_hook.executable?).to eq(executable) end + + context 'when the feature flag is enabled for a project' do + before do + stub_feature_flags(web_hooks_disable_failed: project) + end + + it 'has the expected value' do + expect(web_hook.executable?).to eq(executable) + end + end + + context 'when the feature flag is not enabled' do + before do + stub_feature_flags(web_hooks_disable_failed: false) + end + + it 'is executable' do + expect(web_hook).to be_executable + end + end end end end -- GitLab From ec3b09293b0f3a740232c19ffab21a356a245575 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Tue, 4 May 2021 11:47:43 +0100 Subject: [PATCH 09/13] Add executable filtering to group hooks --- ee/app/models/ee/group.rb | 2 +- ee/spec/models/ee/group_spec.rb | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/ee/app/models/ee/group.rb b/ee/app/models/ee/group.rb index 55017ad2bd7548..450f4c3364bbcb 100644 --- a/ee/app/models/ee/group.rb +++ b/ee/app/models/ee/group.rb @@ -461,7 +461,7 @@ def execute_hooks(data, hooks_scope) return unless feature_available?(:group_webhooks) self_and_ancestor_hooks = GroupHook.where(group_id: self_and_ancestors) - self_and_ancestor_hooks.hooks_for(hooks_scope).each do |hook| + self_and_ancestor_hooks.executable.hooks_for(hooks_scope).each do |hook| hook.async_execute(data, hooks_scope.to_s) end end diff --git a/ee/spec/models/ee/group_spec.rb b/ee/spec/models/ee/group_spec.rb index 49925128fe4fa7..8fc618a918ef06 100644 --- a/ee/spec/models/ee/group_spec.rb +++ b/ee/spec/models/ee/group_spec.rb @@ -986,6 +986,19 @@ group.execute_hooks(data, :member_hooks) end end + + context 'when a hook is not executable' do + before do + group_hook.update!(recent_failures: 4) + end + + it 'does not execute the disabled hook' do + expect(WebHookService).to receive(:new).with(parent_group_hook, anything, anything).and_call_original + expect(WebHookService).not_to receive(:new).with(group_hook, anything, anything) + + group.execute_hooks(data, :member_hooks) + end + end end context 'when group_webhooks feature is disabled' do -- GitLab From 98b1d0beb5c563be279d00156ed4afee956242bf Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Tue, 4 May 2021 12:56:37 +0100 Subject: [PATCH 10/13] Implement progressive backoff for internal errors Does not permanently disable hooks for 5xx errors --- app/models/hooks/web_hook.rb | 14 ++++++++ app/services/web_hook_service.rb | 19 +++++++---- spec/models/hooks/web_hook_spec.rb | 46 ++++++++++++++++++++++++++ spec/services/web_hook_service_spec.rb | 9 +++-- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/app/models/hooks/web_hook.rb b/app/models/hooks/web_hook.rb index 3c2aecfcf9521f..a62162c12de23f 100644 --- a/app/models/hooks/web_hook.rb +++ b/app/models/hooks/web_hook.rb @@ -4,6 +4,8 @@ class WebHook < ApplicationRecord include Sortable FAILURE_THRESHOLD = 3 # three strikes + INITIAL_BACKOFF = 10.minutes + BACKOFF_GROWTH_FACTOR = 2.0 attr_encrypted :token, mode: :per_attribute_iv, @@ -56,6 +58,18 @@ def help_path 'user/project/integrations/webhooks' end + def next_backoff + INITIAL_BACKOFF * (BACKOFF_GROWTH_FACTOR**backoff_count) + end + + def disable! + update!(recent_failures: FAILURE_THRESHOLD + 1) + end + + def enable! + update!(recent_failures: 0, disabled_until: nil, backoff_count: 0) + end + private def web_hooks_disable_failed? diff --git a/app/services/web_hook_service.rb b/app/services/web_hook_service.rb index d1121d57577740..0535bc625ac1fc 100644 --- a/app/services/web_hook_service.rb +++ b/app/services/web_hook_service.rb @@ -118,13 +118,7 @@ def make_request_with_auth end def log_execution(trigger:, url:, request_data:, response:, execution_duration:, error_message: nil) - if response.success? || response.redirection? - hook.update!(recent_failures: 0) - elsif response.internal_server_error? - hook.update!(disabled_until: 1.day.from_now) - else - hook.update!(recent_failures: hook.recent_failures + 1) - end + handle_failure(response, hook) WebHookLog.create( web_hook: hook, @@ -140,6 +134,17 @@ def log_execution(trigger:, url:, request_data:, response:, execution_duration:, ) end + def handle_failure(response, hook) + if response.success? || response.redirection? + hook.enable! + elsif response.internal_server_error? + next_backoff = hook.next_backoff + hook.update!(disabled_until: next_backoff.from_now, backoff_count: hook.backoff_count + 1) + else + hook.update!(recent_failures: hook.recent_failures + 1) + end + end + def build_headers(hook_name) @headers ||= begin { diff --git a/spec/models/hooks/web_hook_spec.rb b/spec/models/hooks/web_hook_spec.rb index 52725dee80a488..0f026f9814d2a6 100644 --- a/spec/models/hooks/web_hook_spec.rb +++ b/spec/models/hooks/web_hook_spec.rb @@ -224,4 +224,50 @@ end end end + + describe '#next_backoff' do + context 'when there was no last backoff' do + before do + hook.backoff_count = 0 + end + + it 'is 10 minutes' do + expect(hook.next_backoff).to eq(described_class::INITIAL_BACKOFF) + end + end + + context 'when we have backed off once' do + before do + hook.backoff_count = 1 + end + + it 'is twice the initial value' do + expect(hook.next_backoff).to eq(20.minutes) + end + end + + context 'when we have backed off 3 times' do + before do + hook.backoff_count = 3 + end + + it 'grows exponentially' do + expect(hook.next_backoff).to eq(80.minutes) + end + end + end + + describe '#enable!' do + it 'makes a hook executable' do + hook.recent_failures = 1000 + + expect { hook.enable! }.to change(hook, :executable?).from(false).to(true) + end + end + + describe '#disable!' do + it 'disables a hook' do + expect { hook.disable! }.to change(hook, :executable?).from(true).to(false) + end + end end diff --git a/spec/services/web_hook_service_spec.rb b/spec/services/web_hook_service_spec.rb index 51ae8f4c540545..337cf31a1941c1 100644 --- a/spec/services/web_hook_service_spec.rb +++ b/spec/services/web_hook_service_spec.rb @@ -138,7 +138,7 @@ ] exceptions.each do |exception_class| exception = exception_class.new('Exception message') - project_hook.update!(disabled_until: nil) + project_hook.enable! stub_full_request(project_hook.url, method: :post).to_raise(exception) expect(service_instance.execute).to eq({ status: :error, message: exception.to_s }) @@ -254,7 +254,12 @@ end it 'sets the disabled_until attribute' do - expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now) + expect { service_instance.execute } + .to change(project_hook, :disabled_until).to(project_hook.next_backoff.from_now) + end + + it 'increases the backoff count' do + expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1) end end -- GitLab From fb89cccfdce24cb0118c4c3a35737f27bcf80099 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Tue, 4 May 2021 13:56:37 +0100 Subject: [PATCH 11/13] Add executable scope for system hooks --- app/services/system_hooks_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/services/system_hooks_service.rb b/app/services/system_hooks_service.rb index d0099283afa3d5..2a2053cb912b4e 100644 --- a/app/services/system_hooks_service.rb +++ b/app/services/system_hooks_service.rb @@ -10,7 +10,7 @@ def execute_hooks_for(model, event) end def execute_hooks(data, hooks_scope = :all) - SystemHook.hooks_for(hooks_scope).find_each do |hook| + SystemHook.executable.hooks_for(hooks_scope).find_each do |hook| hook.async_execute(data, 'system_hooks') end -- GitLab From 19db65f760999ca99f6c521d137eca987018dd84 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Tue, 4 May 2021 18:05:17 +0100 Subject: [PATCH 12/13] Call executable scope in hooks_for Also: clamp the next backoff value to `(10.minutes, 1.day)`. This requires 8 doublings to reach the maximum cooldown. --- app/models/concerns/triggerable_hooks.rb | 2 +- app/models/hooks/web_hook.rb | 7 ++++- ee/app/models/ee/group.rb | 2 +- spec/models/hooks/web_hook_spec.rb | 10 +++++++ spec/services/web_hook_service_spec.rb | 38 ++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/app/models/concerns/triggerable_hooks.rb b/app/models/concerns/triggerable_hooks.rb index 4d3a74b59fa68d..8fe34632430b9d 100644 --- a/app/models/concerns/triggerable_hooks.rb +++ b/app/models/concerns/triggerable_hooks.rb @@ -29,7 +29,7 @@ def hooks_for(trigger) callable_scopes = triggers.keys + [:all] return none unless callable_scopes.include?(trigger) - public_send(trigger) # rubocop:disable GitlabSecurity/PublicSend + executable.public_send(trigger) # rubocop:disable GitlabSecurity/PublicSend end def select_active(hooks_scope, data) diff --git a/app/models/hooks/web_hook.rb b/app/models/hooks/web_hook.rb index a62162c12de23f..445381343186ea 100644 --- a/app/models/hooks/web_hook.rb +++ b/app/models/hooks/web_hook.rb @@ -5,6 +5,7 @@ class WebHook < ApplicationRecord FAILURE_THRESHOLD = 3 # three strikes INITIAL_BACKOFF = 10.minutes + MAX_BACKOFF = 1.day BACKOFF_GROWTH_FACTOR = 2.0 attr_encrypted :token, @@ -59,7 +60,11 @@ def help_path end def next_backoff - INITIAL_BACKOFF * (BACKOFF_GROWTH_FACTOR**backoff_count) + return MAX_BACKOFF if backoff_count >= 8 # optimization to prevent expensive exponentiation and possible overflows + + (INITIAL_BACKOFF * (BACKOFF_GROWTH_FACTOR**backoff_count)) + .clamp(INITIAL_BACKOFF, MAX_BACKOFF) + .seconds end def disable! diff --git a/ee/app/models/ee/group.rb b/ee/app/models/ee/group.rb index 450f4c3364bbcb..55017ad2bd7548 100644 --- a/ee/app/models/ee/group.rb +++ b/ee/app/models/ee/group.rb @@ -461,7 +461,7 @@ def execute_hooks(data, hooks_scope) return unless feature_available?(:group_webhooks) self_and_ancestor_hooks = GroupHook.where(group_id: self_and_ancestors) - self_and_ancestor_hooks.executable.hooks_for(hooks_scope).each do |hook| + self_and_ancestor_hooks.hooks_for(hooks_scope).each do |hook| hook.async_execute(data, hooks_scope.to_s) end end diff --git a/spec/models/hooks/web_hook_spec.rb b/spec/models/hooks/web_hook_spec.rb index 0f026f9814d2a6..b528dbedd2cef3 100644 --- a/spec/models/hooks/web_hook_spec.rb +++ b/spec/models/hooks/web_hook_spec.rb @@ -255,6 +255,16 @@ expect(hook.next_backoff).to eq(80.minutes) end end + + context 'when the previous backoff was large' do + before do + hook.backoff_count = 8 # last value before MAX_BACKOFF + end + + it 'does not exceed the max backoff value' do + expect(hook.next_backoff).to eq(described_class::MAX_BACKOFF) + end + end end describe '#enable!' do diff --git a/spec/services/web_hook_service_spec.rb b/spec/services/web_hook_service_spec.rb index 337cf31a1941c1..46dab4fa1715ce 100644 --- a/spec/services/web_hook_service_spec.rb +++ b/spec/services/web_hook_service_spec.rb @@ -202,6 +202,16 @@ it 'does not change the disabled_until attribute' do expect { service_instance.execute }.not_to change(project_hook, :disabled_until) end + + context 'when the hook had previously failed' do + before do + project_hook.update!(recent_failures: 2) + end + + it 'resets the failure count' do + expect { service_instance.execute }.to change(project_hook, :recent_failures).to(0) + end + end end context 'with bad request' do @@ -261,6 +271,34 @@ it 'increases the backoff count' do expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1) end + + context 'when the previous cool-off was near the maximum' do + before do + project_hook.update!(disabled_until: 5.minutes.ago, backoff_count: 8) + end + + it 'sets the disabled_until attribute' do + expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now) + end + + it 'sets the last_backoff attribute' do + expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1) + end + end + + context 'when we have backed-off many many times' do + before do + project_hook.update!(disabled_until: 5.minutes.ago, backoff_count: 365) + end + + it 'sets the disabled_until attribute' do + expect { service_instance.execute }.to change(project_hook, :disabled_until).to(1.day.from_now) + end + + it 'sets the last_backoff attribute' do + expect { service_instance.execute }.to change(project_hook, :backoff_count).by(1) + end + end end context 'with unsafe response body' do -- GitLab From 847616277fd5ebf8924f3d2abe0018c05d1c3fd1 Mon Sep 17 00:00:00 2001 From: Alex Kalderimis Date: Wed, 5 May 2021 10:58:11 +0100 Subject: [PATCH 13/13] Remove guideline comments from migration --- ...4085144_add_index_on_web_hook_project_id_recent_failures.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb b/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb index db21ebad854bbe..898a0ccd1c52d1 100644 --- a/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb +++ b/db/migrate/20210504085144_add_index_on_web_hook_project_id_recent_failures.rb @@ -1,8 +1,5 @@ # frozen_string_literal: true -# See https://docs.gitlab.com/ee/development/migration_style_guide.html -# for more information on how to write migrations for GitLab. - class AddIndexOnWebHookProjectIdRecentFailures < ActiveRecord::Migration[6.0] include Gitlab::Database::MigrationHelpers -- GitLab