diff --git a/app/models/concerns/web_hooks/auto_disabling.rb b/app/models/concerns/web_hooks/auto_disabling.rb index 3499f0056fd9d275684000de62cb472705511889..9ff6b40c266c42c8ebede7dcb39f90880c4a22c0 100644 --- a/app/models/concerns/web_hooks/auto_disabling.rb +++ b/app/models/concerns/web_hooks/auto_disabling.rb @@ -6,9 +6,14 @@ module AutoDisabling include ::Gitlab::Loggable ENABLED_HOOK_TYPES = %w[ProjectHook].freeze - MAX_FAILURES = 100 - FAILURE_THRESHOLD = 3 - EXCEEDED_FAILURE_THRESHOLD = FAILURE_THRESHOLD + 1 + + TEMPORARILY_DISABLED_FAILURE_THRESHOLD = 3 + # A webhook will be failing and being temporarily disabled for the max backoff of 1 day (`MAX_BACKOFF`) + # for at least 1 month before it becomes permanently disabled on its 40th failure. + # Exactly how quickly this happens depends on how frequently it triggers. + # https://gitlab.com/gitlab-org/gitlab/-/issues/503733#note_2217234805 + PERMANENTLY_DISABLED_FAILURE_THRESHOLD = 39 + INITIAL_BACKOFF = 1.minute.freeze MAX_BACKOFF = 1.day.freeze MAX_BACKOFF_COUNT = 11 @@ -32,40 +37,40 @@ def enabled_hook_types included do delegate :auto_disabling_enabled?, to: :class, private: true - # A hook is disabled if: + # A webhook is disabled if: # - # - we have exceeded the grace FAILURE_THRESHOLD (recent_failures > ?) - # - and either: - # - disabled_until is nil (i.e. this was set by WebHook#fail!) - # - or disabled_until is in the future (i.e. this was set by WebHook#backoff!) - # - OR silent mode is enabled. + # - it has exceeded the grace TEMPORARILY_DISABLED_FAILURE_THRESHOLD (recent_failures > ?) + # - AND the time period it was disabled for has not yet expired (disabled_until >= ?) + # - OR it has reached the PERMANENTLY_DISABLED_FAILURE_THRESHOLD (recent_failures > ?) scope :disabled, -> do return all if Gitlab::SilentMode.enabled? return none unless auto_disabling_enabled? where( - 'recent_failures > ? AND (disabled_until IS NULL OR disabled_until >= ?)', - FAILURE_THRESHOLD, - Time.current + '(recent_failures > ? AND (disabled_until IS NULL OR disabled_until >= ?)) OR recent_failures > ?', + TEMPORARILY_DISABLED_FAILURE_THRESHOLD, + Time.current, + PERMANENTLY_DISABLED_FAILURE_THRESHOLD ) end - # A hook is executable if: + # A webhook is executable if: # - # - we have not yet exceeeded the grace FAILURE_THRESHOLD (recent_failures <= ?) - # - OR we have exceeded the grace FAILURE_THRESHOLD and neither of the following is true: - # - disabled_until is nil (i.e. this was set by WebHook#fail!) - # - disabled_until is in the future (i.e. this was set by WebHook#backoff!) - # - AND silent mode is not enabled. + # - it has not exceeeded the grace TEMPORARILY_DISABLED_FAILURE_THRESHOLD (recent_failures <= ?) + # - OR it has exceeded the grace TEMPORARILY_DISABLED_FAILURE_THRESHOLD and: + # - it was temporarily disabled but can now be triggered again (disabled_until < ?) + # - AND has not reached the PERMANENTLY_DISABLED_FAILURE_THRESHOLD (recent_failures <= ?) scope :executable, -> do return none if Gitlab::SilentMode.enabled? return all unless auto_disabling_enabled? where( - 'recent_failures <= ? OR (recent_failures > ? AND (disabled_until IS NOT NULL) AND (disabled_until < ?))', - FAILURE_THRESHOLD, - FAILURE_THRESHOLD, - Time.current + '(recent_failures <= ? OR (recent_failures > ? AND disabled_until IS NOT NULL AND disabled_until < ?)) ' \ + 'AND recent_failures <= ?', + TEMPORARILY_DISABLED_FAILURE_THRESHOLD, + TEMPORARILY_DISABLED_FAILURE_THRESHOLD, + Time.current, + PERMANENTLY_DISABLED_FAILURE_THRESHOLD ) end end @@ -79,13 +84,18 @@ def executable? def temporarily_disabled? return false unless auto_disabling_enabled? - disabled_until.present? && disabled_until >= Time.current && recent_failures > FAILURE_THRESHOLD + disabled_until.present? && disabled_until >= Time.current && + recent_failures.between?(TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1, PERMANENTLY_DISABLED_FAILURE_THRESHOLD) end def permanently_disabled? return false unless auto_disabling_enabled? - recent_failures > FAILURE_THRESHOLD && disabled_until.blank? + recent_failures > PERMANENTLY_DISABLED_FAILURE_THRESHOLD || + # Keep the old definition of permanently disabled just until we have migrated all records to the new definition + # with `MigrateOldDisabledWebHookToNewState` + # TODO Remove the next line as part of https://gitlab.com/gitlab-org/gitlab/-/issues/525446 + (recent_failures > TEMPORARILY_DISABLED_FAILURE_THRESHOLD && disabled_until.blank?) end def enable! @@ -99,7 +109,7 @@ def enable! save(validate: false) end - # Don't actually back-off until a grace level of FAILURE_THRESHOLD failures have been seen + # Don't actually back-off until a grace level of TEMPORARILY_DISABLED_FAILURE_THRESHOLD failures have been seen # tracked in the recent_failures counter def backoff! return unless auto_disabling_enabled? @@ -107,7 +117,7 @@ def backoff! attrs = { recent_failures: next_failure_count } - if recent_failures >= FAILURE_THRESHOLD + if recent_failures >= TEMPORARILY_DISABLED_FAILURE_THRESHOLD attrs[:backoff_count] = next_backoff_count attrs[:disabled_until] = next_backoff.from_now end @@ -120,17 +130,6 @@ def backoff! save(validate: false) end - def failed! - return unless auto_disabling_enabled? - return unless recent_failures < MAX_FAILURES - - attrs = { disabled_until: nil, backoff_count: 0, recent_failures: next_failure_count } - - assign_attributes(**attrs) - logger.info(hook_id: id, action: 'disable', **attrs) - save(validate: false) - end - def next_backoff # Optimization to prevent expensive exponentiation and possible overflows return MAX_BACKOFF if backoff_count >= MAX_BACKOFF_COUNT @@ -159,11 +158,11 @@ def logger end def next_failure_count - recent_failures.succ.clamp(1, MAX_FAILURES) + recent_failures.succ.clamp(1, PERMANENTLY_DISABLED_FAILURE_THRESHOLD + 1) end def next_backoff_count - backoff_count.succ.clamp(1, MAX_FAILURES) + backoff_count.succ.clamp(1, PERMANENTLY_DISABLED_FAILURE_THRESHOLD + 1) end end end diff --git a/app/services/web_hook_service.rb b/app/services/web_hook_service.rb index 9fa870c9eac841e864f57d5c39e3aebc754d58a4..95f780c057758940933426827d9546dc0dbbc6db 100644 --- a/app/services/web_hook_service.rb +++ b/app/services/web_hook_service.rb @@ -202,10 +202,8 @@ def queue_log_execution_with_retry(log_data, category) def response_category(response) if response.success? || response.redirection? :ok - elsif response.internal_server_error? - :error else - :failed + :error end end diff --git a/app/services/web_hooks/log_execution_service.rb b/app/services/web_hooks/log_execution_service.rb index db614f37db95b36593f372132b4b4b5e3a1734a8..dcd6ada463392fc16b158b7f1cc06fdd1cde5cef 100644 --- a/app/services/web_hooks/log_execution_service.rb +++ b/app/services/web_hooks/log_execution_service.rb @@ -52,10 +52,10 @@ def update_hook_failure_state case response_category when :ok hook.enable! - when :error + # TODO remove handling of `:failed` as part of + # https://gitlab.com/gitlab-org/gitlab/-/issues/525446 + when :error, :failed hook.backoff! - when :failed - hook.failed! end hook.parent.update_last_webhook_failure(hook) if hook.parent diff --git a/app/views/shared/web_hooks/_hook.html.haml b/app/views/shared/web_hooks/_hook.html.haml index 384e312dce3b00decea5018414950b7ab3696772..8350eb03f18b7d50a097afb403eb539871ab45d3 100644 --- a/app/views/shared/web_hooks/_hook.html.haml +++ b/app/views/shared/web_hooks/_hook.html.haml @@ -11,11 +11,11 @@ = hook.url - if hook.rate_limited? - = gl_badge_tag(_('Disabled'), variant: :danger) + = gl_badge_tag(_('Webhooks|Rate limited'), variant: :danger) - elsif hook.permanently_disabled? - = gl_badge_tag(s_('Webhooks|Failed to connect'), variant: :danger) + = gl_badge_tag(s_('Webhooks|Disabled'), variant: :danger) - elsif hook.temporarily_disabled? - = gl_badge_tag(s_('Webhooks|Fails to connect'), variant: :warning) + = gl_badge_tag(s_('Webhooks|Temporarily disabled'), variant: :warning) %div - hook.class.triggers.each_value do |trigger| diff --git a/app/views/shared/web_hooks/_hook_errors.html.haml b/app/views/shared/web_hooks/_hook_errors.html.haml index b610f56ef1780e8513368433e7064b0442e8a17a..52402d5a337807d62411cf8bd03d4b6e99fec7c3 100644 --- a/app/views/shared/web_hooks/_hook_errors.html.haml +++ b/app/views/shared/web_hooks/_hook_errors.html.haml @@ -1,5 +1,6 @@ - strong = { strong_start: ''.html_safe, strong_end: ''.html_safe } +- help_link = link_to('', help_page_path('user/project/integrations/webhooks.md', anchor: 'auto-disabled-webhooks'), target: '_blank', rel: 'noopener noreferrer') - if hook.rate_limited? - placeholders = { limit: number_with_delimiter(hook.rate_limit), root_namespace: hook.parent.root_namespace.path } @@ -8,14 +9,15 @@ - c.with_body do = s_("Webhooks|Webhooks for %{root_namespace} are now disabled because they've been triggered more than %{limit} times per minute. These webhooks are re-enabled automatically in the next minute.").html_safe % placeholders - elsif hook.permanently_disabled? - = render Pajamas::AlertComponent.new(title: s_('Webhooks|Webhook failed to connect'), + - failure_count = { failure_count: hook.recent_failures } + = render Pajamas::AlertComponent.new(title: s_('Webhooks|Webhook disabled'), variant: :danger) do |c| - c.with_body do - = safe_format(s_('Webhooks|The webhook failed to connect and is now disabled. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings.'), strong) + = safe_format(s_('Webhooks|The webhook has %{help_link_start}failed%{help_link_end} %{failure_count} times consecutively and has been disabled. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings.'), strong, failure_count, tag_pair(help_link, :help_link_start, :help_link_end)) - elsif hook.temporarily_disabled? - - help_link = link_to('', help_page_path('user/project/integrations/webhooks.md', anchor: 'auto-disabled-webhooks'), target: '_blank', rel: 'noopener noreferrer') - retry_time = { retry_time: time_interval_in_words(hook.disabled_until - Time.now) } - = render Pajamas::AlertComponent.new(title: s_('Webhooks|Webhook fails to connect'), + - failure_count = { failure_count: hook.recent_failures } + = render Pajamas::AlertComponent.new(title: s_('Webhooks|Webhook temporarily disabled'), variant: :warning) do |c| - c.with_body do - = safe_format(s_('Webhooks|The webhook %{help_link_start}failed to connect%{help_link_end} and is scheduled to retry in %{retry_time}. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings.'), retry_time, strong, tag_pair(help_link, :help_link_start, :help_link_end)) + = safe_format(s_('Webhooks|The webhook has %{help_link_start}failed%{help_link_end} %{failure_count} times consecutively and is disabled for %{retry_time}. To re-enable the webhook earlier, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings.'), retry_time, strong, failure_count, tag_pair(help_link, :help_link_start, :help_link_end)) diff --git a/db/post_migrate/20250317021351_add_temporary_index_to_web_hooks_for_migrate_old_disabled_web_hook_to_new_state.rb b/db/post_migrate/20250317021351_add_temporary_index_to_web_hooks_for_migrate_old_disabled_web_hook_to_new_state.rb new file mode 100644 index 0000000000000000000000000000000000000000..07609ee57b6378fec730c74d0c0a1b684155cf42 --- /dev/null +++ b/db/post_migrate/20250317021351_add_temporary_index_to_web_hooks_for_migrate_old_disabled_web_hook_to_new_state.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +class AddTemporaryIndexToWebHooksForMigrateOldDisabledWebHookToNewState < Gitlab::Database::Migration[2.2] + milestone '17.11' + + INDEX_NAME = 'tmp_index_web_hooks_on_disabled_until_recent_failures' + TABLE = :web_hooks + COLUMNS = [:id, :recent_failures, :disabled_until] + + disable_ddl_transaction! + + def up + add_concurrent_index TABLE, COLUMNS, where: 'disabled_until is NULL', name: INDEX_NAME + + connection.execute("ANALYZE #{TABLE}") + end + + def down + remove_concurrent_index_by_name TABLE, INDEX_NAME + end +end diff --git a/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state.rb b/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state.rb new file mode 100644 index 0000000000000000000000000000000000000000..6679f25a868ba275ff022e7dc867f0eeba189d61 --- /dev/null +++ b/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +class MigrateOldDisabledWebHookToNewState < Gitlab::Database::Migration[2.2] + BATCH_SIZE = 1000 + TABLE = 'web_hooks' + SCOPE = ->(table) { + table.where('recent_failures > 3').where(disabled_until: nil) + }.freeze + + NEW_RECENT_FAILURES = 40 # WebHooks::AutoDisabling::PERMANENTLY_DISABLED_FAILURE_THRESHOLD + 1 + NEW_BACKOFF_COUNT = 37 # NEW_RECENT_FAILURES - WebHooks::AutoDisabling::FAILURE_THRESHOLD + + disable_ddl_transaction! + restrict_gitlab_migration gitlab_schema: :gitlab_main + milestone '17.11' + + def up + disabled_until = Time.zone.now.to_fs(:db) # Specific time does not matter, just needs to be present + + each_batch(TABLE, connection: connection, scope: SCOPE, of: BATCH_SIZE) do |batch, _batchable_model| + batch.update_all( + recent_failures: NEW_RECENT_FAILURES, + backoff_count: NEW_BACKOFF_COUNT, + disabled_until: disabled_until + ) + end + end + + def down + # no-op + end +end diff --git a/db/post_migrate/20250317021551_remove_temporary_index_from_web_hooks_for_migrate_disabled_web_hook.rb b/db/post_migrate/20250317021551_remove_temporary_index_from_web_hooks_for_migrate_disabled_web_hook.rb new file mode 100644 index 0000000000000000000000000000000000000000..b69145b318e37a461f9ac5a23764467ef9efa546 --- /dev/null +++ b/db/post_migrate/20250317021551_remove_temporary_index_from_web_hooks_for_migrate_disabled_web_hook.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +class RemoveTemporaryIndexFromWebHooksForMigrateDisabledWebHook < Gitlab::Database::Migration[2.2] + milestone '17.11' + + INDEX_NAME = 'tmp_index_web_hooks_on_disabled_until_recent_failures' + TABLE = :web_hooks + COLUMNS = [:id, :recent_failures, :disabled_until] + + disable_ddl_transaction! + + def up + remove_concurrent_index_by_name TABLE, INDEX_NAME + end + + def down + add_concurrent_index TABLE, COLUMNS, where: 'disabled_until is NULL', name: INDEX_NAME + + connection.execute("ANALYZE #{TABLE}") + end +end diff --git a/db/schema_migrations/20250317021351 b/db/schema_migrations/20250317021351 new file mode 100644 index 0000000000000000000000000000000000000000..44ac91170f81af5b9da77db71cc870aae89509dd --- /dev/null +++ b/db/schema_migrations/20250317021351 @@ -0,0 +1 @@ +43a806f0236fcf8d57242c339a90e1afbb7c1ca5950d29423da5648eb4b855ff \ No newline at end of file diff --git a/db/schema_migrations/20250317021451 b/db/schema_migrations/20250317021451 new file mode 100644 index 0000000000000000000000000000000000000000..ff5142ed7d6aae3b29a9c6b4b5ba1b4afdde0b66 --- /dev/null +++ b/db/schema_migrations/20250317021451 @@ -0,0 +1 @@ +b90e017fcfdb70ab0d478f0d5fa2803fbcd6ee444c157902b34cab495496684b \ No newline at end of file diff --git a/db/schema_migrations/20250317021551 b/db/schema_migrations/20250317021551 new file mode 100644 index 0000000000000000000000000000000000000000..ffaffeff340f03b4b8379a37eefc900b60bc2fa7 --- /dev/null +++ b/db/schema_migrations/20250317021551 @@ -0,0 +1 @@ +93b32fdd10b4eddaad779c8aa8ae8f0a9f0806549ee9539659fefa09e79a87ad \ No newline at end of file diff --git a/doc/user/project/integrations/img/failed_badges_v14_9.png b/doc/user/project/integrations/img/failed_badges_v14_9.png deleted file mode 100644 index 5a1f481e54c3f992035ef08b634d3960ab34d831..0000000000000000000000000000000000000000 Binary files a/doc/user/project/integrations/img/failed_badges_v14_9.png and /dev/null differ diff --git a/doc/user/project/integrations/img/failed_badges_v17_11.png b/doc/user/project/integrations/img/failed_badges_v17_11.png new file mode 100644 index 0000000000000000000000000000000000000000..865f8cb10d9dc031081b46bcd417b626de3a244b Binary files /dev/null and b/doc/user/project/integrations/img/failed_badges_v17_11.png differ diff --git a/doc/user/project/integrations/webhooks.md b/doc/user/project/integrations/webhooks.md index 8229ef648c51fa464ee1431f0297f11502d9f098..f1afbfdc9aeac7b94e00d47d4805eb3eb13509fb 100644 --- a/doc/user/project/integrations/webhooks.md +++ b/doc/user/project/integrations/webhooks.md @@ -481,6 +481,8 @@ To optimize your webhook receivers: - [Generally available](https://gitlab.com/gitlab-org/gitlab/-/issues/329849) for project webhooks in GitLab 15.7. Feature flag `web_hooks_disable_failed` removed. - [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/385902) for group webhooks in GitLab 15.10. - [Disabled on GitLab Self-Managed](https://gitlab.com/gitlab-org/gitlab/-/issues/390157) in GitLab 15.10 [with a flag](../../../administration/feature_flags.md) named `auto_disabling_web_hooks`. +- **Fails to connect** and **Failing to connect** [renamed](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/166329) to **Disabled** and **Temporarily disabled** in GitLab 17.11. +- [Changed](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/166329) to become permanently disabled after 40 consecutive failures in GitLab 17.11. {{< /history >}} @@ -500,24 +502,34 @@ To view auto-disabled webhooks: In the webhook list, auto-disabled webhooks display as: -- **Fails to connect** for [temporarily disabled](#temporarily-disabled-webhooks) webhooks -- **Failed to connect** for [permanently disabled](#permanently-disabled-webhooks) webhooks +- **Temporarily disabled** for [temporarily disabled](#temporarily-disabled-webhooks) webhooks +- **Disabled** for [permanently disabled](#permanently-disabled-webhooks) webhooks -![Badges on failing webhooks](img/failed_badges_v14_9.png) +![Badges on failing webhooks](img/failed_badges_v17_11.png) #### Temporarily disabled webhooks -Webhooks are temporarily disabled if they: +Webhooks are temporarily disabled if they fail four consecutive times. +If webhooks fail 40 consecutive times, they become [permanently disabled](#permanently-disabled-webhooks). -- Return response codes in the `5xx` range. -- Experience a [timeout](../../gitlab_com/_index.md#webhooks). -- Encounter other HTTP errors. +Failure occurs when: -These webhooks are initially disabled for one minute, with the duration extending on subsequent failures up to 24 hours. +- The [webhook receiver](#webhook-receiver-requirements) returns a response code in the `4xx` or `5xx` range. +- The webhook experiences a [timeout](../../gitlab_com/_index.md#webhooks) when attempting to connect to the webhook receiver. +- The webhook encounters other HTTP errors. + +Temporarily disabled webhooks are initially disabled for one minute, +with the duration extending on subsequent failures up to 24 hours. +After this period has elapsed, these webhooks are automatically re-enabled. #### Permanently disabled webhooks -Webhooks are permanently disabled if they return response codes in the `4xx` range, indicating a misconfiguration. +Webhooks are permanently disabled if they fail 40 consecutive times. +Unlike [temporarily disabled webhooks](#temporarily-disabled-webhooks), these webhooks are not automatically re-enabled. + +Webhooks that were permanently disabled in GitLab 17.10 and earlier underwent a data migration. +These webhooks might display four failures in [**Recent events**](#view-webhook-request-history) +even though the UI might state they have 40 failures. #### Re-enable disabled webhooks @@ -528,10 +540,7 @@ Webhooks are permanently disabled if they return response codes in the `4xx` ran {{< /history >}} -To re-enable a temporarily or permanently disabled webhook: - -- [Send a test request](#test-a-webhook) to the webhook. - +To re-enable a disabled webhook, [send a test request](#test-a-webhook). The webhook is re-enabled if the test request returns a response code in the `2xx` range. ### Delivery headers diff --git a/locale/gitlab.pot b/locale/gitlab.pot index 32f29a8d696eae0bda0f7080720388459b06229b..e08715995eb3f8a6a30461243f9de5d5544b6c2a 100644 --- a/locale/gitlab.pot +++ b/locale/gitlab.pot @@ -65913,6 +65913,9 @@ msgstr "" msgid "Webhooks|Description (optional)" msgstr "" +msgid "Webhooks|Disabled" +msgstr "" + msgid "Webhooks|Do not show sensitive data such as tokens in the UI." msgstr "" @@ -65925,12 +65928,6 @@ msgstr "" msgid "Webhooks|Enable SSL verification" msgstr "" -msgid "Webhooks|Failed to connect" -msgstr "" - -msgid "Webhooks|Fails to connect" -msgstr "" - msgid "Webhooks|Feature flag events" msgstr "" @@ -65991,6 +65988,9 @@ msgstr "" msgid "Webhooks|Project or group access token events" msgstr "" +msgid "Webhooks|Rate limited" +msgstr "" + msgid "Webhooks|Regular expression" msgstr "" @@ -66030,6 +66030,9 @@ msgstr "" msgid "Webhooks|Tag push events" msgstr "" +msgid "Webhooks|Temporarily disabled" +msgstr "" + msgid "Webhooks|The URL must be percent-encoded if it contains one or more special characters." msgstr "" @@ -66039,10 +66042,10 @@ msgstr "" msgid "Webhooks|The secret token is cleared on save unless it is updated." msgstr "" -msgid "Webhooks|The webhook %{help_link_start}failed to connect%{help_link_end} and is scheduled to retry in %{retry_time}. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings." +msgid "Webhooks|The webhook has %{help_link_start}failed%{help_link_end} %{failure_count} times consecutively and has been disabled. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings." msgstr "" -msgid "Webhooks|The webhook failed to connect and is now disabled. To re-enable the webhook, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings." +msgid "Webhooks|The webhook has %{help_link_start}failed%{help_link_end} %{failure_count} times consecutively and is disabled for %{retry_time}. To re-enable the webhook earlier, see %{strong_start}Recent events%{strong_end} for more information about the error, then test your settings." msgstr "" msgid "Webhooks|Trigger" @@ -66060,13 +66063,10 @@ msgstr "" msgid "Webhooks|Webhook disabled" msgstr "" -msgid "Webhooks|Webhook failed to connect" -msgstr "" - -msgid "Webhooks|Webhook fails to connect" +msgid "Webhooks|Webhook rate limit has been reached" msgstr "" -msgid "Webhooks|Webhook rate limit has been reached" +msgid "Webhooks|Webhook temporarily disabled" msgstr "" msgid "Webhooks|Webhooks for %{root_namespace} are now disabled because they've been triggered more than %{limit} times per minute. These webhooks are re-enabled automatically in the next minute." diff --git a/spec/factories/project_hooks.rb b/spec/factories/project_hooks.rb index 8691a79a1587048acd652a056aec4fa8bd63f588..482cec1195d5cefc576907d38f773bfe66578adb 100644 --- a/spec/factories/project_hooks.rb +++ b/spec/factories/project_hooks.rb @@ -39,7 +39,7 @@ end trait :permanently_disabled do - recent_failures { WebHooks::AutoDisabling::FAILURE_THRESHOLD + 1 } + recent_failures { WebHooks::AutoDisabling::PERMANENTLY_DISABLED_FAILURE_THRESHOLD + 1 } end end end diff --git a/spec/migrations/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state_spec.rb b/spec/migrations/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..ad26f6e30769c3990f74aa62b8e6be9ec54b6bba --- /dev/null +++ b/spec/migrations/db/post_migrate/20250317021451_migrate_old_disabled_web_hook_to_new_state_spec.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe MigrateOldDisabledWebHookToNewState, :freeze_time, feature_category: :webhooks, migration_version: 20250206114301 do + let!(:web_hooks) { table(:web_hooks) } + + let!(:non_disabled_webhook) { web_hooks.create!(recent_failures: 3, backoff_count: 1) } + let!(:legacy_permanently_disabled_webhook) { web_hooks.create!(recent_failures: 4, backoff_count: 1) } + let!(:temporarily_disabled_webhook) do + web_hooks.create!(recent_failures: 4, backoff_count: 1, disabled_until: Time.current + 1.hour) + end + + describe '#up' do + it 'migrates legacy permanently disabled web hooks to new permanently disabled state' do + migrate! + + [non_disabled_webhook, temporarily_disabled_webhook, legacy_permanently_disabled_webhook].each(&:reload) + + expect(non_disabled_webhook.recent_failures).to eq(3) + expect(non_disabled_webhook.backoff_count).to eq(1) + expect(non_disabled_webhook.disabled_until).to be_nil + + expect(temporarily_disabled_webhook.recent_failures).to eq(4) + expect(temporarily_disabled_webhook.backoff_count).to eq(1) + expect(temporarily_disabled_webhook.disabled_until).to eq(Time.current + 1.hour) + + expect(legacy_permanently_disabled_webhook.recent_failures).to eq(40) + expect(legacy_permanently_disabled_webhook.backoff_count).to eq(37) + expect(legacy_permanently_disabled_webhook.disabled_until).to eq(Time.current) + + expect(web_hooks.where(disabled_until: nil).where('recent_failures > 3').count).to eq(0) + + expect(ProjectHook.executable.pluck_primary_key).to contain_exactly(non_disabled_webhook.id) + expect(ProjectHook.disabled.pluck_primary_key).to contain_exactly( + temporarily_disabled_webhook.id, + legacy_permanently_disabled_webhook.id + ) + end + + it 'migrates in batches' do + web_hooks.create!(recent_failures: 4, backoff_count: 1) + web_hooks.create!(recent_failures: 4, backoff_count: 1) + + stub_const("#{described_class}::BATCH_SIZE", 2) + disabled_until = Time.zone.now.to_fs(:db) + + expect do + migrate! + end.to make_queries_matching( + /UPDATE "web_hooks" SET "recent_failures" = 40, "backoff_count" = 37, "disabled_until" = '#{disabled_until}'/, + 2 + ) + end + end +end diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index d015e99d88c74a5e578f38397d66c6feeb57d7e0..923c0a4eccd27f6215f0aec23f9c407711a0d149 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -6409,7 +6409,7 @@ def has_external_wiki it 'executes hooks which were backed off and are no longer backed off' do project = create(:project) hook = create(:project_hook, project: project, push_events: true) - WebHooks::AutoDisabling::FAILURE_THRESHOLD.succ.times { hook.backoff! } + WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD.succ.times { hook.backoff! } expect_any_instance_of(ProjectHook).to receive(:async_execute).once diff --git a/spec/services/web_hook_service_spec.rb b/spec/services/web_hook_service_spec.rb index e1532656a182558527d96285eaf1708ff492c1d4..2b561ba201895bc6a0600088c4a7bb89c7c07edd 100644 --- a/spec/services/web_hook_service_spec.rb +++ b/spec/services/web_hook_service_spec.rb @@ -627,7 +627,7 @@ response_status: 400 ).deep_stringify_keys ), - 'failed', + 'error', '' ) diff --git a/spec/services/web_hooks/log_execution_service_spec.rb b/spec/services/web_hooks/log_execution_service_spec.rb index ff54508f1af7f00a0301ce0321ae15e736f68752..c2f0b8f67e59247acc6acbf45eed77dcd6b3c56d 100644 --- a/spec/services/web_hooks/log_execution_service_spec.rb +++ b/spec/services/web_hooks/log_execution_service_spec.rb @@ -116,28 +116,6 @@ end end - context 'when response_category is :failed' do - let(:response_category) { :failed } - - before do - data[:response_status] = '400' - end - - it 'increments the failure count' do - expect { service.execute }.to change { project_hook.recent_failures }.by(1) - end - - it 'does not change the disabled_until attribute' do - expect { service.execute }.not_to change { project_hook.disabled_until } - end - - it 'does not allow the failure count to overflow' do - project_hook.update!(recent_failures: 32767) - - expect { service.execute }.not_to change { project_hook.recent_failures } - end - end - context 'when response_category is :error' do let(:response_category) { :error } diff --git a/spec/support/shared_contexts/models/concerns/webhooks/webhook_autodisabling_shared_context.rb b/spec/support/shared_contexts/models/concerns/webhooks/webhook_autodisabling_shared_context.rb new file mode 100644 index 0000000000000000000000000000000000000000..b559374ced9d958c31bb14172477b8927357e979 --- /dev/null +++ b/spec/support/shared_contexts/models/concerns/webhooks/webhook_autodisabling_shared_context.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +RSpec.shared_context 'with webhook auto-disabling failure thresholds' do + where(:recent_failures, :disabled_until, :executable) do + past = 1.minute.ago + now = Time.current + future = 1.minute.from_now + + [ + # At 3 failures the hook is always executable + [3, nil, true], + [3, past, true], + [3, now, true], + [3, future, true], + # At 4 failures the hook is executable only when disabled_until is in the past + [4, nil, false], + [4, past, true], + [4, now, true], + [4, future, false], + # At 39 failures the logic should be the same as with 4 failures (testing the boundary of 40) + [39, nil, false], + [39, past, true], + [39, now, true], + [39, future, false], + # At 40 failures the hook is always disabled + [40, nil, false], + [40, past, false], + [40, now, false], + [40, future, false] + ] + end +end diff --git a/spec/support/shared_examples/models/concerns/auto_disabling_hooks_shared_examples.rb b/spec/support/shared_examples/models/concerns/auto_disabling_hooks_shared_examples.rb index 33b62564e5f9873681c03768d94d06b9817e5227..465209541a0d60b8c45baf331c33af67398446ee 100644 --- a/spec/support/shared_examples/models/concerns/auto_disabling_hooks_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/auto_disabling_hooks_shared_examples.rb @@ -8,144 +8,64 @@ allow(logger).to receive(:info) end - shared_examples 'is tolerant of invalid records' do - specify do - hook.url = nil - - expect(hook).to be_invalid - run_expectation - end - end + describe '.executable and .disabled', :freeze_time do + include_context 'with webhook auto-disabling failure thresholds' - describe '.executable/.disabled', :freeze_time do - let!(:not_executable) do - [ - [4, nil], # Exceeded the grace period, set by #fail! - [4, 1.second.from_now], # Exceeded the grace period, set by #backoff! - [4, Time.current] # Exceeded the grace period, set by #backoff!, edge-case - ].map do |(recent_failures, disabled_until)| - create( - hook_factory, - **default_factory_arguments, - recent_failures: recent_failures, - disabled_until: disabled_until - ) - end - end - - let!(:executables) do - expired = 1.second.ago - borderline = Time.current - suspended = 1.second.from_now - - [ - # Most of these are impossible states, but are included for completeness - [0, nil], - [1, nil], - [3, nil], - [4, expired], - - # Impossible cases: - [3, suspended], - [3, expired], - [3, borderline], - [1, suspended], - [1, expired], - [1, borderline], - [0, borderline], - [0, suspended], - [0, expired] - ].map do |(recent_failures, disabled_until)| - create( - hook_factory, - **default_factory_arguments, + with_them do + let(:web_hook) do + factory_arguments = default_factory_arguments.merge( recent_failures: recent_failures, disabled_until: disabled_until ) - end - end - - it 'finds the correct set of project hooks' do - expect(find_hooks.executable).to match_array executables - expect(find_hooks.executable).to all(be_executable) - # As expected, and consistent - expect(find_hooks.disabled).to match_array not_executable - expect(find_hooks.disabled.map(&:executable?)).not_to include(true) - - # Nothing is missing - expect(find_hooks.executable.to_a + find_hooks.disabled.to_a).to match_array(find_hooks.to_a) - end - - context 'when the flag is disabled' do - before do - stub_feature_flags(auto_disabling_web_hooks: false) + create(hook_factory, **factory_arguments) end - it 'causes all hooks to be considered executable' do - expect(find_hooks.executable.count).to eq(16) + it 'scopes correctly' do + if executable + expect(find_hooks.executable).to match_array([web_hook]) + expect(find_hooks.disabled).to be_empty + else + expect(find_hooks.executable).to be_empty + expect(find_hooks.disabled).to match_array([web_hook]) + end end - it 'causes no hooks to be considered disabled' do - expect(find_hooks.disabled).to be_empty - end - end + context 'when the flag is disabled' do + before do + stub_feature_flags(auto_disabling_web_hooks: false) + end - context 'when silent mode is enabled' do - before do - stub_application_setting(silent_mode_enabled: true) + it 'causes all hooks to be scoped as executable' do + expect(find_hooks.executable).to match_array([web_hook]) + expect(find_hooks.disabled).to be_empty + end end - it 'causes no hooks to be considered executable' do - expect(find_hooks.executable).to be_empty - end + context 'when silent mode is enabled' do + before do + stub_application_setting(silent_mode_enabled: true) + end - it 'causes all hooks to be considered disabled' do - expect(find_hooks.disabled.count).to eq(16) + it 'causes all hooks to be scoped as disabled' do + expect(find_hooks.executable).to be_empty + expect(find_hooks.disabled).to match_array([web_hook]) + end end end end describe '#executable?', :freeze_time do - let(:web_hook) { create(hook_factory, **default_factory_arguments) } - - where(:recent_failures, :not_until, :executable) do - [ - [0, :not_set, true], - [0, :past, true], - [0, :future, true], - [0, :now, true], - [1, :not_set, true], - [1, :past, true], - [1, :future, true], - [3, :not_set, true], - [3, :past, true], - [3, :future, true], - [4, :not_set, false], - [4, :past, true], # expired suspension - [4, :now, false], # active suspension - [4, :future, false] # active suspension - ] - end + include_context 'with webhook auto-disabling failure thresholds' with_them do - # Phasing means we cannot put these values in the where block, - # which is not subject to the frozen time context. - let(:disabled_until) do - case not_until - when :not_set - nil - when :past - 1.minute.ago - when :future - 1.minute.from_now - when :now - Time.current - end - end + let(:web_hook) do + factory_arguments = default_factory_arguments.merge( + recent_failures: recent_failures, + disabled_until: disabled_until + ) - before do - web_hook.update!(recent_failures: recent_failures, disabled_until: disabled_until) + create(hook_factory, **factory_arguments) end it 'has the correct state' do @@ -164,24 +84,17 @@ end end - describe '#enable!' do - it 'makes a hook executable if it was marked as failed' do - hook.recent_failures = 1000 - - expect { hook.enable! }.to change { hook.executable? }.from(false).to(true) + describe '#enable!', :freeze_time do + before do + hook.recent_failures = WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + hook.backoff! end - it 'makes a hook executable if it is currently backed off' do - hook.recent_failures = 1000 - hook.disabled_until = 1.hour.from_now - + it 'makes a hook executable' do expect { hook.enable! }.to change { hook.executable? }.from(false).to(true) end it 'logs relevant information' do - hook.recent_failures = 1000 - hook.disabled_until = 1.hour.from_now - expect(logger) .to receive(:info) .with(a_hash_including( @@ -196,32 +109,50 @@ end it 'does not update hooks unless necessary' do - hook + hook.recent_failures = 0 + hook.backoff_count = 0 + hook.disabled_until = nil sql_count = ActiveRecord::QueryRecorder.new { hook.enable! }.count expect(sql_count).to eq(0) end - include_examples 'is tolerant of invalid records' do - def run_expectation - hook.recent_failures = 1000 + it 'is tolerant of invalid records' do + hook.url = nil - expect { hook.enable! }.to change { hook.executable? }.from(false).to(true) - end + expect(hook).to be_invalid + expect { hook.enable! }.to change { hook.executable? }.from(false).to(true) end end - describe '#backoff!', :freeze_time do + describe '#backoff!' do + around do |example| + if example.metadata[:skip_freeze_time] + example.run + else + freeze_time { example.run } + end + end + context 'when we have not backed off before' do it 'does not disable the hook' do expect { hook.backoff! }.not_to change { hook.executable? }.from(true) + expect(hook.class.executable).to include(hook) end it 'increments recent_failures' do expect { hook.backoff! }.to change { hook.recent_failures }.from(0).to(1) end + it 'does not increment backoff_count' do + expect { hook.backoff! }.not_to change { hook.backoff_count }.from(0) + end + + it 'does not set disabled_until' do + expect { hook.backoff! }.not_to change { hook.disabled_until }.from(nil) + end + it 'logs relevant information' do expect(logger) .to receive(:info) @@ -233,19 +164,29 @@ def run_expectation end end - context 'when we have exhausted the grace period' do + context 'when failures exceed the threshold' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD) + hook.update!(recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD) end - it 'disables the hook' do + it 'temporarily disables the hook' do expect { hook.backoff! }.to change { hook.executable? }.from(true).to(false) + expect(hook).to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + expect(hook.class.executable).not_to include(hook) end it 'increments backoff_count' do expect { hook.backoff! }.to change { hook.backoff_count }.from(0).to(1) end + it 'increments recent_failures' do + expect { hook.backoff! }.to change { + hook.recent_failures + }.from(WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD) + .to(WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1) + end + it 'sets disabled_until' do expect { hook.backoff! }.to change { hook.disabled_until }.from(nil).to(1.minute.from_now) end @@ -256,7 +197,7 @@ def run_expectation .with(a_hash_including( hook_id: hook.id, action: 'backoff', - recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD + 1, + recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1, disabled_until: 1.minute.from_now, backoff_count: 1 )) @@ -264,42 +205,72 @@ def run_expectation hook.backoff! end - context 'when the hook is permanently disabled' do - before do - allow(hook).to receive(:permanently_disabled?).and_return(true) - end + it 'is no longer disabled after the backoff time has elapsed', :skip_freeze_time do + hook.backoff! - it 'does not set disabled_until' do - expect { hook.backoff! }.not_to change { hook.disabled_until } - end + expect(hook).to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + expect(hook.class.executable).not_to include(hook) - it 'does not increment the backoff count' do - expect { hook.backoff! }.not_to change { hook.backoff_count } + travel_to(hook.disabled_until + 1.second) do + expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + expect(hook.class.executable).to include(hook) end end - include_examples 'is tolerant of invalid records' do - def run_expectation - expect { hook.backoff! }.to change { hook.backoff_count }.by(1) + it 'increases the backoff time exponentially', :skip_freeze_time do + hook.backoff! + + expect(hook).to have_attributes( + recent_failures: (WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1), + backoff_count: 1, + disabled_until: be_like_time(Time.zone.now + 1.minute) + ) + + travel_to(hook.disabled_until + 1.second) do + hook.backoff! + + expect(hook).to have_attributes( + recent_failures: (WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 2), + backoff_count: 2, + disabled_until: be_like_time(Time.zone.now + 2.minutes) + ) + end + + travel_to(hook.disabled_until + 1.second) do + hook.backoff! + + expect(hook).to have_attributes( + recent_failures: (WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 3), + backoff_count: 3, + disabled_until: be_like_time(Time.zone.now + 4.minutes) + ) end end - context 'when the flag is disabled' do + context 'when the hook is permanently disabled' do before do - stub_feature_flags(auto_disabling_web_hooks: false) + allow(hook).to receive(:permanently_disabled?).and_return(true) end - it 'does not increment backoff count' do - expect { hook.failed! }.not_to change { hook.backoff_count } + it 'does not do anything' do + sql_count = ActiveRecord::QueryRecorder.new { hook.backoff! }.count + + expect(sql_count).to eq(0) end end - end - end - describe '#failed!' do - include_examples 'is tolerant of invalid records' do - def run_expectation - expect { hook.failed! }.to change { hook.recent_failures }.by(1) + context 'when the hook is temporarily disabled' do + before do + allow(hook).to receive(:temporarily_disabled?).and_return(true) + end + + it 'does not do anything' do + sql_count = ActiveRecord::QueryRecorder.new { hook.backoff! }.count + + expect(sql_count).to eq(0) + end end context 'when the flag is disabled' do @@ -307,36 +278,66 @@ def run_expectation stub_feature_flags(auto_disabling_web_hooks: false) end - it 'does not increment recent failure count' do - expect { hook.failed! }.not_to change { hook.recent_failures } + it 'does not disable the hook' do + expect { hook.backoff! }.not_to change { hook.executable? }.from(true) + expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + expect(hook.class.executable).to include(hook) end end + + it 'is tolerant of invalid records' do + hook.url = nil + + expect(hook).to be_invalid + expect { hook.backoff! }.to change { hook.backoff_count }.by(1) + end end end - describe '#temporarily_disabled?' do - it 'is false when not temporarily disabled' do + describe '#temporarily_disabled? and #permanently_disabled?', :freeze_time do + it 'is initially not disabled at all' do expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled end - it 'allows FAILURE_THRESHOLD initial failures before we back-off' do - WebHooks::AutoDisabling::FAILURE_THRESHOLD.times do + it 'becomes temporarily disabled after a threshold of failures has been exceeded' do + WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD.times do hook.backoff! + expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled end hook.backoff! + expect(hook).to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + end + + context 'when the flag is disabled' do + before do + stub_feature_flags(auto_disabling_web_hooks: false) + end + + it 'is not disabled at all' do + hook.update!(recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD) + hook.backoff! + + expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled + end end - context 'when hook has been told to back off' do + context 'when hook exceeds the permanently disabled threshold' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD) + hook.update!(recent_failures: WebHooks::AutoDisabling::PERMANENTLY_DISABLED_FAILURE_THRESHOLD) hook.backoff! end - it 'is true' do - expect(hook).to be_temporarily_disabled + it 'is permanently disabled' do + expect(hook).to be_permanently_disabled + expect(hook).not_to be_temporarily_disabled end context 'when the flag is disabled' do @@ -344,25 +345,22 @@ def run_expectation stub_feature_flags(auto_disabling_web_hooks: false) end - it 'is false' do + it 'is not disabled at all' do expect(hook).not_to be_temporarily_disabled + expect(hook).not_to be_permanently_disabled end end end - end - describe '#permanently_disabled?' do - it 'is false when not disabled' do - expect(hook).not_to be_permanently_disabled - end - - context 'when hook has been disabled' do + # TODO Remove as part of https://gitlab.com/gitlab-org/gitlab/-/issues/525446 + context 'when hook has no disabled_until set and exceeds TEMPORARILY_DISABLED_FAILURE_THRESHOLD (legacy state)' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) + hook.update!(recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1) end - it 'is true' do + it 'is permanently disabled' do expect(hook).to be_permanently_disabled + expect(hook).not_to be_temporarily_disabled end context 'when the flag is disabled' do @@ -370,8 +368,9 @@ def run_expectation stub_feature_flags(auto_disabling_web_hooks: false) end - it 'is false' do + it 'is not disabled at all' do expect(hook).not_to be_permanently_disabled + expect(hook).not_to be_temporarily_disabled end end end @@ -380,14 +379,14 @@ def run_expectation describe '#alert_status' do subject(:status) { hook.alert_status } - it { is_expected.to eq :executable } + it { is_expected.to eq(:executable) } - context 'when hook has been disabled' do + context 'when hook has been permanently disabled' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) + allow(hook).to receive(:permanently_disabled?).and_return(true) end - it { is_expected.to eq :disabled } + it { is_expected.to eq(:disabled) } context 'when the flag is disabled' do before do @@ -398,13 +397,12 @@ def run_expectation end end - context 'when hook has been backed off' do + context 'when hook has been temporarily disabled' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) - hook.disabled_until = 1.hour.from_now + allow(hook).to receive(:temporarily_disabled?).and_return(true) end - it { is_expected.to eq :temporarily_disabled } + it { is_expected.to eq(:temporarily_disabled) } context 'when the flag is disabled' do before do diff --git a/spec/support/shared_examples/models/concerns/unstoppable_hooks_shared_examples.rb b/spec/support/shared_examples/models/concerns/unstoppable_hooks_shared_examples.rb index cce52fd5fbd4ff9431c642ea122554cea179d017..05943e563b64d90f1b30754bc4685aad1127ee05 100644 --- a/spec/support/shared_examples/models/concerns/unstoppable_hooks_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/unstoppable_hooks_shared_examples.rb @@ -1,97 +1,57 @@ # frozen_string_literal: true RSpec.shared_examples 'a hook that does not get automatically disabled on failure' do - describe '.executable/.disabled', :freeze_time do - let(:attributes_for_webhooks) do - attributes_list = attributes_for_list(hook_factory, 13) - - merged_attributes = attributes_list.zip([ - [0, Time.current], - [0, 1.minute.from_now], - [1, 1.minute.from_now], - [3, 1.minute.from_now], - [4, nil], - [4, 1.day.ago], - [4, 1.minute.from_now], - [0, nil], - [0, 1.day.ago], - [1, nil], - [1, 1.day.ago], - [3, nil], - [3, 1.day.ago] - ]) - - merged_attributes.map do |attributes, (recent_failures, disabled_until)| - attributes.merge(**default_factory_arguments, recent_failures: recent_failures, disabled_until: disabled_until) - end - end + let(:exeeded_failure_threshold) { WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1 } - let(:webhooks) { described_class.create!(attributes_for_webhooks) } + describe '.executable/.disabled', :freeze_time do + include_context 'with webhook auto-disabling failure thresholds' - it 'finds the correct set of project hooks' do - expect(find_hooks).to all(be_executable) - expect(find_hooks.executable).to match_array(webhooks) - expect(find_hooks.disabled).to be_empty - end + with_them do + let(:web_hook) do + factory_arguments = default_factory_arguments.merge( + recent_failures: recent_failures, + disabled_until: disabled_until + ) - context 'when silent mode is enabled' do - before do - stub_application_setting(silent_mode_enabled: true) + create(hook_factory, **factory_arguments) end - it 'causes no hooks to be considered executable' do - expect(find_hooks.executable).to be_empty + it 'is always enabled' do + expect(find_hooks).to all(be_executable) + expect(find_hooks.executable).to match_array(find_hooks) + expect(find_hooks.disabled).to be_empty end - it 'causes all hooks to be considered disabled' do - expect(find_hooks.disabled).to match_array(webhooks) + context 'when silent mode is enabled' do + before do + stub_application_setting(silent_mode_enabled: true) + end + + it 'causes no hooks to be considered executable' do + expect(find_hooks.executable).to be_empty + end + + it 'causes all hooks to be considered disabled' do + expect(find_hooks.disabled).to match_array(find_hooks) + end end end end describe '#executable?', :freeze_time do - let(:web_hook) { build(hook_factory, **default_factory_arguments) } - - where(:recent_failures, :not_until) do - [ - [0, :not_set], - [0, :past], - [0, :future], - [0, :now], - [1, :not_set], - [1, :past], - [1, :future], - [3, :not_set], - [3, :past], - [3, :future], - [4, :not_set], - [4, :past], # expired suspension - [4, :now], # active suspension - [4, :future] # active suspension - ] - end + include_context 'with webhook auto-disabling failure thresholds' with_them do - # Phasing means we cannot put these values in the where block, - # which is not subject to the frozen time context. - let(:disabled_until) do - case not_until - when :not_set - nil - when :past - 1.minute.ago - when :future - 1.minute.from_now - when :now - Time.current - end - end + let(:web_hook) do + factory_arguments = default_factory_arguments.merge( + recent_failures: recent_failures, + disabled_until: disabled_until + ) - before do - web_hook.update!(recent_failures: recent_failures, disabled_until: disabled_until) + build(hook_factory, **factory_arguments) end - it 'has the correct state' do + it 'is always executable' do expect(web_hook).to be_executable end end @@ -129,7 +89,7 @@ context 'when we have exhausted the grace period' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD) + hook.update!(recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD) end it 'does not disable the hook' do @@ -144,7 +104,7 @@ expect(hook).not_to be_temporarily_disabled # Backing off - WebHooks::AutoDisabling::FAILURE_THRESHOLD.times do + WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD.times do hook.backoff! expect(hook).not_to be_temporarily_disabled end @@ -159,7 +119,7 @@ # Initially expect(hook).not_to be_permanently_disabled - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) + hook.update!(recent_failures: exeeded_failure_threshold) expect(hook).not_to be_permanently_disabled end @@ -172,7 +132,7 @@ context 'when hook has been disabled' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) + hook.update!(recent_failures: exeeded_failure_threshold) end it { is_expected.to eq :executable } @@ -180,7 +140,7 @@ context 'when hook has been backed off' do before do - hook.update!(recent_failures: WebHooks::AutoDisabling::EXCEEDED_FAILURE_THRESHOLD) + hook.update!(recent_failures: exeeded_failure_threshold) hook.disabled_until = 1.hour.from_now end diff --git a/spec/support/shared_examples/models/concerns/web_hooks/has_web_hooks_shared_examples.rb b/spec/support/shared_examples/models/concerns/web_hooks/has_web_hooks_shared_examples.rb index 113dcc266fc80b94b42ed5f83d7e83983ed646ab..841fe04a035c7c463d53704a385e2f75200f1113 100644 --- a/spec/support/shared_examples/models/concerns/web_hooks/has_web_hooks_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/web_hooks/has_web_hooks_shared_examples.rb @@ -19,7 +19,7 @@ context 'when there is a failed hook' do before do hook = create_hook - hook.update!(recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD + 1) + hook.update!(recent_failures: WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1) end it { is_expected.to eq(true) } diff --git a/spec/support/shared_examples/models/concerns/web_hooks/web_hook_shared_examples.rb b/spec/support/shared_examples/models/concerns/web_hooks/web_hook_shared_examples.rb index 7ec0564eafbfcd1bcc89fe6e80d2419faf99248b..78698946b88e46b2a1f646a216ea5ed608974bd3 100644 --- a/spec/support/shared_examples/models/concerns/web_hooks/web_hook_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/web_hooks/web_hook_shared_examples.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.shared_examples 'a webhook' do |factory:, auto_disabling: true| +RSpec.shared_examples 'a webhook' do |factory:| include AfterNextHelpers let(:hook) { build(factory) } @@ -560,148 +560,4 @@ it { expect(hook.masked_token).to eq described_class::SECRET_MASK } end end - - describe '#backoff!', if: auto_disabling do - context 'when we have not backed off before' do - it 'increments the recent_failures count but does not disable the hook yet' do - expect { hook.backoff! }.to change { hook.recent_failures }.to(1) - expect(hook.class.executable).to include(hook) - end - end - - context 'when hook is at the failure threshold' do - before do - WebHooks::AutoDisabling::FAILURE_THRESHOLD.times { hook.backoff! } - end - - it 'is not yet disabled' do - expect(hook.class.executable).to include(hook) - expect(hook).to have_attributes( - recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD, - backoff_count: 0, - disabled_until: nil - ) - end - - context 'when hook is next told to backoff' do - before do - hook.backoff! - end - - it 'causes the hook to become disabled for initial backoff period' do - expect(hook.class.executable).not_to include(hook) - expect(hook).to have_attributes( - recent_failures: (WebHooks::AutoDisabling::FAILURE_THRESHOLD + 1), - backoff_count: 1, - disabled_until: 1.minute.from_now - ) - end - - context 'when the backoff time has elapsed', :skip_freeze_time do - it 'is no longer disabled' do - travel_to(hook.disabled_until + 1.minute) do - expect(hook.class.executable).to include(hook) - end - end - - context 'when the hook is next told to backoff' do - it 'disables the hook again, increasing the backoff time exponentially' do - travel_to(hook.disabled_until + 1.minute) do - hook.backoff! - - expect(hook.class.executable).not_to include(hook) - expect(hook).to have_attributes( - recent_failures: (WebHooks::AutoDisabling::FAILURE_THRESHOLD + 2), - backoff_count: 2, - disabled_until: 2.minutes.from_now - ) - end - end - end - end - end - end - - it 'does not do anything if the hook is currently temporarily disabled' do - allow(hook).to receive(:temporarily_disabled?).and_return(true) - - sql_count = ActiveRecord::QueryRecorder.new { hook.backoff! }.count - - expect(sql_count).to eq(0) - end - - it 'does not do anything if the hook is currently permanently disabled' do - allow(hook).to receive(:permanently_disabled?).and_return(true) - - sql_count = ActiveRecord::QueryRecorder.new { hook.backoff! }.count - - expect(sql_count).to eq(0) - end - - context 'when the counter are above MAX_FAILURES' do - let(:max_failures) { WebHooks::AutoDisabling::MAX_FAILURES } - - before do - hook.update!( - recent_failures: (max_failures + 1), - backoff_count: (max_failures + 1), - disabled_until: 1.hour.ago - ) - end - - it 'reduces the counter to MAX_FAILURES' do - hook.backoff! - - expect(hook).to have_attributes( - recent_failures: max_failures, - backoff_count: max_failures - ) - end - end - end - - describe '#failed!', if: auto_disabling do - it 'increments the recent_failures count but does not disable the hook yet' do - expect { hook.failed! }.to change { hook.recent_failures }.to(1) - expect(hook.class.executable).to include(hook) - end - - context 'when hook is at the failure threshold' do - before do - WebHooks::AutoDisabling::FAILURE_THRESHOLD.times { hook.failed! } - end - - it 'is not yet disabled' do - expect(hook.class.executable).to include(hook) - expect(hook).to have_attributes( - recent_failures: WebHooks::AutoDisabling::FAILURE_THRESHOLD, - backoff_count: 0, - disabled_until: nil - ) - end - - context 'when hook is next failed' do - before do - hook.failed! - end - - it 'causes the hook to become disabled' do - expect(hook.class.executable).not_to include(hook) - expect(hook).to have_attributes( - recent_failures: (WebHooks::AutoDisabling::FAILURE_THRESHOLD + 1), - backoff_count: 0, - disabled_until: nil - ) - end - end - end - - it 'does not do anything if recent_failures is at MAX_FAILURES' do - hook.recent_failures = WebHooks::AutoDisabling::MAX_FAILURES - - sql_count = ActiveRecord::QueryRecorder.new { hook.failed! }.count - - expect(sql_count).to eq(0) - end - end end diff --git a/spec/support/shared_examples/requests/api/hooks_shared_examples.rb b/spec/support/shared_examples/requests/api/hooks_shared_examples.rb index 45637423c9c8c95021b8c971a8f3ad4114cc23c9..af119d0cc4de5657c9f1dce040c5835de882b071 100644 --- a/spec/support/shared_examples/requests/api/hooks_shared_examples.rb +++ b/spec/support/shared_examples/requests/api/hooks_shared_examples.rb @@ -223,7 +223,7 @@ def hook_param_overrides context 'the hook is disabled' do before do - hook.update!(recent_failures: hook.class::EXCEEDED_FAILURE_THRESHOLD) + hook.update!(recent_failures: hook.class::TEMPORARILY_DISABLED_FAILURE_THRESHOLD + 1) end it "has the correct alert status", :aggregate_failures do @@ -237,7 +237,7 @@ def hook_param_overrides context 'the hook is backed-off' do before do - WebHooks::AutoDisabling::FAILURE_THRESHOLD.times { hook.backoff! } + WebHooks::AutoDisabling::TEMPORARILY_DISABLED_FAILURE_THRESHOLD.times { hook.backoff! } hook.backoff! end diff --git a/spec/views/projects/hooks/edit.html.haml_spec.rb b/spec/views/projects/hooks/edit.html.haml_spec.rb index 68dd6faeb76b497b687b2c913486270095cbb1d9..06c4598a867a79373f976b52df705e7cd788032a 100644 --- a/spec/views/projects/hooks/edit.html.haml_spec.rb +++ b/spec/views/projects/hooks/edit.html.haml_spec.rb @@ -39,7 +39,7 @@ it 'renders alert' do render - expect(rendered).to have_text(s_('Webhooks|Webhook failed to connect')) + expect(rendered).to have_text(s_('Webhooks|Webhook disabled')) end end @@ -52,7 +52,7 @@ it 'renders alert' do render - expect(rendered).to have_text(s_('Webhooks|Webhook fails to connect')) + expect(rendered).to have_text(s_('Webhooks|Webhook temporarily disabled')) end end end diff --git a/spec/views/projects/hooks/index.html.haml_spec.rb b/spec/views/projects/hooks/index.html.haml_spec.rb index e876ace3fcb554cd460607f4cbddc1c646742614..a6551ad86a0176e68d48ef88a9c8967d1d27b8ef 100644 --- a/spec/views/projects/hooks/index.html.haml_spec.rb +++ b/spec/views/projects/hooks/index.html.haml_spec.rb @@ -19,9 +19,9 @@ expect(rendered).to have_css('.gl-heading-2', text: _('Webhooks')) expect(rendered).to have_text('Webhooks') - expect(rendered).not_to have_css('.gl-badge', text: _('Disabled')) - expect(rendered).not_to have_css('.gl-badge', text: s_('Webhooks|Failed to connect')) - expect(rendered).not_to have_css('.gl-badge', text: s_('Webhooks|Fails to connect')) + expect(rendered).not_to have_css('.gl-badge', text: s_('Webhooks|Rate limited')) + expect(rendered).not_to have_css('.gl-badge', text: s_('Webhooks|Disabled')) + expect(rendered).not_to have_css('.gl-badge', text: s_('Webhooks|Temporarily disabled')) end context 'webhook is rate limited' do @@ -29,10 +29,10 @@ allow(existing_hook).to receive(:rate_limited?).and_return(true) end - it 'renders "Disabled" badge' do + it 'renders "Rate limited" badge' do render - expect(rendered).to have_css('.gl-badge', text: _('Disabled')) + expect(rendered).to have_css('.gl-badge', text: _('Webhooks|Rate limited')) end end @@ -41,10 +41,10 @@ allow(existing_hook).to receive(:permanently_disabled?).and_return(true) end - it 'renders "Failed to connect" badge' do + it 'renders "Disabled" badge' do render - expect(rendered).to have_css('.gl-badge', text: s_('Webhooks|Failed to connect')) + expect(rendered).to have_css('.gl-badge', text: s_('Webhooks|Disabled')) end end @@ -53,10 +53,10 @@ allow(existing_hook).to receive(:temporarily_disabled?).and_return(true) end - it 'renders "Fails to connect" badge' do + it 'renders "Temporarily disabled" badge' do render - expect(rendered).to have_css('.gl-badge', text: s_('Webhooks|Fails to connect')) + expect(rendered).to have_css('.gl-badge', text: s_('Webhooks|Temporarily disabled')) end end end diff --git a/spec/workers/merge_request_cleanup_refs_worker_spec.rb b/spec/workers/merge_request_cleanup_refs_worker_spec.rb index 6c87b6827a80b0a9f06bf7bdb57ac4122b5587af..0b96fe66ac3d67f5ef34741b6839aa9edac8e49e 100644 --- a/spec/workers/merge_request_cleanup_refs_worker_spec.rb +++ b/spec/workers/merge_request_cleanup_refs_worker_spec.rb @@ -30,8 +30,8 @@ expect(cleanup_schedule.completed_at).to be_nil end - context "and cleanup schedule has already failed #{WebHooks::AutoDisabling::FAILURE_THRESHOLD} times" do - let(:failed_count) { WebHooks::AutoDisabling::FAILURE_THRESHOLD } + context "and cleanup schedule has already failed #{described_class::FAILURE_THRESHOLD} times" do + let(:failed_count) { described_class::FAILURE_THRESHOLD } it 'marks the cleanup schedule as failed and track the failure' do expect(cleanup_schedule.reload).to be_failed