diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index a2ff67a533f5eeb22fe766d750dea5b70efcccd3..a518b9d1ec5e5e39bc47bbce62a7b07300843285 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -1059,6 +1059,9 @@ Settings.cron_jobs['analytics_dump_ai_user_metrics_database_write_buffer_cron_worker'] ||= {} Settings.cron_jobs['analytics_dump_ai_user_metrics_database_write_buffer_cron_worker']['cron'] ||= "*/10 * * * *" Settings.cron_jobs['analytics_dump_ai_user_metrics_database_write_buffer_cron_worker']['job_class'] = 'Analytics::DumpAiUserMetricsWriteBufferCronWorker' + Settings.cron_jobs['analytics_refresh_events_counts_cron_worker'] ||= {} + Settings.cron_jobs['analytics_refresh_events_counts_cron_worker']['cron'] ||= "0 1 * * *" + Settings.cron_jobs['analytics_refresh_events_counts_cron_worker']['job_class'] = 'Analytics::EventsCountsRefreshWorker' Settings.cron_jobs['delete_expired_vulnerability_exports_worker'] ||= {} Settings.cron_jobs['delete_expired_vulnerability_exports_worker']['cron'] ||= '0 4 * * *' Settings.cron_jobs['delete_expired_vulnerability_exports_worker']['job_class'] = 'Vulnerabilities::DeleteExpiredExportsWorker' diff --git a/db/docs/views/ai_events_counts_mv.yml b/db/docs/views/ai_events_counts_mv.yml new file mode 100644 index 0000000000000000000000000000000000000000..222bb7766def94321435d71eed78b12d3730a8c2 --- /dev/null +++ b/db/docs/views/ai_events_counts_mv.yml @@ -0,0 +1,9 @@ +--- +view_name: ai_events_counts_mv +feature_categories: +- value_stream_management +description: Materialized view that stores ai usage events aggregated counts. +introduced_by_url: +milestone: '18.04' +table_size: small +gitlab_schema: gitlab_main_org diff --git a/db/migrate/20250825132150_create_ai_events_counts_mv.rb b/db/migrate/20250825132150_create_ai_events_counts_mv.rb new file mode 100644 index 0000000000000000000000000000000000000000..18836b226765dd4cc37e47cada97952360de81be --- /dev/null +++ b/db/migrate/20250825132150_create_ai_events_counts_mv.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +class CreateAiEventsCountsMv < Gitlab::Database::Migration[2.3] + milestone '18.4' + + def up + execute <<~SQL + CREATE MATERIALIZED VIEW IF NOT EXISTS ai_events_counts_mv AS + SELECT + date_trunc('day', timestamp)::date AS events_date, + namespace_id, + user_id, + event, + count(*) AS total_occurrences + FROM ai_usage_events + GROUP BY + events_date, + namespace_id, + user_id, + event + WITH NO DATA + SQL + + # rubocop:disable Migration/AddIndex -- No Downtime required, the table is empty + add_index :ai_events_counts_mv, + [:events_date, :event, :namespace_id, :user_id], + unique: true, + name: 'index_ai_events_counts_mv_uniq', + include: [:total_occurrences] + # rubocop:enable Migration/AddIndex + end + + def down + execute "DROP MATERIALIZED VIEW IF EXISTS ai_events_counts_mv;" + end +end diff --git a/db/schema_migrations/20250825132150 b/db/schema_migrations/20250825132150 new file mode 100644 index 0000000000000000000000000000000000000000..d4fae4066d2d93753b9839ec65d9356126d879b3 --- /dev/null +++ b/db/schema_migrations/20250825132150 @@ -0,0 +1 @@ +4dc8ab344f753cb570a69342d3f3381fd6f2c8b46678433351304b033c74a74a \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index be4cf22d3aac58516e0f0068213178147f1d868c..b5beab5e3761bf0b8e931ca8733206fdfdb8edbf 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -9345,6 +9345,16 @@ CREATE SEQUENCE ai_duo_chat_events_id_seq ALTER SEQUENCE ai_duo_chat_events_id_seq OWNED BY ai_duo_chat_events.id; +CREATE MATERIALIZED VIEW ai_events_counts_mv AS + SELECT (date_trunc('day'::text, "timestamp"))::date AS events_date, + namespace_id, + user_id, + event, + count(*) AS total_occurrences + FROM ai_usage_events + GROUP BY ((date_trunc('day'::text, "timestamp"))::date), namespace_id, user_id, event + WITH NO DATA; + CREATE TABLE ai_feature_settings ( id bigint NOT NULL, created_at timestamp with time zone NOT NULL, @@ -36986,6 +36996,8 @@ CREATE INDEX index_ai_duo_chat_events_on_personal_namespace_id ON ONLY ai_duo_ch CREATE INDEX index_ai_duo_chat_events_on_user_id ON ONLY ai_duo_chat_events USING btree (user_id); +CREATE UNIQUE INDEX index_ai_events_counts_mv_uniq ON ai_events_counts_mv USING btree (events_date, event, namespace_id, user_id) INCLUDE (total_occurrences); + CREATE INDEX index_ai_feature_settings_on_ai_self_hosted_model_id ON ai_feature_settings USING btree (ai_self_hosted_model_id); CREATE UNIQUE INDEX index_ai_feature_settings_on_feature ON ai_feature_settings USING btree (feature); diff --git a/ee/app/graphql/resolvers/analytics/ai_usage/event_counts_metric_resolver.rb b/ee/app/graphql/resolvers/analytics/ai_usage/event_counts_metric_resolver.rb new file mode 100644 index 0000000000000000000000000000000000000000..71564354d1fa71497da877b2ea7088b9edd9cf77 --- /dev/null +++ b/ee/app/graphql/resolvers/analytics/ai_usage/event_counts_metric_resolver.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +module Resolvers + module Analytics + module AiUsage + class EventCountsMetricResolver < BaseResolver + type ::Types::Analytics::AiUsage::AiUsageEventType.connection_type, null: true + + argument :start_date, Types::DateType, + required: false, + description: 'Date range to start from. Default is 7 days ago.' + + argument :end_date, Types::DateType, + required: false, + description: 'Date range to end at. Default is the end of current day.' + + def ready?(**args) + validate_params!(args) + + super + end + + def resolve(**args) + params = params_with_defaults(args) + + EventsCountMetricsService.execute... + end + + private + + def namespace + object.is_a?(Namespace) ? object : object.project_namespace + end + + def params_with_defaults(args) + { start_date: 7.days.ago.beginning_of_day, end_date: Time.current.end_of_day }.merge(args) + end + end + end + end +end diff --git a/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb b/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb index 9d3f195b358f73228c54b99dbf230705d0c74a59..1c79e3cecb4771defc3cc9612ff1bfa873692349 100644 --- a/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb +++ b/ee/app/graphql/types/analytics/ai_usage/ai_usage_data_type.rb @@ -18,6 +18,17 @@ class AiUsageDataType < BaseObject field :all, description: 'All Duo usage events.', resolver: ::Resolvers::Analytics::AiUsage::UsageEventsResolver + + field :code_suggestions_acceptance_rate, + GraphQL::Types::Float, + resolver: ::Resolvers::Analytics::AiUsage::EventCountsMetricResolver + + def code_suggestion_acceptance_rate + namespace = + object.is_a?(Namespace) ? object : object.project_namespace + + Ai::EventsCount.code_suggestion_acceptance_rate_for(namespace) + end end end end diff --git a/ee/app/models/ai/events_count.rb b/ee/app/models/ai/events_count.rb new file mode 100644 index 0000000000000000000000000000000000000000..fb0dfd2a1ff1014219471eaf281638ec087727d4 --- /dev/null +++ b/ee/app/models/ai/events_count.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Ai + class EventsCount < ApplicationRecord + include MaterializedView + + self.table_name = "ai_events_counts_mv" + + enum :event, Gitlab::Tracking::AiTracking.registered_events + + validates :namespace_id, presence: true + + def self.pg_timeout + 40000 + end + + def code_suggestion_acceptance_rate_for(namespace, start_date: nil, end_date: nil) + target_namespace_ids = Namespace + .where(type: 'Project') + .where("traversal_ids @> ?", "{#{root_namespace_id}}") + .pluck(:id) + + return 0.0 if target_namespace_ids.empty? + + events_data = where(namespace_id: target_namespace_ids) + .where(events_date: start_date...end_date) + .where(event: [2, 3]) + .group(:event) + .sum(:total_occurrences) + + event_2_total = events_data[2] || 0 + event_3_total = events_data[3] || 0 + + return 0.0 if event_2_total.zero? + + (event_3_total.to_f / event_2_total * 100).round(2) + end + end +end diff --git a/ee/app/models/concerns/materialized_view.rb b/ee/app/models/concerns/materialized_view.rb new file mode 100644 index 0000000000000000000000000000000000000000..41e578abd3062f648dab5dae9f8f98322564b771 --- /dev/null +++ b/ee/app/models/concerns/materialized_view.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module MaterializedView # rubocop:disable Gitlab/BoundedContexts -- general purpose concern + extend ActiveSupport::Concern + include Gitlab::Database::Migrations::TimeoutHelpers + + class_methods do + # Safely refresh materialized view + # by intializing it first if needed. + # + # Disables statement timeout if disable_database_timeout? + # returns true in subclasses. + def safe_refresh! + return true if initialized? && refresh! + + with_statement_timeout { refresh!(lock_table: true) } + end + + def initialized? + result = connection.execute <<-SQL + SELECT ispopulated + FROM pg_matviews + WHERE matviewname = '#{table_name}' + SQL + + result.first&.fetch('ispopulated', false) + end + + # Used to define how much time is allowed + # as statement_timeout in PostgreSQL + # 0 - does not disable timeout + def pg_timeout + 0 + end + + private + + # The first materialized view refresh + # cannot be made CONCURRENTLY and will lock the table from reading. + # + # Pass lock_table as true when populating for the first time. + # Use carefully in production. + def refresh!(lock_table: false) + concurrently_statement = + lock_table ? '' : 'CONCURRENTLY ' + + connection.execute( + "REFRESH MATERIALIZED VIEW #{concurrently_statement}#{table_name};" + ) + + true + end + + # Adquires a exclusive lock on table. + # Use carefully in production. + def initialize! + refresh!(lock_table: true) + end + + def with_statement_timeout + return yield if pg_timeout <= 0 + + # Allows maximum timeout of 1 minute + timeout = pg_timeout >= 60000 ? 60000 : pg_timeout + + begin + connection.execute("SET statement_timeout TO #{timeout}") + + yield + ensure + connection.execute('RESET statement_timeout') + end + end + end +end diff --git a/ee/app/workers/all_queues.yml b/ee/app/workers/all_queues.yml index 333f04d915329b9b8ee510084fd568026b4f8d09..9fe5d6b3ed420e32a81a6e7aea95d1331b925908 100644 --- a/ee/app/workers/all_queues.yml +++ b/ee/app/workers/all_queues.yml @@ -133,6 +133,16 @@ :idempotent: true :tags: [] :queue_namespace: :cronjob +- :name: cronjob:analytics_events_counts_refresh + :worker_name: Analytics::EventsCountsRefreshWorker + :feature_category: :value_stream_management + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] + :queue_namespace: :cronjob - :name: cronjob:analytics_value_stream_dashboard_count :worker_name: Analytics::ValueStreamDashboard::CountWorker :feature_category: :value_stream_management diff --git a/ee/app/workers/analytics/events_counts_refresh_worker.rb b/ee/app/workers/analytics/events_counts_refresh_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..2912d34d93ff5d70ceaa40e17855b4a0e3baa3ac --- /dev/null +++ b/ee/app/workers/analytics/events_counts_refresh_worker.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Analytics + # Refreshes ai_events_counts_mv PostgreSQL materialized view + class EventsCountsRefreshWorker + include ApplicationWorker + include CronjobQueue # rubocop:disable Scalability/CronWorkerContext -- worker does not perform work scoped to a context + + data_consistency :always + feature_category :value_stream_management + urgency :low + idempotent! + + def perform + Ai::EventsCount.safe_refresh! + end + end +end diff --git a/ee/spec/initializers/1_settings_spec.rb b/ee/spec/initializers/1_settings_spec.rb index e82f536242169243523abd85f26cde65462a5954..994e8ce9b1b915e50aeca562b5b77b212be42bd1 100644 --- a/ee/spec/initializers/1_settings_spec.rb +++ b/ee/spec/initializers/1_settings_spec.rb @@ -23,6 +23,7 @@ analytics_cycle_analytics_stage_aggregation_worker analytics_devops_adoption_create_all_snapshots_worker analytics_dump_ai_user_metrics_database_write_buffer_cron_worker + analytics_refresh_events_counts_cron_worker analytics_usage_trends_count_job_trigger_worker analytics_value_stream_dashboard_count_worker app_sec_dast_profile_schedule_worker diff --git a/ee/spec/models/ai/events_count_spec.rb b/ee/spec/models/ai/events_count_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..ea7cccfd885e87888893374ff22b037e43d1f6ff --- /dev/null +++ b/ee/spec/models/ai/events_count_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Ai::EventsCount, feature_category: :value_stream_management do + it { is_expected.to validate_presence_of(:namespace_id) } + + it { expect(described_class.pg_timeout).to eq(40000) } + + it_behaves_like 'a materialized view' do + let_it_be(:now) { 1.week.ago } + let_it_be(:project_namespace_1) { create(:project_namespace) } + let_it_be(:project_namespace_2) { create(:project_namespace) } + let_it_be(:user_1) { create(:user) } + let_it_be(:user_2) { create(:user) } + + let_it_be(:event_1) { create(:ai_usage_event, timestamp: now + 1.hour, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_1.id, user: user_1) } + let_it_be(:event_2) { create(:ai_usage_event, timestamp: now, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_1.id, user: user_1) } + let_it_be(:event_3) { create(:ai_usage_event, timestamp: now, event: 'code_suggestion_accepted_in_ide', namespace_id: project_namespace_1.id, user: user_1) } + let_it_be(:event_4) { create(:ai_usage_event, timestamp: now, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_2.id, user: user_1) } + let_it_be(:event_5) { create(:ai_usage_event, timestamp: now, event: 'code_suggestion_accepted_in_ide', namespace_id: project_namespace_1.id, user: user_2) } + let_it_be(:event_6) { create(:ai_usage_event, timestamp: now, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_2.id, user: user_2) } + let_it_be(:event_7) { create(:ai_usage_event, timestamp: now + 1.day, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_2.id, user: user_2) } + let_it_be(:event_8) { create(:ai_usage_event, timestamp: now + 1.day + 1.hour, event: 'code_suggestion_shown_in_ide', namespace_id: project_namespace_2.id, user: user_2) } + + let(:expected_results) do + [ + { events_date: now.to_date, namespace_id: project_namespace_1.id, user_id: user_1.id, event: 'code_suggestion_shown_in_ide', total_occurrences: 2 }, + { events_date: now.to_date, namespace_id: project_namespace_1.id, user_id: user_1.id, event: 'code_suggestion_accepted_in_ide', total_occurrences: 1 }, + { events_date: now.to_date, namespace_id: project_namespace_2.id, user_id: user_1.id, event: 'code_suggestion_shown_in_ide', total_occurrences: 1 }, + { events_date: now.to_date, namespace_id: project_namespace_1.id, user_id: user_2.id, event: 'code_suggestion_accepted_in_ide', total_occurrences: 1 }, + { events_date: now.to_date, namespace_id: project_namespace_2.id, user_id: user_2.id, event: 'code_suggestion_shown_in_ide', total_occurrences: 1 }, + { events_date: (now + 1.day).to_date, namespace_id: project_namespace_2.id, user_id: user_2.id, event: 'code_suggestion_shown_in_ide', total_occurrences: 2 } + ] + end + end +end diff --git a/ee/spec/support/shared_examples/models/concerns/materialized_view_shared_examples.rb b/ee/spec/support/shared_examples/models/concerns/materialized_view_shared_examples.rb new file mode 100644 index 0000000000000000000000000000000000000000..2a3255b4369828e8cf441ad42ce5577e2aee3486 --- /dev/null +++ b/ee/spec/support/shared_examples/models/concerns/materialized_view_shared_examples.rb @@ -0,0 +1,124 @@ +# frozen_string_literal: true + +RSpec.shared_examples 'a materialized view' do + describe '.initialized?' do + subject(:initialized?) do + described_class.initialized? + end + + it { is_expected.to be_falsey } + + context 'when materialized view has been refreshed at least once' do + before do + described_class.send(:refresh!, lock_table: true) + end + + it { is_expected.to be_truthy } + end + end + + describe '.safe_refresh!' do + context 'when materialized view is already initialized' do + before do + allow(described_class).to receive(:initialized?).and_return(true) + described_class.send(:initialize!) + end + + it 'refreshes materialized view without locking table' do + expect(described_class.connection).to receive(:execute) + .with("REFRESH MATERIALIZED VIEW CONCURRENTLY #{described_class.table_name};") + + result = described_class.safe_refresh! + + expect(result).to be true + end + + it 'refreshes table data correctly' do + described_class.safe_refresh! + + results = described_class.all.map(&:attributes) + + expect(results.map(&:with_indifferent_access)).to match_array(expected_results) + end + end + + context 'when materialized view is not initialized' do + before do + allow(described_class).to receive(:initialized?).and_return(false) + end + + it 'calls refresh with lock_table: true within with_statement_timeout block' do + expect(described_class.connection).to receive(:execute) + .with("REFRESH MATERIALIZED VIEW #{described_class.table_name};") + + + expect(described_class).to receive(:with_statement_timeout).and_yield + expect(described_class).to receive(:refresh!).with(lock_table: true).and_call_original + + described_class.safe_refresh! + end + + it 'refreshes table data correctly' do + described_class.safe_refresh! + + results = described_class.all.map(&:attributes) + + expect(results.map(&:with_indifferent_access)).to match_array(expected_results) + end + end + end + + describe '.with_statement_timeout' do + let(:block_result) { 'block executed' } + + context 'when pg_timeout is 0' do + before do + allow(described_class).to receive(:pg_timeout).and_return(0) + end + + it 'executes the block without setting timeout' do + expect(described_class.connection).not_to receive(:execute) + + result = described_class.send(:with_statement_timeout) { block_result } + + expect(result).to eq(block_result) + end + end + + context 'when pg_timeout is greater than 0' do + context 'when pg_timeout is less than 60000' do + let(:pg_timeout_value) { 30000 } + + before do + allow(described_class).to receive(:pg_timeout).and_return(pg_timeout_value) + end + + it 'sets statement_timeout to pg_timeout value' do + expect(described_class.connection).to receive(:execute).with("SET statement_timeout TO #{pg_timeout_value}") + expect(described_class.connection).to receive(:execute).with('RESET statement_timeout') + + result = described_class.send(:with_statement_timeout) { block_result } + + expect(result).to eq(block_result) + end + end + + context 'when pg_timeout is greater than 60000' do + let(:pg_timeout_value) { 120000 } + + before do + allow(described_class).to receive(:pg_timeout).and_return(pg_timeout_value) + end + + it 'caps the timeout at 60000 milliseconds (1 minute)' do + expect(described_class.connection).to receive(:execute).with("SET statement_timeout TO 60000") + expect(described_class.connection).to receive(:execute).with('RESET statement_timeout') + + result = described_class.send(:with_statement_timeout) { block_result } + + expect(result).to eq(block_result) + end + end + end + end +end diff --git a/ee/spec/workers/analytics/events_counts_refresh_worker_spec.rb b/ee/spec/workers/analytics/events_counts_refresh_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..1b30422391d818aa82ff59d4f2f8f5aa0f33fa1c --- /dev/null +++ b/ee/spec/workers/analytics/events_counts_refresh_worker_spec.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +require 'spec_helper' + +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Analytics::EventsCountsRefreshWorker, , feature_category: :value_stream_management do + let(:worker) { described_class.new } + + describe '#perform' do + before do + allow(worker).to receive(:disable_statement_timeout).and_yield + end + + context 'when Ai::EventsCount is initialized' do + before do + allow(Ai::EventsCount).to receive(:initialized?).and_return(true) + allow(Ai::EventsCount).to receive(:refresh!) + end + + it 'calls refresh! on Ai::EventsCount' do + worker.perform + + expect(Ai::EventsCount).to have_received(:refresh!) + end + + it 'does not call initialize! on Ai::EventsCount' do + allow(Ai::EventsCount).to receive(:initialize!) + + worker.perform + + expect(Ai::EventsCount).not_to have_received(:initialize!) + end + + it 'disables statement timeout' do + worker.perform + + expect(worker).to have_received(:disable_statement_timeout) + end + end + + context 'when Ai::EventsCount is not initialized' do + before do + allow(Ai::EventsCount).to receive(:initialized?).and_return(false) + allow(Ai::EventsCount).to receive(:initialize!) + end + + it 'calls initialize! on Ai::EventsCount' do + worker.perform + + expect(Ai::EventsCount).to have_received(:initialize!) + end + + it 'does not call refresh! on Ai::EventsCount' do + allow(Ai::EventsCount).to receive(:refresh!) + + worker.perform + + expect(Ai::EventsCount).not_to have_received(:refresh!) + end + + it 'disables statement timeout' do + worker.perform + + expect(worker).to have_received(:disable_statement_timeout) + end + end + end +end diff --git a/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb b/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb index 9ff100e9da676c11e99e2829960825582ed69e88..2b09280a5e68f1c9fe2eefa2fe75fffd4cb3ff93 100644 --- a/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb +++ b/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection.rb @@ -13,16 +13,11 @@ def enabled? true end - # There is a special case where CREATE VIEW DDL statement can include DML statements. - # For this case, +select_tables+ should be empty, to keep the schema consistent between +main+ and +ci+. - # - # @example - # CREATE VIEW issues AS SELECT * FROM tickets def analyze(parsed) # This analyzer requires the PgQuery parsed query to be present return unless parsed.pg - select_tables = QueryAnalyzerHelpers.dml_from_create_view?(parsed) ? [] : parsed.pg.select_tables + select_tables = QueryAnalyzerHelpers.skip_dml_check?(parsed) ? [] : parsed.pg.select_tables tables = select_tables + parsed.pg.dml_tables table_schemas = ::Gitlab::Database::GitlabSchema.table_schemas!(tables) return if table_schemas.empty? diff --git a/lib/gitlab/database/query_analyzers/query_analyzer_helpers.rb b/lib/gitlab/database/query_analyzers/query_analyzer_helpers.rb index cd56fb195bfef9eb98467e598ac8b2effbf1e642..6cd676c66f5096e6cacea07a947455d671a2e04e 100644 --- a/lib/gitlab/database/query_analyzers/query_analyzer_helpers.rb +++ b/lib/gitlab/database/query_analyzers/query_analyzer_helpers.rb @@ -6,9 +6,37 @@ module QueryAnalyzers # Methods that are commonly used between analyzers class QueryAnalyzerHelpers class << self - def dml_from_create_view?(parsed) + # There are two special cases where a DDL statement can include DML statements. + # For this case, +select_tables+ should be empty, to avoid false positives. + # + # @example 1 + # Creating a regular view + # + # CREATE VIEW issues AS SELECT * FROM tickets + # @example 2 + # Creating a materialized view without any data + # + # CREATE MATERIALIZED VIEW issues AS SELECT * FROM tickets WITH NO DATA + def skip_dml_check?(parsed) + creates_view?(parsed) || creates_empty_materialized_view?(parsed) + end + + def creates_view?(parsed) parsed.pg.tree.stmts.select { |stmts| stmts.stmt.node == :view_stmt }.any? end + + def creates_empty_materialized_view?(parsed) + parsed.pg.tree.stmts.any? do |stmts| + create_table_statement = stmts.stmt.try(:create_table_as_stmt) + + next unless create_table_statement + + is_materialized_view = create_table_statement.objtype == :OBJECT_MATVIEW + + # Checks if materialized includes 'WITH NO DATA' statement + is_materialized_view && create_table_statement.into.skip_data + end + end end end end diff --git a/lib/gitlab/database/query_analyzers/restrict_allowed_schemas.rb b/lib/gitlab/database/query_analyzers/restrict_allowed_schemas.rb index 8ad4be5c31d7371ff9858078b48e95ede67601d4..8f8a0d730873f3cdf770ccaf0c706509f9f5486f 100644 --- a/lib/gitlab/database/query_analyzers/restrict_allowed_schemas.rb +++ b/lib/gitlab/database/query_analyzers/restrict_allowed_schemas.rb @@ -107,21 +107,21 @@ def ddl_mode? !self.dml_mode? end - # There is a special case where CREATE VIEW DDL statement can include DML statements. - # For this case, +select_tables+ should be empty, to avoid false positives. - # - # @example - # CREATE VIEW issues AS SELECT * FROM tickets def dml_tables(parsed) - select_tables = self.dml_from_create_view?(parsed) ? [] : parsed.pg.select_tables + select_tables = + if self.skip_dml_check?(parsed) + [] + else + parsed.pg.select_tables + end select_tables + parsed.pg.dml_tables end - def dml_from_create_view?(parsed) + def skip_dml_check?(parsed) return unless ddl_mode? - QueryAnalyzerHelpers.dml_from_create_view?(parsed) + QueryAnalyzerHelpers.skip_dml_check?(parsed) end def dml_schemas(tables) diff --git a/lib/tasks/gitlab/db.rake b/lib/tasks/gitlab/db.rake index 85bbc0dd5b884fb9e76465b9c38a23a7c03243ed..a419c3806f86013903d32a0554529b6bd9dae0d6 100644 --- a/lib/tasks/gitlab/db.rake +++ b/lib/tasks/gitlab/db.rake @@ -75,16 +75,24 @@ namespace :gitlab do Gitlab::Database::PgDepend.from_pg_extension('VIEW').pluck('relname') end + views = get_views_for(connection) + materialized_views = get_views_for(connection, is_materialized: true) + # Removes the entry from the array tables.delete 'schema_migrations' # Truncate schema_migrations to ensure migrations re-run connection.execute('TRUNCATE schema_migrations') if connection.table_exists? 'schema_migrations' # Drop any views - (connection.views - ignored_views).each do |view| + (views - ignored_views).each do |view| connection.execute("DROP VIEW IF EXISTS #{connection.quote_table_name(view)} CASCADE") end + # Drop any materialized views + materialized_views.each do |materialized_view| + connection.execute("DROP MATERIALIZED VIEW IF EXISTS #{connection.quote_table_name(materialized_view)} CASCADE") + end + # Drop tables with cascade to avoid dependent table errors # PG: http://www.postgresql.org/docs/current/static/ddl-depend.html # Add `IF EXISTS` because cascade could have already deleted a table. @@ -97,6 +105,21 @@ namespace :gitlab do end end + # Rails ActiveRecord::ConnectionAdapters::PostgreSQLAdapter#views method incorrectly returns both + # regular views AND materialized views. This method allows precise control + # over PostgreSQL object types by querying pg_class directly. + def get_views_for(connection, is_materialized: false) + view_type = is_materialized ? 'm' : 'v' + + connection.query_values <<~SQL + SELECT c.relname + FROM pg_class c + LEFT JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = ANY(current_schemas(false)) + AND c.relkind = '#{view_type}' + SQL + end + desc 'GitLab | DB | Configures the database by running migrate, or by loading the schema and seeding if needed' task configure: :environment do configure_pg_databases diff --git a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection_spec.rb b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection_spec.rb index 3450ba10006bae1136585637078c3fb8bc99fc4e..412517609c78bbb24019a7ffab611d5556f03410 100644 --- a/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection_spec.rb +++ b/spec/lib/gitlab/database/query_analyzers/gitlab_schemas_validate_connection_spec.rb @@ -65,6 +65,18 @@ sql: "CREATE OR REPLACE VIEW my_view AS SELECT * FROM p_ci_builds WHERE id = 1", expect_error: nil, setup: nil + }, + "for DDL operation that adds a materialized view that changes MAIN and CI and contains DML statements" => { + model: ApplicationRecord, + sql: "CREATE MATERIALIZED VIEW my_view AS SELECT * FROM p_ci_builds WHERE id = 1", + expect_error: /The query tried to access \["p_ci_builds"\]/, + setup: ->(_) { skip_if_shared_database(:ci) } + }, + "for DDL that adds an empty materialized view that changes MAIN and CI and contains DML statements" => { + model: ApplicationRecord, + sql: "CREATE MATERIALIZED VIEW my_view AS SELECT * FROM p_ci_builds WHERE id = 1 WITH NO DATA", + expect_error: nil, + setup: nil } } end diff --git a/spec/lib/gitlab/database/query_analyzers/restrict_allowed_schemas_spec.rb b/spec/lib/gitlab/database/query_analyzers/restrict_allowed_schemas_spec.rb index 59a6ce663f5fa5a61effbd7cd4c8c2e192e9c5c0..d497c1091a44ba5af3d6409f5f5238c1aeb95d10 100644 --- a/spec/lib/gitlab/database/query_analyzers/restrict_allowed_schemas_spec.rb +++ b/spec/lib/gitlab/database/query_analyzers/restrict_allowed_schemas_spec.rb @@ -95,6 +95,22 @@ gitlab_main: :ddl_not_allowed, gitlab_ci: :ddl_not_allowed } + }, + "for CREATE MATERIALIZED VIEW WITH DATA" => { + sql: "CREATE MATERIALIZED VIEW my_view AS SELECT * FROM issues", + expected_allowed_gitlab_schemas: { + no_schema: :dml_not_allowed, + gitlab_main: :ddl_not_allowed, + gitlab_ci: :ddl_not_allowed + } + }, + "for CREATE MATERIALIZED VIEW WITHOUT DATA" => { + sql: "CREATE MATERIALIZED VIEW my_view AS SELECT * FROM issues WITH NO DATA", + expected_allowed_gitlab_schemas: { + no_schema: :success, + gitlab_main: :ddl_not_allowed, + gitlab_ci: :ddl_not_allowed + } } } diff --git a/spec/tasks/gitlab/db_rake_spec.rb b/spec/tasks/gitlab/db_rake_spec.rb index 665fe9dc5e4ba99be68bec09d6f7fbb2455f7878..0cc3ac7e48c6419400170e1c885be9bcd256d24d 100644 --- a/spec/tasks/gitlab/db_rake_spec.rb +++ b/spec/tasks/gitlab/db_rake_spec.rb @@ -1118,7 +1118,7 @@ allow(connection).to receive(:execute).and_return(nil) allow(connection).to receive(:tables).and_return(tables) - allow(connection).to receive(:views).and_return(views) + allow(connection).to receive(:query_values).and_call_original end it 'drops all objects for the database', :aggregate_failures do @@ -1128,20 +1128,30 @@ end end - context 'with multiple databases', :aggregate_failures do - let(:main_model) { double(:model, connection: double(:connection, tables: tables, views: views)) } - let(:ci_model) { double(:model, connection: double(:connection, tables: tables, views: views)) } + context 'with multiple databases' do + let(:connection) { double(:connection, tables: tables) } + let(:main_model) { double(:model, connection: connection) } + let(:ci_model) { double(:model, connection: connection) } let(:base_models) { { 'main' => main_model, 'ci' => ci_model } } + let(:materialized_views) { %w[mv_1 mv_2] } before do skip_if_shared_database(:ci) allow(Gitlab::Database).to receive(:database_base_models_with_gitlab_shared).and_return(base_models) + allow_any_instance_of(Object).to receive(:get_views_for) + .with(anything) + .and_return(views) + + allow_any_instance_of(Object).to receive(:get_views_for) + .with(anything, is_materialized: true) + .and_return(materialized_views) + allow(main_model.connection).to receive(:table_exists?).with('schema_migrations').and_return(true) allow(ci_model.connection).to receive(:table_exists?).with('schema_migrations').and_return(true) - (tables + views + schemas).each do |name| + (tables + views + schemas + materialized_views).each do |name| allow(main_model.connection).to receive(:quote_table_name).with(name).and_return("\"#{name}\"") allow(ci_model.connection).to receive(:quote_table_name).with(name).and_return("\"#{name}\"") end @@ -1185,6 +1195,9 @@ def expect_objects_to_be_dropped(connection) expect(Gitlab::Database::PgDepend).to receive(:from_pg_extension).with('VIEW') expect(connection).not_to receive(:execute).with('DROP VIEW IF EXISTS "pg_stat_statements" CASCADE') + expect(connection).to receive(:execute).with('DROP MATERIALIZED VIEW IF EXISTS "mv_1" CASCADE') + expect(connection).to receive(:execute).with('DROP MATERIALIZED VIEW IF EXISTS "mv_2" CASCADE') + expect(connection).to receive(:execute).with('TRUNCATE schema_migrations') Gitlab::Database::EXTRA_SCHEMAS.each do |schema| @@ -1824,6 +1837,25 @@ def expect_objects_to_be_dropped(connection) end end + describe '#get_views_for' do + let(:connection) { ActiveRecord::Base.connection } + let(:rake_context) { Rake.application.tasks.find { |t| t.name.include?('drop_tables') } } + + # PostgreSQLAdapter bug that returns both regular views and materialized views + let(:all_views) { ActiveRecord::Base.connection.views } + + it 'correctly distinguishes between regular views and materialized views' do + regular_views = rake_context.instance_exec(connection) do |conn| + get_views_for(conn) + end + materialized_views = rake_context.instance_exec(connection) do |conn| + get_views_for(conn, is_materialized: true) + end + + expect(regular_views + materialized_views).to match_array(all_views) + end + end + def run_rake_task(task_name, arguments = '') Rake::Task[task_name].reenable Rake.application.invoke_task("#{task_name}#{arguments}")