From cf3c238e5ae497ebf8ab88353bb794c3c26be909 Mon Sep 17 00:00:00 2001 From: Adam Hegyi Date: Tue, 24 Mar 2020 07:49:41 +0100 Subject: [PATCH] Migrate users.bio to user_details.bio This change migrates users.bio to user_details.bio. --- app/models/user.rb | 8 ++ .../unreleased/206913-migrate-users-bio.yml | 5 + .../20200323071918_add_bio_to_user_details.rb | 17 +++ ...00323074147_add_temp_index_on_users_bio.rb | 18 ++++ ...gger_background_migration_for_users_bio.rb | 31 ++++++ db/structure.sql | 8 +- .../migrate_users_bio_to_user_details.rb | 34 ++++++ lib/gitlab/database/migration_helpers.rb | 30 ++++-- .../migrate_users_bio_to_user_details_spec.rb | 102 ++++++++++++++++++ .../gitlab/database/migration_helpers_spec.rb | 24 +++++ spec/models/user_spec.rb | 61 +++++++++++ spec/requests/api/users_spec.rb | 11 ++ 12 files changed, 337 insertions(+), 12 deletions(-) create mode 100644 changelogs/unreleased/206913-migrate-users-bio.yml create mode 100644 db/migrate/20200323071918_add_bio_to_user_details.rb create mode 100644 db/migrate/20200323074147_add_temp_index_on_users_bio.rb create mode 100644 db/post_migrate/20200323080714_trigger_background_migration_for_users_bio.rb create mode 100644 lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb create mode 100644 spec/lib/gitlab/background_migration/migrate_users_bio_to_user_details_spec.rb diff --git a/app/models/user.rb b/app/models/user.rb index 090d033f80cb12..37b4d7fbb72378 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -217,6 +217,7 @@ def update_tracked_fields!(request) before_save :check_for_verified_email, if: ->(user) { user.email_changed? && !user.new_record? } before_validation :ensure_namespace_correct before_save :ensure_namespace_correct # in case validation is skipped + before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed? after_validation :set_username_errors after_update :username_changed_hook, if: :saved_change_to_username? after_destroy :post_destroy_hook @@ -1262,6 +1263,13 @@ def ensure_namespace_correct end end + # Temporary, will be removed when bio is fully migrated + def ensure_bio_is_assigned_to_user_details + return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true) + + user_detail.bio = bio.to_s[0...255] # bio can be NULL in users, but cannot be NULL in user_details + end + def set_username_errors namespace_path_errors = self.errors.delete(:"namespace.path") self.errors[:username].concat(namespace_path_errors) if namespace_path_errors diff --git a/changelogs/unreleased/206913-migrate-users-bio.yml b/changelogs/unreleased/206913-migrate-users-bio.yml new file mode 100644 index 00000000000000..92d1a8da4a31aa --- /dev/null +++ b/changelogs/unreleased/206913-migrate-users-bio.yml @@ -0,0 +1,5 @@ +--- +title: Add user_details.bio column and migrate data from users.bio +merge_request: 27773 +author: +type: changed diff --git a/db/migrate/20200323071918_add_bio_to_user_details.rb b/db/migrate/20200323071918_add_bio_to_user_details.rb new file mode 100644 index 00000000000000..90e4b964695e64 --- /dev/null +++ b/db/migrate/20200323071918_add_bio_to_user_details.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class AddBioToUserDetails < ActiveRecord::Migration[6.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + add_column_with_default(:user_details, :bio, :string, default: '', allow_null: false, limit: 255, update_column_in_batches_args: { batch_column_name: :user_id }) + end + + def down + remove_column(:user_details, :bio) + end +end diff --git a/db/migrate/20200323074147_add_temp_index_on_users_bio.rb b/db/migrate/20200323074147_add_temp_index_on_users_bio.rb new file mode 100644 index 00000000000000..4b26bb971f55bd --- /dev/null +++ b/db/migrate/20200323074147_add_temp_index_on_users_bio.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class AddTempIndexOnUsersBio < ActiveRecord::Migration[6.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + INDEX_NAME = 'tmp_idx_on_user_id_where_bio_is_filled' + + disable_ddl_transaction! + + def up + add_concurrent_index :users, :id, where: "(COALESCE(bio, '') IS DISTINCT FROM '')", name: INDEX_NAME + end + + def down + remove_concurrent_index_by_name :users, INDEX_NAME + end +end diff --git a/db/post_migrate/20200323080714_trigger_background_migration_for_users_bio.rb b/db/post_migrate/20200323080714_trigger_background_migration_for_users_bio.rb new file mode 100644 index 00000000000000..31ab41a6b88ae0 --- /dev/null +++ b/db/post_migrate/20200323080714_trigger_background_migration_for_users_bio.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +class TriggerBackgroundMigrationForUsersBio < ActiveRecord::Migration[6.0] + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + INTERVAL = 2.minutes.to_i + BATCH_SIZE = 500 + MIGRATION = 'MigrateUsersBioToUserDetails' + + disable_ddl_transaction! + + class User < ActiveRecord::Base + self.table_name = 'users' + + include ::EachBatch + end + + def up + relation = User.where("(COALESCE(bio, '') IS DISTINCT FROM '')") + + queue_background_migration_jobs_by_range_at_intervals(relation, + MIGRATION, + INTERVAL, + batch_size: BATCH_SIZE) + end + + def down + # no-op + end +end diff --git a/db/structure.sql b/db/structure.sql index adaa0a594e69f0..8bd83517bb8ef4 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -6152,7 +6152,8 @@ ALTER SEQUENCE public.user_custom_attributes_id_seq OWNED BY public.user_custom_ CREATE TABLE public.user_details ( user_id bigint NOT NULL, - job_title character varying(200) DEFAULT ''::character varying NOT NULL + job_title character varying(200) DEFAULT ''::character varying NOT NULL, + bio character varying(255) DEFAULT ''::character varying NOT NULL ); CREATE SEQUENCE public.user_details_user_id_seq @@ -10243,6 +10244,8 @@ CREATE UNIQUE INDEX term_agreements_unique_index ON public.term_agreements USING CREATE INDEX tmp_build_stage_position_index ON public.ci_builds USING btree (stage_id, stage_idx) WHERE (stage_idx IS NOT NULL); +CREATE INDEX tmp_idx_on_user_id_where_bio_is_filled ON public.users USING btree (id) WHERE ((COALESCE(bio, ''::character varying))::text IS DISTINCT FROM ''::text); + CREATE INDEX undefined_vulnerabilities ON public.vulnerability_occurrences USING btree (id) WHERE (severity = 0); CREATE INDEX undefined_vulnerability ON public.vulnerabilities USING btree (id) WHERE (severity = 0); @@ -12797,7 +12800,10 @@ COPY "schema_migrations" (version) FROM STDIN; 20200319203901 20200320112455 20200320123839 +20200323071918 +20200323074147 20200323075043 +20200323080714 20200323122201 20200323134519 20200324115359 diff --git a/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb b/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb new file mode 100644 index 00000000000000..ca64d13b11855a --- /dev/null +++ b/lib/gitlab/background_migration/migrate_users_bio_to_user_details.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true +# rubocop:disable Style/Documentation + +module Gitlab + module BackgroundMigration + class MigrateUsersBioToUserDetails + class User < ActiveRecord::Base + self.table_name = 'users' + end + + class UserDetails < ActiveRecord::Base + self.table_name = 'user_details' + end + + def perform(start_id, stop_id) + return if Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true) + + relation = User + .select("id AS user_id", "substring(COALESCE(bio, '') from 1 for 255) AS bio") + .where("(COALESCE(bio, '') IS DISTINCT FROM '')") + .where(id: (start_id..stop_id)) + + ActiveRecord::Base.connection.execute <<-EOF.strip_heredoc + INSERT INTO user_details + (user_id, bio) + #{relation.to_sql} + ON CONFLICT (user_id) + DO UPDATE SET + "bio" = EXCLUDED."bio"; + EOF + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 64eef5306638e7..dc4de9b1906367 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -375,8 +375,11 @@ def false_value # less "complex" without introducing extra methods (which actually will # make things _more_ complex). # + # `batch_column_name` option is for tables without primary key, in this + # case an other unique integer column can be used. Example: :user_id + # # rubocop: disable Metrics/AbcSize - def update_column_in_batches(table, column, value, batch_size: nil) + def update_column_in_batches(table, column, value, batch_size: nil, batch_column_name: :id) if transaction_open? raise 'update_column_in_batches can not be run inside a transaction, ' \ 'you can disable transactions by calling disable_ddl_transaction! ' \ @@ -403,14 +406,14 @@ def update_column_in_batches(table, column, value, batch_size: nil) batch_size = max_size if batch_size > max_size end - start_arel = table.project(table[:id]).order(table[:id].asc).take(1) + start_arel = table.project(table[batch_column_name]).order(table[batch_column_name].asc).take(1) start_arel = yield table, start_arel if block_given? - start_id = exec_query(start_arel.to_sql).to_a.first['id'].to_i + start_id = exec_query(start_arel.to_sql).to_a.first[batch_column_name.to_s].to_i loop do - stop_arel = table.project(table[:id]) - .where(table[:id].gteq(start_id)) - .order(table[:id].asc) + stop_arel = table.project(table[batch_column_name]) + .where(table[batch_column_name].gteq(start_id)) + .order(table[batch_column_name].asc) .take(1) .skip(batch_size) @@ -420,12 +423,12 @@ def update_column_in_batches(table, column, value, batch_size: nil) update_arel = Arel::UpdateManager.new .table(table) .set([[table[column], value]]) - .where(table[:id].gteq(start_id)) + .where(table[batch_column_name].gteq(start_id)) if stop_row - stop_id = stop_row['id'].to_i + stop_id = stop_row[batch_column_name.to_s].to_i start_id = stop_id - update_arel = update_arel.where(table[:id].lt(stop_id)) + update_arel = update_arel.where(table[batch_column_name].lt(stop_id)) end update_arel = yield table, update_arel if block_given? @@ -461,7 +464,7 @@ def update_column_in_batches(table, column, value, batch_size: nil) # # This method can also take a block which is passed directly to the # `update_column_in_batches` method. - def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, &block) + def add_column_with_default(table, column, type, default:, limit: nil, allow_null: false, update_column_in_batches_args: {}, &block) if transaction_open? raise 'add_column_with_default can not be run inside a transaction, ' \ 'you can disable transactions by calling disable_ddl_transaction! ' \ @@ -483,7 +486,12 @@ def add_column_with_default(table, column, type, default:, limit: nil, allow_nul begin default_after_type_cast = connection.type_cast(default, column_for(table, column)) - update_column_in_batches(table, column, default_after_type_cast, &block) + + if update_column_in_batches_args.any? + update_column_in_batches(table, column, default_after_type_cast, **update_column_in_batches_args, &block) + else + update_column_in_batches(table, column, default_after_type_cast, &block) + end change_column_null(table, column, false) unless allow_null # We want to rescue _all_ exceptions here, even those that don't inherit diff --git a/spec/lib/gitlab/background_migration/migrate_users_bio_to_user_details_spec.rb b/spec/lib/gitlab/background_migration/migrate_users_bio_to_user_details_spec.rb new file mode 100644 index 00000000000000..8603eb73bd592f --- /dev/null +++ b/spec/lib/gitlab/background_migration/migrate_users_bio_to_user_details_spec.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::BackgroundMigration::MigrateUsersBioToUserDetails, :migration, schema: 20200323074147 do + let(:users) { table(:users) } + + let(:user_details) do + klass = table(:user_details) + klass.primary_key = :user_id + klass + end + + let!(:user_needs_migration) { users.create(name: 'user1', email: 'test1@test.com', projects_limit: 1, bio: 'bio') } + let!(:user_needs_no_migration) { users.create(name: 'user2', email: 'test2@test.com', projects_limit: 1) } + let!(:user_also_needs_no_migration) { users.create(name: 'user3', email: 'test3@test.com', projects_limit: 1, bio: '') } + let!(:user_with_long_bio) { users.create(name: 'user4', email: 'test4@test.com', projects_limit: 1, bio: 'a' * 256) } # 255 is the max + + let!(:user_already_has_details) { users.create(name: 'user5', email: 'test5@test.com', projects_limit: 1, bio: 'my bio') } + let!(:existing_user_details) { user_details.find_or_create_by(user_id: user_already_has_details.id).update(bio: 'my bio') } + + # unlikely scenario since we have triggers + let!(:user_has_different_details) { users.create(name: 'user6', email: 'test6@test.com', projects_limit: 1, bio: 'different') } + let!(:different_existing_user_details) { user_details.find_or_create_by(user_id: user_has_different_details.id).update(bio: 'bio') } + + let(:user_ids) do + [ + user_needs_migration, + user_needs_no_migration, + user_also_needs_no_migration, + user_with_long_bio, + user_already_has_details, + user_has_different_details + ].map(&:id) + end + + subject { described_class.new.perform(user_ids.min, user_ids.max) } + + it 'migrates all relevant records' do + subject + + all_user_details = user_details.all + expect(all_user_details.size).to eq(4) + end + + it 'migrates `bio`' do + subject + + user_detail = user_details.find_by!(user_id: user_needs_migration.id) + + expect(user_detail.bio).to eq('bio') + end + + it 'migrates long `bio`' do + subject + + user_detail = user_details.find_by!(user_id: user_with_long_bio.id) + + expect(user_detail.bio).to eq('a' * 255) + end + + it 'does not change existing user detail' do + expect { subject }.not_to change { user_details.find_by!(user_id: user_already_has_details.id).attributes } + end + + it 'changes existing user detail when the columns are different' do + expect { subject }.to change { user_details.find_by!(user_id: user_has_different_details.id).bio }.from('bio').to('different') + end + + it 'does not migrate record' do + subject + + user_detail = user_details.find_by(user_id: user_needs_no_migration.id) + + expect(user_detail).to be_nil + end + + it 'does not migrate empty bio' do + subject + + user_detail = user_details.find_by(user_id: user_also_needs_no_migration.id) + + expect(user_detail).to be_nil + end + + context 'when `migrate_bio_to_user_details` feature flag is off' do + before do + stub_feature_flags(migrate_bio_to_user_details: false) + end + + it 'does nothing' do + already_existing_user_details = user_details.where(user_id: [ + user_has_different_details.id, + user_already_has_details.id + ]) + + subject + + expect(user_details.all).to match_array(already_existing_user_details) + end + end +end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb index 9ac2660908cbae..8b765ce122dbc7 100644 --- a/spec/lib/gitlab/database/migration_helpers_spec.rb +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -657,6 +657,30 @@ end end + context 'when `update_column_in_batches_args` is given' do + let(:column) { UserDetail.columns.find { |c| c.name == "user_id" } } + + it 'uses `user_id` for `update_column_in_batches`' do + allow(model).to receive(:transaction_open?).and_return(false) + allow(model).to receive(:transaction).and_yield + allow(model).to receive(:column_for).with(:user_details, :foo).and_return(column) + allow(model).to receive(:update_column_in_batches).with(:user_details, :foo, 10, batch_column_name: :user_id) + allow(model).to receive(:change_column_null).with(:user_details, :foo, false) + allow(model).to receive(:change_column_default).with(:user_details, :foo, 10) + + expect(model).to receive(:add_column) + .with(:user_details, :foo, :integer, default: nil) + + model.add_column_with_default( + :user_details, + :foo, + :integer, + default: 10, + update_column_in_batches_args: { batch_column_name: :user_id } + ) + end + end + context 'when a column limit is set' do it 'adds the column with a limit' do allow(model).to receive(:transaction_open?).and_return(false) diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb index 3286a891203c3a..5f5faf311965a0 100644 --- a/spec/models/user_spec.rb +++ b/spec/models/user_spec.rb @@ -55,6 +55,67 @@ it { is_expected.to have_many(:custom_attributes).class_name('UserCustomAttribute') } it { is_expected.to have_many(:releases).dependent(:nullify) } + describe "#bio" do + it 'syncs bio with `user_details.bio` on create' do + user = create(:user, bio: 'my bio') + + expect(user.bio).to eq(user.user_detail.bio) + end + + context 'when `migrate_bio_to_user_details` feature flag is off' do + before do + stub_feature_flags(migrate_bio_to_user_details: false) + end + + it 'does not sync bio with `user_details.bio`' do + user = create(:user, bio: 'my bio') + + expect(user.bio).to eq('my bio') + expect(user.user_detail.bio).to eq('') + end + end + + it 'syncs bio with `user_details.bio` on update' do + user = create(:user) + + user.update!(bio: 'my bio') + + expect(user.bio).to eq(user.user_detail.bio) + end + + context 'when `user_details` association already exists' do + let(:user) { create(:user) } + + before do + create(:user_detail, user: user) + end + + it 'syncs bio with `user_details.bio`' do + user.update!(bio: 'my bio') + + expect(user.bio).to eq(user.user_detail.bio) + end + + it 'falls back to "" when nil is given' do + user.update!(bio: nil) + + expect(user.bio).to eq(nil) + expect(user.user_detail.bio).to eq('') + end + + # very unlikely scenario + it 'truncates long bio when syncing to user_details' do + invalid_bio = 'a' * 256 + truncated_bio = 'a' * 255 + + user.bio = invalid_bio + user.save(validate: false) + + expect(user.user_detail.bio).to eq(truncated_bio) + end + end + end + describe "#abuse_report" do let(:current_user) { create(:user) } let(:other_user) { create(:user) } diff --git a/spec/requests/api/users_spec.rb b/spec/requests/api/users_spec.rb index 6d1b76a9aea254..92cc6dc887ead3 100644 --- a/spec/requests/api/users_spec.rb +++ b/spec/requests/api/users_spec.rb @@ -739,6 +739,17 @@ expect(user.reload.bio).to eq('new test bio') end + it "updates user with empty bio" do + user.bio = 'previous bio' + user.save! + + put api("/users/#{user.id}", admin), params: { bio: '' } + + expect(response).to have_gitlab_http_status(:ok) + expect(json_response['bio']).to eq('') + expect(user.reload.bio).to eq('') + end + it "updates user with new password and forces reset on next login" do put api("/users/#{user.id}", admin), params: { password: '12345678' } -- GitLab