From 2bbec66c9d738b0df435f3aab801747408929ed0 Mon Sep 17 00:00:00 2001 From: Sami Hiltunen Date: Wed, 1 Sep 2021 13:02:49 +0200 Subject: [PATCH 1/3] Link existing database record via repository ID Praefect generates a repository ID to uniquely identify a repository. While new records that get created are already being linked via the repository ID, historical records in the database are still not linked via the repository ID. In order to update the queries to join the records via the repository ID, all of the relevant records need to be linked via the repository ID. This commit adds a migration that links the records in 'storage_repositories' and 'repository_assignments' to the 'repositories' table's records via the ID. Replication jobs are not included in the migration as they are not long lived and they can be rescheduled. Changelog: other --- .../20210906145021_link_repository_id.go | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 internal/praefect/datastore/migrations/20210906145021_link_repository_id.go diff --git a/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go b/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go new file mode 100644 index 0000000000..08c9663bdd --- /dev/null +++ b/internal/praefect/datastore/migrations/20210906145021_link_repository_id.go @@ -0,0 +1,28 @@ +package migrations + +import migrate "github.com/rubenv/sql-migrate" + +func init() { + m := &migrate.Migration{ + Id: "20210906145021_link_repository_id", + Up: []string{ + ` +UPDATE storage_repositories +SET repository_id = repositories.repository_id +FROM repositories +WHERE storage_repositories.virtual_storage = repositories.virtual_storage +AND storage_repositories.relative_path = repositories.relative_path + `, + ` +UPDATE repository_assignments +SET repository_id = repositories.repository_id +FROM repositories +WHERE repository_assignments.virtual_storage = repositories.virtual_storage +AND repository_assignments.relative_path = repositories.relative_path + `, + }, + Down: []string{}, + } + + allMigrations = append(allMigrations, m) +} -- GitLab From 99199b0887c09d93166d77cf01caea93f07c9c5b Mon Sep 17 00:00:00 2001 From: Sami Hiltunen Date: Tue, 21 Sep 2021 15:18:45 +0200 Subject: [PATCH 2/3] Backfill replica_path in 'repositories' records Praefect now generates a repository id that can be used to join the database records of a repository across the tables. As such, the virtual_storage and relative_path columns will not be needed in other tables than the 'repositories' table as the joining will happen via the repository_id column. As Praefect will begin generating unique relative paths for the replicas to avoid stale disk state of deleted repositories affecting recreation of said repositories, the replica_path column was added to the 'repositories' table to store the actual disk path of the replicas. Right now, every newly created 'repositories' record has the replica_path set to the relative_path. To begin using the new column, we also need the historical records to have the column correctly filled. This commit adds a migration that fills the replica_path of existing records. As each repository is currently stored in the path sent by the client, we'll just fill the column using that. Changelog: other --- .../20210921131816_backfill_replica_path.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 internal/praefect/datastore/migrations/20210921131816_backfill_replica_path.go diff --git a/internal/praefect/datastore/migrations/20210921131816_backfill_replica_path.go b/internal/praefect/datastore/migrations/20210921131816_backfill_replica_path.go new file mode 100644 index 0000000000..e18ea3d4c6 --- /dev/null +++ b/internal/praefect/datastore/migrations/20210921131816_backfill_replica_path.go @@ -0,0 +1,13 @@ +package migrations + +import migrate "github.com/rubenv/sql-migrate" + +func init() { + m := &migrate.Migration{ + Id: "20210921131816_backfill_replica_path", + Up: []string{"UPDATE repositories SET replica_path = relative_path"}, + Down: []string{}, + } + + allMigrations = append(allMigrations, m) +} -- GitLab From 1c5ed6acd0386e2c8055cefdd44521239c24e20d Mon Sep 17 00:00:00 2001 From: Sami Hiltunen Date: Wed, 22 Sep 2021 11:20:22 +0200 Subject: [PATCH 3/3] Index repo id for storage_repositories and repository_assignments With repository ID present and backfilled in both 'storage_repositories' and 'repository_assignments' tables, Praefect can now start joining the records using the repository_id instead of (virtual_storage, relative_path). To prepare for that, this commit indexes both tables using (repository_id, storage). In a later release, (virtual_storage, relative_path) can be dropped from both tables and the primary key can be changed to use the new indexes. Changelog: performance --- ...91614_repository_id_primary_key_indexes.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 internal/praefect/datastore/migrations/20210922091614_repository_id_primary_key_indexes.go diff --git a/internal/praefect/datastore/migrations/20210922091614_repository_id_primary_key_indexes.go b/internal/praefect/datastore/migrations/20210922091614_repository_id_primary_key_indexes.go new file mode 100644 index 0000000000..e5441ff948 --- /dev/null +++ b/internal/praefect/datastore/migrations/20210922091614_repository_id_primary_key_indexes.go @@ -0,0 +1,19 @@ +package migrations + +import migrate "github.com/rubenv/sql-migrate" + +func init() { + m := &migrate.Migration{ + Id: "20210922091614_repository_id_primary_key_indexes", + Up: []string{ + "CREATE UNIQUE INDEX repository_assignments_new_pkey ON repository_assignments (repository_id, storage)", + "CREATE UNIQUE INDEX storage_repositories_new_pkey ON storage_repositories (repository_id, storage)", + }, + Down: []string{ + "DROP INDEX repository_assignments_new_pkey", + "DROP INDEX storage_repositories_new_pkey", + }, + } + + allMigrations = append(allMigrations, m) +} -- GitLab