From 23fb10dddc11e19b7ee3dbe99ff271b3ed68c449 Mon Sep 17 00:00:00 2001 From: Paul Okstad Date: Tue, 13 Aug 2019 21:22:27 -0700 Subject: [PATCH 1/4] Disk cache object initial clear --- internal/cache/export_test.go | 3 ++ internal/cache/walker.go | 53 +++++++++++++++++++++++++++++ internal/cache/walker_test.go | 64 +++++++++++++++++++++++++++-------- 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/internal/cache/export_test.go b/internal/cache/export_test.go index cb1f65b67a..2870721180 100644 --- a/internal/cache/export_test.go +++ b/internal/cache/export_test.go @@ -5,6 +5,9 @@ import "sync" var ( ExportMockRemovalCounter = new(mockCounter) ExportMockCheckCounter = new(mockCounter) + + ExportDisableMoveAndClear = &disableMoveAndClear + ExportDisableWalker = &disableWalker ) type mockCounter struct { diff --git a/internal/cache/walker.go b/internal/cache/walker.go index a782ebd6ba..649f2c3619 100644 --- a/internal/cache/walker.go +++ b/internal/cache/walker.go @@ -6,6 +6,7 @@ package cache import ( + "io/ioutil" "os" "path/filepath" "time" @@ -52,6 +53,9 @@ func cleanWalk(storagePath string) error { const cleanWalkFrequency = 10 * time.Minute func startCleanWalker(storage config.Storage) { + if disableWalker { + return + } logrus.WithField("storage", storage.Name).Info("Starting disk cache object walker") walkTick := time.NewTicker(cleanWalkFrequency) go func() { @@ -64,9 +68,58 @@ func startCleanWalker(storage config.Storage) { }() } +var ( + disableMoveAndClear bool // only used to disable move and clear in tests + disableWalker bool // only used to disable object walker in tests +) + +// moveAndClear will move the cache to the storage location's +// temporary folder, and then remove its contents asynchronously +func moveAndClear(storage config.Storage) error { + if disableMoveAndClear { + return nil + } + + logger := logrus.WithField("storage", storage.Name) + logger.Info("clearing disk cache object folder") + + cachePath := filepath.Join(storage.Path, tempdir.CachePrefix) + tempPath := filepath.Join(storage.Path, tempdir.TmpRootPrefix) + if err := os.MkdirAll(tempPath, 0755); err != nil { + return err + } + + tmpDir, err := ioutil.TempDir(tempPath, "diskcache") + if err != nil { + return err + } + + logger.Infof("moving disk cache object folder to %s", tmpDir) + if err := os.Rename(cachePath, filepath.Join(tmpDir, "moved")); err != nil { + if os.IsNotExist(err) { + logger.Info("disk cache object folder doesn't exist, no need to remove") + return nil + } + return err + } + + go func() { + start := time.Now() + if err := os.RemoveAll(tmpDir); err != nil { + logger.Errorf("unable to remove disk cache objects: %q", err) + } + logger.Infof("cleared all cache object files in %s after %s", tmpDir, time.Since(start)) + }() + + return nil +} + func init() { config.RegisterHook(func() error { for _, storage := range config.Config.Storages { + if err := moveAndClear(storage); err != nil { + return err + } startCleanWalker(storage) } return nil diff --git a/internal/cache/walker_test.go b/internal/cache/walker_test.go index ef6f9b9e3c..8237fd76a8 100644 --- a/internal/cache/walker_test.go +++ b/internal/cache/walker_test.go @@ -12,23 +12,12 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/cache" "gitlab.com/gitlab-org/gitaly/internal/config" "gitlab.com/gitlab-org/gitaly/internal/tempdir" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" ) func TestDiskCacheObjectWalker(t *testing.T) { - tmpPath, err := ioutil.TempDir("", t.Name()) - require.NoError(t, err) - defer func() { require.NoError(t, os.RemoveAll(tmpPath)) }() - - oldStorages := config.Config.Storages - config.Config.Storages = []config.Storage{ - { - Name: t.Name(), - Path: tmpPath, - }, - } - defer func() { config.Config.Storages = oldStorages }() - - satisfyConfigValidation(tmpPath) + tmpPath, cleanup := setupDiskCacheWalker(t) + defer cleanup() var shouldExist, shouldNotExist []string @@ -61,6 +50,11 @@ func TestDiskCacheObjectWalker(t *testing.T) { expectChecks := cache.ExportMockCheckCounter.Count() + 4 expectRemovals := cache.ExportMockRemovalCounter.Count() + 2 + // disable the initial move-and-clear function since we are only + // evaluating the walker + *cache.ExportDisableMoveAndClear = true + defer func() { *cache.ExportDisableMoveAndClear = false }() + require.NoError(t, config.Validate()) // triggers walker pollCountersUntil(t, expectChecks, expectRemovals) @@ -75,6 +69,48 @@ func TestDiskCacheObjectWalker(t *testing.T) { } } +func TestDiskCacheInitialClear(t *testing.T) { + tmpPath, cleanup := setupDiskCacheWalker(t) + defer cleanup() + + canary := filepath.Join(tmpPath, tempdir.CachePrefix, "canary.txt") + require.NoError(t, os.MkdirAll(filepath.Dir(canary), 0755)) + require.NoError(t, ioutil.WriteFile(canary, []byte("chirp chirp"), 0755)) + + // disable the background walkers since we are only + // evaluating the initial move-and-clear function + *cache.ExportDisableWalker = true + defer func() { *cache.ExportDisableWalker = false }() + + // validation will run cache walker hook which synchronously + // runs the move-and-clear function + require.NoError(t, config.Validate()) + + testhelper.AssertFileNotExists(t, canary) +} + +func setupDiskCacheWalker(t testing.TB) (string, func()) { + tmpPath, err := ioutil.TempDir("", t.Name()) + require.NoError(t, err) + + oldStorages := config.Config.Storages + config.Config.Storages = []config.Storage{ + { + Name: t.Name(), + Path: tmpPath, + }, + } + + satisfyConfigValidation(tmpPath) + + cleanup := func() { + config.Config.Storages = oldStorages + require.NoError(t, os.RemoveAll(tmpPath)) + } + + return tmpPath, cleanup +} + // satisfyConfigValidation puts garbage values in the config file to satisfy // validation func satisfyConfigValidation(tmpPath string) { -- GitLab From 65d75b1c0ddbeb652c0b56d22a882d8129117040 Mon Sep 17 00:00:00 2001 From: Paul Okstad Date: Tue, 13 Aug 2019 21:27:52 -0700 Subject: [PATCH 2/4] Changelog for MR 1424 --- changelogs/unreleased/po-cache-init-clear.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 changelogs/unreleased/po-cache-init-clear.yml diff --git a/changelogs/unreleased/po-cache-init-clear.yml b/changelogs/unreleased/po-cache-init-clear.yml new file mode 100644 index 0000000000..f2f7d10058 --- /dev/null +++ b/changelogs/unreleased/po-cache-init-clear.yml @@ -0,0 +1,5 @@ +--- +title: Disk cache object directory initial clear +merge_request: 1424 +author: +type: performance -- GitLab From fe692959af900c4fc72ba0d77d686d2a84691a9e Mon Sep 17 00:00:00 2001 From: Paul Okstad Date: Thu, 15 Aug 2019 14:48:01 -0700 Subject: [PATCH 3/4] Whitespace after if statements --- internal/cache/walker.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internal/cache/walker.go b/internal/cache/walker.go index 649f2c3619..5b08b793b6 100644 --- a/internal/cache/walker.go +++ b/internal/cache/walker.go @@ -41,6 +41,7 @@ func cleanWalk(storagePath string) error { // have deleted the file already return nil } + return err } @@ -56,6 +57,7 @@ func startCleanWalker(storage config.Storage) { if disableWalker { return } + logrus.WithField("storage", storage.Name).Info("Starting disk cache object walker") walkTick := time.NewTicker(cleanWalkFrequency) go func() { @@ -63,6 +65,7 @@ func startCleanWalker(storage config.Storage) { if err := cleanWalk(storage.Path); err != nil { logrus.WithField("storage", storage.Name).Error(err) } + <-walkTick.C } }() @@ -100,6 +103,7 @@ func moveAndClear(storage config.Storage) error { logger.Info("disk cache object folder doesn't exist, no need to remove") return nil } + return err } @@ -108,6 +112,7 @@ func moveAndClear(storage config.Storage) error { if err := os.RemoveAll(tmpDir); err != nil { logger.Errorf("unable to remove disk cache objects: %q", err) } + logger.Infof("cleared all cache object files in %s after %s", tmpDir, time.Since(start)) }() @@ -120,6 +125,7 @@ func init() { if err := moveAndClear(storage); err != nil { return err } + startCleanWalker(storage) } return nil -- GitLab From 2c3d353d4589d691b54a92523a3f19698c49086e Mon Sep 17 00:00:00 2001 From: Paul Okstad Date: Thu, 15 Aug 2019 15:30:43 -0700 Subject: [PATCH 4/4] Refactor cache and temp dir --- internal/cache/keyer.go | 9 +-------- internal/cache/walker.go | 21 ++++++++++++++++----- internal/cache/walker_test.go | 18 ++++++++++++------ internal/tempdir/tempdir.go | 32 ++++++++++++++++++++++++++------ internal/tempdir/tempdir_test.go | 4 ++-- 5 files changed, 57 insertions(+), 27 deletions(-) diff --git a/internal/cache/keyer.go b/internal/cache/keyer.go index 20acbd414f..b8d4609d0c 100644 --- a/internal/cache/keyer.go +++ b/internal/cache/keyer.go @@ -213,14 +213,7 @@ func newPendingLease(repo *gitalypb.Repository) (string, error) { // cacheDir is $STORAGE/+gitaly/cache func cacheDir(repo *gitalypb.Repository) (string, error) { - storagePath, err := helper.GetStorageByName(repo.StorageName) - if err != nil { - return "", err - } - - absPath := filepath.Join(storagePath, tempdir.CachePrefix) - - return absPath, nil + return tempdir.CacheDir(repo.GetStorageName()) } func currentLeases(repo *gitalypb.Repository) ([]os.FileInfo, error) { diff --git a/internal/cache/walker.go b/internal/cache/walker.go index 5b08b793b6..43ba7a3ee8 100644 --- a/internal/cache/walker.go +++ b/internal/cache/walker.go @@ -16,8 +16,11 @@ import ( "gitlab.com/gitlab-org/gitaly/internal/tempdir" ) -func cleanWalk(storagePath string) error { - cachePath := filepath.Join(storagePath, tempdir.CachePrefix) +func cleanWalk(storageName string) error { + cachePath, err := tempdir.CacheDir(storageName) + if err != nil { + return err + } return filepath.Walk(cachePath, func(path string, info os.FileInfo, err error) error { if err != nil { @@ -62,7 +65,7 @@ func startCleanWalker(storage config.Storage) { walkTick := time.NewTicker(cleanWalkFrequency) go func() { for { - if err := cleanWalk(storage.Path); err != nil { + if err := cleanWalk(storage.Name); err != nil { logrus.WithField("storage", storage.Name).Error(err) } @@ -86,8 +89,16 @@ func moveAndClear(storage config.Storage) error { logger := logrus.WithField("storage", storage.Name) logger.Info("clearing disk cache object folder") - cachePath := filepath.Join(storage.Path, tempdir.CachePrefix) - tempPath := filepath.Join(storage.Path, tempdir.TmpRootPrefix) + cachePath, err := tempdir.CacheDir(storage.Name) + if err != nil { + return err + } + + tempPath, err := tempdir.TempDir(storage.Name) + if err != nil { + return err + } + if err := os.MkdirAll(tempPath, 0755); err != nil { return err } diff --git a/internal/cache/walker_test.go b/internal/cache/walker_test.go index 8237fd76a8..b3d3044d60 100644 --- a/internal/cache/walker_test.go +++ b/internal/cache/walker_test.go @@ -16,7 +16,7 @@ import ( ) func TestDiskCacheObjectWalker(t *testing.T) { - tmpPath, cleanup := setupDiskCacheWalker(t) + cleanup := setupDiskCacheWalker(t) defer cleanup() var shouldExist, shouldNotExist []string @@ -31,7 +31,10 @@ func TestDiskCacheObjectWalker(t *testing.T) { {"2b/ancient", 24 * time.Hour, true}, {"cd/baby", time.Second, false}, } { - path := filepath.Join(tmpPath, tempdir.CachePrefix, tt.name) + cacheDir, err := tempdir.CacheDir(t.Name()) // test name is storage name + require.NoError(t, err) + + path := filepath.Join(cacheDir, tt.name) require.NoError(t, os.MkdirAll(filepath.Dir(path), 0755)) f, err := os.Create(path) @@ -70,10 +73,13 @@ func TestDiskCacheObjectWalker(t *testing.T) { } func TestDiskCacheInitialClear(t *testing.T) { - tmpPath, cleanup := setupDiskCacheWalker(t) + cleanup := setupDiskCacheWalker(t) defer cleanup() - canary := filepath.Join(tmpPath, tempdir.CachePrefix, "canary.txt") + cacheDir, err := tempdir.CacheDir(t.Name()) // test name is storage name + require.NoError(t, err) + + canary := filepath.Join(cacheDir, "canary.txt") require.NoError(t, os.MkdirAll(filepath.Dir(canary), 0755)) require.NoError(t, ioutil.WriteFile(canary, []byte("chirp chirp"), 0755)) @@ -89,7 +95,7 @@ func TestDiskCacheInitialClear(t *testing.T) { testhelper.AssertFileNotExists(t, canary) } -func setupDiskCacheWalker(t testing.TB) (string, func()) { +func setupDiskCacheWalker(t testing.TB) func() { tmpPath, err := ioutil.TempDir("", t.Name()) require.NoError(t, err) @@ -108,7 +114,7 @@ func setupDiskCacheWalker(t testing.TB) (string, func()) { require.NoError(t, os.RemoveAll(tmpPath)) } - return tmpPath, cleanup + return cleanup } // satisfyConfigValidation puts garbage values in the config file to satisfy diff --git a/internal/tempdir/tempdir.go b/internal/tempdir/tempdir.go index 9431673b50..49ce183f15 100644 --- a/internal/tempdir/tempdir.go +++ b/internal/tempdir/tempdir.go @@ -23,13 +23,13 @@ const ( // character is not allowed in GitLab namespaces or repositories. GitalyDataPrefix = "+gitaly" - // TmpRootPrefix is the directory in which we store temporary + // tmpRootPrefix is the directory in which we store temporary // directories. - TmpRootPrefix = GitalyDataPrefix + "/tmp" + tmpRootPrefix = GitalyDataPrefix + "/tmp" - // CachePrefix is the directory where all cache data is stored on a + // cachePrefix is the directory where all cache data is stored on a // storage location. - CachePrefix = GitalyDataPrefix + "/cache" + cachePrefix = GitalyDataPrefix + "/cache" // MaxAge is used by ForDeleteAllRepositories. It is also a fallback // for the context-scoped temporary directories, to ensure they get @@ -37,6 +37,26 @@ const ( MaxAge = 7 * 24 * time.Hour ) +// CacheDir returns the path to the cache dir for a storage location +func CacheDir(storageName string) (string, error) { + storagePath, err := helper.GetStorageByName(storageName) + if err != nil { + return "", err + } + + return filepath.Join(storagePath, cachePrefix), nil +} + +// TempDir returns the path to the temp dir for a storage location +func TempDir(storageName string) (string, error) { + storagePath, err := helper.GetStorageByName(storageName) + if err != nil { + return "", err + } + + return filepath.Join(storagePath, tmpRootPrefix), nil +} + // ForDeleteAllRepositories returns a temporary directory for the given storage. It is not context-scoped but it will get removed eventuall (after MaxAge). func ForDeleteAllRepositories(storageName string) (string, error) { prefix := fmt.Sprintf("%s-repositories.old.%d.", storageName, time.Now().Unix()) @@ -90,7 +110,7 @@ func newAsRepository(ctx context.Context, storageName string, prefix string) (*g } func tmpRoot(storageRoot string) string { - return filepath.Join(storageRoot, TmpRootPrefix) + return filepath.Join(storageRoot, tmpRootPrefix) } // StartCleaning starts tempdir cleanup goroutines. @@ -119,7 +139,7 @@ type invalidCleanRoot string func clean(dir string) error { // If we start "cleaning up" the wrong directory we may delete user data // which is Really Bad. - if !strings.HasSuffix(dir, TmpRootPrefix) { + if !strings.HasSuffix(dir, tmpRootPrefix) { log.Print(dir) panic(invalidCleanRoot("invalid tempdir clean root: panicking to prevent data loss")) } diff --git a/internal/tempdir/tempdir_test.go b/internal/tempdir/tempdir_test.go index 67ef976e83..50a1f36e3e 100644 --- a/internal/tempdir/tempdir_test.go +++ b/internal/tempdir/tempdir_test.go @@ -52,7 +52,7 @@ func TestNewAsRepositoryFailStorageUnknown(t *testing.T) { require.Error(t, err) } -var cleanRoot = path.Join("testdata/clean", TmpRootPrefix) +var cleanRoot = path.Join("testdata/clean", tmpRootPrefix) func TestCleanerSafety(t *testing.T) { defer func() { @@ -134,7 +134,7 @@ func makeDir(t *testing.T, dirPath string, mtime time.Time) { func TestCleanNoTmpExists(t *testing.T) { // This directory is valid because it ends in the special prefix - dir := path.Join("testdata", "does-not-exist", TmpRootPrefix) + dir := path.Join("testdata", "does-not-exist", tmpRootPrefix) require.NoError(t, clean(dir)) } -- GitLab