From d31ebbda5e3335dab63e6660e7393d7ae1f19e66 Mon Sep 17 00:00:00 2001 From: Jacob Vosmaer Date: Thu, 30 Jun 2022 12:20:36 +0000 Subject: [PATCH] Add support for object storage bucket prefixes --- app/uploaders/object_storage.rb | 14 +++++ config/object_store_settings.rb | 24 ++++++- doc/administration/object_storage.md | 9 +++ spec/config/object_store_settings_spec.rb | 76 +++++++++++++++++++++++ spec/uploaders/object_storage_spec.rb | 44 +++++++++++++ 5 files changed, 165 insertions(+), 2 deletions(-) diff --git a/app/uploaders/object_storage.rb b/app/uploaders/object_storage.rb index 1d56cddca63f6c..891df5180d82aa 100644 --- a/app/uploaders/object_storage.rb +++ b/app/uploaders/object_storage.rb @@ -353,6 +353,20 @@ def store_dirs } end + def store_path(*args) + if self.object_store == Store::REMOTE + # We allow administrators to create "sub buckets" by setting a prefix. + # This makes it possible to deploy GitLab with only one object storage + # bucket. Because the prefix is configuration data we do not want to + # store it in the uploads table via RecordsUploads. That means that the + # prefix cannot be part of store_dir. This is why we chose to implement + # the prefix support here in store_path. + File.join([self.class.object_store_options.bucket_prefix, super].compact) + else + super + end + end + # Returns all the possible paths for an upload. # the `upload.path` is a lookup parameter, and it may change # depending on the `store` param. diff --git a/config/object_store_settings.rb b/config/object_store_settings.rb index ea954b7061a147..3280bc284addaf 100644 --- a/config/object_store_settings.rb +++ b/config/object_store_settings.rb @@ -26,7 +26,9 @@ class ObjectStoreSettings def self.legacy_parse(object_store, object_store_type) object_store ||= Settingslogic.new({}) object_store['enabled'] = false if object_store['enabled'].nil? - object_store['remote_directory'] ||= nil + object_store['remote_directory'], object_store['bucket_prefix'] = split_bucket_prefix( + object_store['remote_directory'] + ) if support_legacy_background_upload?(object_store_type) object_store['direct_upload'] = false @@ -48,6 +50,22 @@ def self.support_legacy_background_upload?(object_store_type) ENV[LEGACY_BACKGROUND_UPLOADS_ENV].to_s.split(',').map(&:strip).include?(object_store_type) end + def self.split_bucket_prefix(bucket) + return [nil, nil] unless bucket.present? + + # Strictly speaking, object storage keys are not Unix paths and + # characters like '/' and '.' have no special meaning. But in practice, + # we do treat them like paths, and somewhere along the line something or + # somebody may turn '//' into '/' or try to resolve '/..'. To guard + # against this we reject "bad" combinations of '/' and '.'. + [%r{\A\.*/}, %r{/\.*/}, %r{/\.*\z}].each do |re| + raise 'invalid bucket' if re.match(bucket) + end + + bucket, prefix = bucket.split('/', 2) + [bucket, prefix] + end + def initialize(settings) @settings = settings end @@ -156,7 +174,9 @@ def parse! next if allowed_storage_specific_settings?(store_type, section.to_h) # Map bucket (external name) -> remote_directory (internal representation) - target_config['remote_directory'] = target_config.delete('bucket') + target_config['remote_directory'], target_config['bucket_prefix'] = self.class.split_bucket_prefix( + target_config.delete('bucket') + ) target_config['consolidated_settings'] = true section['object_store'] = target_config # Settingslogic internally stores data as a Hash, but it also diff --git a/doc/administration/object_storage.md b/doc/administration/object_storage.md index 47d01d92da5509..5e54835c6700dc 100644 --- a/doc/administration/object_storage.md +++ b/doc/administration/object_storage.md @@ -573,6 +573,15 @@ This ensures there are no collisions across the various types of data GitLab sto There are plans to [enable the use of a single bucket](https://gitlab.com/gitlab-org/gitlab/-/issues/292958) in the future. +With Omnibus and source installations it is possible to split a single +real bucket into multiple virtual buckets. If your object storage +bucket is called `my-gitlab-objects` you can configure uploads to go +into `my-gitlab-objects/uploads`, artifacts into +`my-gitlab-objects/artifacts`, etc. The application will act as if +these are separate buckets. Note that use of bucket prefixes [may not +work correctly with Helm +backups](https://gitlab.com/gitlab-org/charts/gitlab/-/issues/3376). + Helm-based installs require separate buckets to [handle backup restorations](https://docs.gitlab.com/charts/advanced/external-object-storage/#lfs-artifacts-uploads-packages-external-diffs-terraform-state-dependency-proxy). diff --git a/spec/config/object_store_settings_spec.rb b/spec/config/object_store_settings_spec.rb index 56ad0943377aa0..1555124fe033d5 100644 --- a/spec/config/object_store_settings_spec.rb +++ b/spec/config/object_store_settings_spec.rb @@ -73,6 +73,7 @@ expect(settings.artifacts['object_store']['background_upload']).to be false expect(settings.artifacts['object_store']['proxy_download']).to be false expect(settings.artifacts['object_store']['remote_directory']).to eq('artifacts') + expect(settings.artifacts['object_store']['bucket_prefix']).to eq(nil) expect(settings.artifacts['object_store']['consolidated_settings']).to be true expect(settings.artifacts).to eq(settings['artifacts']) @@ -83,6 +84,7 @@ expect(settings.lfs['object_store']['background_upload']).to be false expect(settings.lfs['object_store']['proxy_download']).to be true expect(settings.lfs['object_store']['remote_directory']).to eq('lfs-objects') + expect(settings.lfs['object_store']['bucket_prefix']).to eq(nil) expect(settings.lfs['object_store']['consolidated_settings']).to be true expect(settings.lfs).to eq(settings['lfs']) @@ -90,6 +92,7 @@ expect(settings.pages['object_store']['enabled']).to be true expect(settings.pages['object_store']['connection']).to eq(connection) expect(settings.pages['object_store']['remote_directory']).to eq('pages') + expect(settings.pages['object_store']['bucket_prefix']).to eq(nil) expect(settings.pages['object_store']['consolidated_settings']).to be true expect(settings.pages).to eq(settings['pages']) @@ -98,6 +101,18 @@ expect(settings.external_diffs).to eq(settings['external_diffs']) end + it 'supports bucket prefixes' do + config['object_store']['objects']['artifacts']['bucket'] = 'gitlab/artifacts' + config['object_store']['objects']['lfs']['bucket'] = 'gitlab/lfs' + + subject + + expect(settings.artifacts['object_store']['remote_directory']).to eq('gitlab') + expect(settings.artifacts['object_store']['bucket_prefix']).to eq('artifacts') + expect(settings.lfs['object_store']['remote_directory']).to eq('gitlab') + expect(settings.lfs['object_store']['bucket_prefix']).to eq('lfs') + end + it 'raises an error when a bucket is missing' do config['object_store']['objects']['lfs'].delete('bucket') @@ -152,6 +167,7 @@ expect(settings.artifacts['enabled']).to be true expect(settings.artifacts['object_store']['remote_directory']).to be_nil + expect(settings.artifacts['object_store']['bucket_prefix']).to be_nil expect(settings.artifacts['object_store']['enabled']).to be_falsey expect(settings.artifacts['object_store']['consolidated_settings']).to be_falsey end @@ -177,6 +193,7 @@ expect(settings.artifacts['object_store']).to be_nil expect(settings.lfs['object_store']['remote_directory']).to eq('some-bucket') + expect(settings.lfs['object_store']['bucket_prefix']).to eq(nil) # Disable background_upload, regardless of the input config expect(settings.lfs['object_store']['direct_upload']).to eq(true) expect(settings.lfs['object_store']['background_upload']).to eq(false) @@ -203,6 +220,7 @@ expect(settings.artifacts['object_store']).to be_nil expect(settings.lfs['object_store']['remote_directory']).to eq('some-bucket') + expect(settings.lfs['object_store']['bucket_prefix']).to eq(nil) # Enable background_upload if the environment variable is available expect(settings.lfs['object_store']['direct_upload']).to eq(false) expect(settings.lfs['object_store']['background_upload']).to eq(true) @@ -220,6 +238,7 @@ expect(settings['direct_upload']).to be true expect(settings['background_upload']).to be false expect(settings['remote_directory']).to be nil + expect(settings['bucket_prefix']).to be nil end it 'respects original values' do @@ -234,6 +253,18 @@ expect(settings['direct_upload']).to be true expect(settings['background_upload']).to be false expect(settings['remote_directory']).to eq 'artifacts' + expect(settings['bucket_prefix']).to be nil + end + + it 'supports bucket prefixes' do + original_settings = Settingslogic.new({ + 'enabled' => true, + 'remote_directory' => 'gitlab/artifacts' + }) + + settings = described_class.legacy_parse(original_settings, 'artifacts') + expect(settings['remote_directory']).to eq 'gitlab' + expect(settings['bucket_prefix']).to eq 'artifacts' end context 'legacy background upload environment variable is enabled' do @@ -253,6 +284,7 @@ expect(settings['direct_upload']).to be false expect(settings['background_upload']).to be true expect(settings['remote_directory']).to eq 'artifacts' + expect(settings['bucket_prefix']).to eq nil end end @@ -273,6 +305,50 @@ expect(settings['direct_upload']).to be true expect(settings['background_upload']).to be false expect(settings['remote_directory']).to eq 'artifacts' + expect(settings['bucket_prefix']).to eq nil + end + end + end + + describe '.split_bucket_prefix' do + using RSpec::Parameterized::TableSyntax + + subject { described_class.split_bucket_prefix(input) } + + context 'valid inputs' do + where(:input, :bucket, :prefix) do + nil | nil | nil + '' | nil | nil + 'bucket' | 'bucket' | nil + 'bucket/prefix' | 'bucket' | 'prefix' + 'bucket/pre/fix' | 'bucket' | 'pre/fix' + end + + with_them do + it { expect(subject).to eq([bucket, prefix]) } + end + end + + context 'invalid inputs' do + where(:input) do + [ + ['bucket/'], + ['bucket/.'], + ['bucket/..'], + ['bucket/prefix/'], + ['bucket/prefix/.'], + ['bucket/prefix/..'], + ['/bucket/prefix'], + ['./bucket/prefix'], + ['../bucket/prefix'], + ['bucket//prefix'], + ['bucket/./prefix'], + ['bucket/../prefix'] + ] + end + + with_them do + it { expect { subject }.to raise_error(/invalid bucket/) } end end end diff --git a/spec/uploaders/object_storage_spec.rb b/spec/uploaders/object_storage_spec.rb index 13f70e3f85b64f..1bcc43b81a847d 100644 --- a/spec/uploaders/object_storage_spec.rb +++ b/spec/uploaders/object_storage_spec.rb @@ -48,6 +48,28 @@ def dynamic_segment expect(uploader.store_dir).to start_with("uploads/-/system/user/") end end + + describe '#store_path' do + subject { uploader.store_path('filename') } + + it 'uses store_dir' do + expect(subject).to eq("uploads/-/system/user/#{object.id}/filename") + end + + context 'when a bucket prefix is configured' do + before do + allow(uploader_class).to receive(:object_store_options) do + double( + bucket_prefix: 'my/prefix' + ) + end + end + + it 'uses store_dir and ignores prefix' do + expect(subject).to eq("uploads/-/system/user/#{object.id}/filename") + end + end + end end context 'object_store is Store::REMOTE' do @@ -60,6 +82,28 @@ def dynamic_segment expect(uploader.store_dir).to start_with("user/") end end + + describe '#store_path' do + subject { uploader.store_path('filename') } + + it 'uses store_dir' do + expect(subject).to eq("user/#{object.id}/filename") + end + + context 'when a bucket prefix is configured' do + before do + allow(uploader_class).to receive(:object_store_options) do + double( + bucket_prefix: 'my/prefix' + ) + end + end + + it 'uses the prefix and store_dir' do + expect(subject).to eq("my/prefix/user/#{object.id}/filename") + end + end + end end end -- GitLab