From e906b662f3434fc0e72f542f1fa7a820d6f821d0 Mon Sep 17 00:00:00 2001 From: krisberry Date: Thu, 10 Nov 2022 17:57:02 +0200 Subject: [PATCH] Add worker to import gist to snippet It is a first step of Import all GitHub gists into Snippets. This commit adds a new Gitlab::GithubGistsImport::ImportGistWorker that responsible for GistImporter execution that creates snippet from GitHub gist attributes. Details: https://gitlab.com/gitlab-org/gitlab/-/issues/371099 Changelog: added --- app/workers/all_queues.yml | 9 ++ .../github_gists_import/import_gist_worker.rb | 75 ++++++++++ config/sidekiq_queues.yml | 2 + .../importer/gist_importer.rb | 84 ++++++++++++ .../representation/gist.rb | 71 ++++++++++ .../importer/gist_importer_spec.rb | 128 ++++++++++++++++++ .../representation/gist_spec.rb | 111 +++++++++++++++ spec/workers/every_sidekiq_worker_spec.rb | 1 + .../import_gist_worker_spec.rb | 94 +++++++++++++ 9 files changed, 575 insertions(+) create mode 100644 app/workers/gitlab/github_gists_import/import_gist_worker.rb create mode 100644 lib/gitlab/github_gists_import/importer/gist_importer.rb create mode 100644 lib/gitlab/github_gists_import/representation/gist.rb create mode 100644 spec/lib/gitlab/github_gists_import/importer/gist_importer_spec.rb create mode 100644 spec/lib/gitlab/github_gists_import/representation/gist_spec.rb create mode 100644 spec/workers/gitlab/github_gists_import/import_gist_worker_spec.rb diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 40c74e9429ff5b..17931152bd0f2e 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -1038,6 +1038,15 @@ :weight: 1 :idempotent: false :tags: [] +- :name: github_gists_importer:github_gists_import_import_gist + :worker_name: Gitlab::GithubGistsImport::ImportGistWorker + :feature_category: :importers + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: false + :tags: [] - :name: github_importer:github_import_attachments_import_issue :worker_name: Gitlab::GithubImport::Attachments::ImportIssueWorker :feature_category: :importers diff --git a/app/workers/gitlab/github_gists_import/import_gist_worker.rb b/app/workers/gitlab/github_gists_import/import_gist_worker.rb new file mode 100644 index 00000000000000..7e2b3709597910 --- /dev/null +++ b/app/workers/gitlab/github_gists_import/import_gist_worker.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module Gitlab + module GithubGistsImport + class ImportGistWorker # rubocop:disable Scalability/IdempotentWorker + include ApplicationWorker + include Gitlab::NotifyUponDeath + + data_consistency :always + queue_namespace :github_gists_importer + feature_category :importers + + sidekiq_options dead: false, retry: 5 + + def perform(user_id, gist_hash, notify_key) + gist = ::Gitlab::GithubGistsImport::Representation::Gist.from_json_hash(gist_hash) + + with_logging(user_id, gist.github_identifiers) do + result = importer_class.new(gist, user_id).execute + error(user_id, result.errors, gist.github_identifiers) unless result.success? + + JobWaiter.notify(notify_key, jid) + end + rescue StandardError => e + log_and_track_error(user_id, e, gist.github_identifiers) + + raise + end + + private + + def importer_class + ::Gitlab::GithubGistsImport::Importer::GistImporter + end + + def with_logging(user_id, gist_id) + info(user_id, 'start importer', gist_id) + + yield + + info(user_id, 'importer finished', gist_id) + end + + def log_and_track_error(user_id, exception, gist_id) + error(user_id, exception.message, gist_id) + + Gitlab::ErrorTracking.track_exception(exception, + import_type: :github_gists, + user_id: user_id + ) + end + + def error(user_id, error_message, gist_id) + attributes = { + user_id: user_id, + github_identifiers: gist_id, + message: 'importer failed', + 'error.message': error_message + } + + Gitlab::GithubImport::Logger.error(structured_payload(attributes)) + end + + def info(user_id, message, gist_id) + attributes = { + user_id: user_id, + message: message, + github_identifiers: gist_id + } + + Gitlab::GithubImport::Logger.info(structured_payload(attributes)) + end + end + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 5b1b78b3046985..86e417dd92b0c0 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -193,6 +193,8 @@ - 1 - - geo - 1 +- - github_gists_importer + - 1 - - github_import_advance_stage - 1 - - github_importer diff --git a/lib/gitlab/github_gists_import/importer/gist_importer.rb b/lib/gitlab/github_gists_import/importer/gist_importer.rb new file mode 100644 index 00000000000000..a5e87d3cf7db59 --- /dev/null +++ b/lib/gitlab/github_gists_import/importer/gist_importer.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +module Gitlab + module GithubGistsImport + module Importer + class GistImporter + attr_reader :gist, :user + + FileCountLimitError = Class.new(StandardError) + + # gist - An instance of `Gitlab::GithubGistsImport::Representation::Gist`. + def initialize(gist, user_id) + @gist = gist + @user = User.find(user_id) + end + + def execute + snippet = build_snippet + import_repository(snippet) if snippet.save! + + return ServiceResponse.success unless max_snippet_files_count_exceeded?(snippet) + + fail_and_track(snippet) + end + + private + + def build_snippet + attrs = { + title: gist.truncated_title, + visibility_level: gist.visibility_level, + content: gist.first_file[:file_content], + file_name: gist.first_file[:file_name], + author: user, + created_at: gist.created_at, + updated_at: gist.updated_at + } + + PersonalSnippet.new(attrs) + end + + def import_repository(snippet) + resolved_address = get_resolved_address + + snippet.create_repository + snippet.repository.fetch_as_mirror(gist.git_pull_url, forced: true, resolved_address: resolved_address) + rescue StandardError + remove_snippet_and_repository(snippet) + + raise + end + + def get_resolved_address + validated_pull_url, host = Gitlab::UrlBlocker.validate!(gist.git_pull_url, + schemes: Project::VALID_IMPORT_PROTOCOLS, + ports: Project::VALID_IMPORT_PORTS, + allow_localhost: allow_local_requests?, + allow_local_network: allow_local_requests?) + + host.present? ? validated_pull_url.host.to_s : '' + end + + def max_snippet_files_count_exceeded?(snippet) + snippet.all_files.size > Snippet.max_file_limit + end + + def remove_snippet_and_repository(snippet) + snippet.repository.remove if snippet.repository_exists? + snippet.destroy + end + + def allow_local_requests? + Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services? + end + + def fail_and_track(snippet) + remove_snippet_and_repository(snippet) + + ServiceResponse.error(message: 'Snippet max file count exceeded').track_exception(as: FileCountLimitError) + end + end + end + end +end diff --git a/lib/gitlab/github_gists_import/representation/gist.rb b/lib/gitlab/github_gists_import/representation/gist.rb new file mode 100644 index 00000000000000..0d309a98f382b0 --- /dev/null +++ b/lib/gitlab/github_gists_import/representation/gist.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module Gitlab + module GithubGistsImport + module Representation + class Gist + include Gitlab::GithubImport::Representation::ToHash + include Gitlab::GithubImport::Representation::ExposeAttribute + + attr_reader :attributes + + expose_attribute :id, :description, :is_public, :created_at, :updated_at, :files, :git_pull_url + + # Builds a gist from a GitHub API response. + # + # gist - An instance of `Hash` containing the gist + # details. + def self.from_api_response(gist, additional_data = {}) + hash = { + id: gist[:id], + description: gist[:description], + is_public: gist[:public], + files: gist[:files], + git_pull_url: gist[:git_pull_url], + created_at: gist[:created_at], + updated_at: gist[:updated_at] + } + + new(hash) + end + + # Builds a new gist using a Hash that was built from a JSON payload. + def self.from_json_hash(raw_hash) + new(Gitlab::GithubImport::Representation.symbolize_hash(raw_hash)) + end + + # attributes - A hash containing the raw gist details. The keys of this + # Hash (and any nested hashes) must be symbols. + def initialize(attributes) + @attributes = attributes + end + + # Gist description can be an empty string, so we returning nil to use first file + # name as a title in such case on snippet creation + # Gist description has a limit of 256, while the snippet's title can be up to 255 + def truncated_title + title = description.presence || first_file[:file_name] + + title.truncate(255) + end + + def visibility_level + is_public ? Gitlab::VisibilityLevel::PUBLIC : Gitlab::VisibilityLevel::PRIVATE + end + + def first_file + _key, value = files.first + + { + file_name: value[:filename], + file_content: Gitlab::HTTP.try_get(value[:raw_url])&.body + } + end + + def github_identifiers + { id: id } + end + end + end + end +end diff --git a/spec/lib/gitlab/github_gists_import/importer/gist_importer_spec.rb b/spec/lib/gitlab/github_gists_import/importer/gist_importer_spec.rb new file mode 100644 index 00000000000000..69a4d6465628db --- /dev/null +++ b/spec/lib/gitlab/github_gists_import/importer/gist_importer_spec.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::GithubGistsImport::Importer::GistImporter, feature_category: :importer do + subject { described_class.new(gist_object, user.id).execute } + + let_it_be(:user) { create(:user) } + let(:created_at) { Time.utc(2022, 1, 9, 12, 15) } + let(:updated_at) { Time.utc(2022, 5, 9, 12, 17) } + let(:gist_file) { { file_name: '_Summary.md', file_content: 'File content' } } + let(:url) { 'https://host.com/gistid.git' } + let(:gist_object) do + instance_double('Gitlab::GithubGistsImport::Representation::Gist', + truncated_title: 'My Gist', + visibility_level: 0, + files: { '_Summary.md': gist_file }, + first_file: gist_file, + git_pull_url: url, + created_at: created_at, + updated_at: updated_at + ) + end + + let(:expected_snippet_attrs) do + { + title: 'My Gist', + visibility_level: 0, + content: 'File content', + file_name: '_Summary.md', + author_id: user.id, + created_at: gist_object.created_at, + updated_at: gist_object.updated_at + }.stringify_keys + end + + describe '#execute' do + context 'when success' do + it 'creates expected snippet and snippet repository' do + expect_next_instance_of(Repository) do |repository| + expect(repository).to receive(:fetch_as_mirror) + end + + expect { subject }.to change { user.snippets.count }.by(1) + expect(user.snippets[0].attributes).to include expected_snippet_attrs + end + end + + context 'when file size limit exeeded' do + before do + files = [].tap { |array| 11.times { |n| array << ["file#{n}.txt", {}] } }.to_h + + allow(gist_object).to receive(:files).and_return(files) + allow_next_instance_of(Repository) do |repository| + allow(repository).to receive(:fetch_as_mirror) + allow(repository).to receive(:empty?).and_return(false) + allow(repository).to receive(:ls_files).and_return(files.keys) + end + end + + it 'returns error' do + result = subject + + expect(user.snippets.count).to eq(0) + expect(result.error?).to eq(true) + expect(result.errors).to match_array(['Snippet max file count exceeded']) + end + end + + context 'when invalid attributes' do + let(:gist_file) { { file_name: '_Summary.md', file_content: nil } } + + it 'raises an error' do + expect { subject }.to raise_error(ActiveRecord::RecordInvalid, "Validation failed: Content can't be blank") + end + end + + context 'when repository cloning fails' do + it 'returns error' do + expect_next_instance_of(Repository) do |repository| + expect(repository).to receive(:fetch_as_mirror).and_raise(Gitlab::Shell::Error) + expect(repository).to receive(:remove) + end + + expect { subject }.to raise_error(Gitlab::Shell::Error) + expect(user.snippets.count).to eq(0) + end + end + + context 'when url is invalid' do + let(:url) { 'invalid' } + + context 'when local network is allowed' do + before do + allow(::Gitlab::CurrentSettings) + .to receive(:allow_local_requests_from_web_hooks_and_services?).and_return(true) + end + + it 'raises error' do + expect(Gitlab::UrlBlocker) + .to receive(:validate!) + .with(url, ports: [80, 443], schemes: %w[http https git], + allow_localhost: true, allow_local_network: true) + .and_raise(Gitlab::UrlBlocker::BlockedUrlError) + + expect { subject }.to raise_error(Gitlab::UrlBlocker::BlockedUrlError) + end + end + + context 'when local network is not allowed' do + before do + allow(::Gitlab::CurrentSettings) + .to receive(:allow_local_requests_from_web_hooks_and_services?).and_return(false) + end + + it 'raises error' do + expect(Gitlab::UrlBlocker) + .to receive(:validate!) + .with(url, ports: [80, 443], schemes: %w[http https git], + allow_localhost: false, allow_local_network: false) + .and_raise(Gitlab::UrlBlocker::BlockedUrlError) + + expect { subject }.to raise_error(Gitlab::UrlBlocker::BlockedUrlError) + end + end + end + end +end diff --git a/spec/lib/gitlab/github_gists_import/representation/gist_spec.rb b/spec/lib/gitlab/github_gists_import/representation/gist_spec.rb new file mode 100644 index 00000000000000..f36fbc637d06b6 --- /dev/null +++ b/spec/lib/gitlab/github_gists_import/representation/gist_spec.rb @@ -0,0 +1,111 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::GithubGistsImport::Representation::Gist do + shared_examples 'a Gist' do + it 'returns an instance of Gist' do + expect(gist).to be_an_instance_of(described_class) + end + + context 'with Gist' do + it 'includes gist attributes' do + expect(gist).to have_attributes( + id: '1', + description: 'Gist title', + is_public: true, + files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } }, + git_pull_url: 'https://gist.github.com/gistid.git' + ) + end + end + end + + describe '.from_api_response' do + let(:response) do + { + id: '1', + description: 'Gist title', + public: true, + created_at: '2022-04-26 18:30:53 UTC', + updated_at: '2022-04-26 18:30:53 UTC', + files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } }, + git_pull_url: 'https://gist.github.com/gistid.git' + } + end + + it_behaves_like 'a Gist' do + let(:gist) { described_class.from_api_response(response) } + end + end + + describe '.from_json_hash' do + it_behaves_like 'a Gist' do + let(:hash) do + { + 'id' => '1', + 'description' => 'Gist title', + 'is_public' => true, + 'files' => { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } }, + 'git_pull_url' => 'https://gist.github.com/gistid.git' + } + end + + let(:gist) { described_class.from_json_hash(hash) } + end + end + + describe '#truncated_title' do + it 'truncates the title to 255 characters' do + object = described_class.new(description: 'm' * 300) + + expect(object.truncated_title.length).to eq(255) + end + + it 'does not truncate the title if it is shorter than 255 characters' do + object = described_class.new(description: 'foo') + + expect(object.truncated_title).to eq('foo') + end + end + + describe '#github_identifiers' do + it 'returns a hash with needed identifiers' do + github_identifiers = { id: 1 } + gist = described_class.new(github_identifiers.merge(something_else: '_something_else_')) + + expect(gist.github_identifiers).to eq(github_identifiers) + end + end + + describe '#visibility_level' do + it 'returns 20 when public' do + visibility = { is_public: true } + gist = described_class.new(visibility.merge(something_else: '_something_else_')) + + expect(gist.visibility_level).to eq(20) + end + + it 'returns 0 when private' do + visibility = { is_public: false } + gist = described_class.new(visibility.merge(something_else: '_something_else_')) + + expect(gist.visibility_level).to eq(0) + end + end + + describe '#first_file' do + let(:http_response) { instance_double('HTTParty::Response', body: 'File content') } + + before do + allow(Gitlab::HTTP).to receive(:try_get).and_return(http_response) + end + + it 'returns a hash with needed identifiers' do + files = { files: { '_Summary.md': { filename: '_Summary.md', raw_url: 'https://some_url' } } } + gist = described_class.new(files.merge(something_else: '_something_else_')) + + expect(gist.first_file).to eq(file_name: '_Summary.md', file_content: 'File content') + end + end +end diff --git a/spec/workers/every_sidekiq_worker_spec.rb b/spec/workers/every_sidekiq_worker_spec.rb index 1cee67f68abeda..70772fd8b521a1 100644 --- a/spec/workers/every_sidekiq_worker_spec.rb +++ b/spec/workers/every_sidekiq_worker_spec.rb @@ -286,6 +286,7 @@ 'Gitlab::GithubImport::Stage::ImportPullRequestsReviewsWorker' => 5, 'Gitlab::GithubImport::Stage::ImportPullRequestsWorker' => 5, 'Gitlab::GithubImport::Stage::ImportRepositoryWorker' => 5, + 'Gitlab::GithubGistsImport::ImportGistWorker' => 5, 'Gitlab::JiraImport::AdvanceStageWorker' => 5, 'Gitlab::JiraImport::ImportIssueWorker' => 5, 'Gitlab::JiraImport::Stage::FinishImportWorker' => 5, diff --git a/spec/workers/gitlab/github_gists_import/import_gist_worker_spec.rb b/spec/workers/gitlab/github_gists_import/import_gist_worker_spec.rb new file mode 100644 index 00000000000000..dfc5084bb101c9 --- /dev/null +++ b/spec/workers/gitlab/github_gists_import/import_gist_worker_spec.rb @@ -0,0 +1,94 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::GithubGistsImport::ImportGistWorker, feature_category: :importer do + subject { described_class.new } + + let_it_be(:user) { create(:user) } + let(:token) { 'token' } + let(:gist_hash) do + { + id: '055b70', + git_pull_url: 'https://gist.github.com/foo/bar.git', + files: { + 'random.txt': { + filename: 'random.txt', + type: 'text/plain', + language: 'Text', + raw_url: 'https://gist.githubusercontent.com/user_name/055b70/raw/66a7be0d/random.txt', + size: 166903 + } + }, + is_public: false, + created_at: '2022-09-06T11:38:18Z', + updated_at: '2022-09-06T11:38:18Z', + description: 'random text' + } + end + + let(:importer) { instance_double('Gitlab::GithubGistsImport::Importer::GistImporter') } + let(:importer_result) { instance_double('ServiceResponse', success?: true) } + let(:gist_object) do + instance_double('Gitlab::GithubGistsImport::Representation::Gist', + gist_hash.merge(github_identifiers: { id: '055b70' }, truncated_title: 'random text', visibility_level: 0)) + end + + let(:log_attributes) do + { + 'user_id' => user.id, + 'github_identifiers' => { 'id': gist_object.id }, + 'class' => 'Gitlab::GithubGistsImport::ImportGistWorker', + 'correlation_id' => 'new-correlation-id', + 'jid' => nil, + 'job_status' => 'running', + 'queue' => 'github_gists_importer:github_gists_import_import_gist' + } + end + + describe '#perform' do + before do + allow(Gitlab::GithubGistsImport::Representation::Gist) + .to receive(:from_json_hash) + .with(gist_hash) + .and_return(gist_object) + + allow(Gitlab::GithubGistsImport::Importer::GistImporter) + .to receive(:new) + .with(gist_object, user.id) + .and_return(importer) + + allow(Gitlab::ApplicationContext).to receive(:current).and_return('correlation_id' => 'new-correlation-id') + allow(described_class).to receive(:queue).and_return('github_gists_importer:github_gists_import_import_gist') + end + + context 'when success' do + it 'imports gist' do + expect(Gitlab::GithubImport::Logger) + .to receive(:info) + .with(log_attributes.merge('message' => 'start importer')) + expect(importer).to receive(:execute).and_return(importer_result) + expect(Gitlab::JobWaiter).to receive(:notify).with('some_key', subject.jid) + expect(Gitlab::GithubImport::Logger) + .to receive(:info) + .with(log_attributes.merge('message' => 'importer finished')) + + subject.perform(user.id, gist_hash, 'some_key') + end + end + + context 'when importer raised an error' do + it 'raises an error' do + exception = StandardError.new('_some_error_') + + expect(importer).to receive(:execute).and_raise(exception) + expect(Gitlab::GithubImport::Logger) + .to receive(:error) + .with(log_attributes.merge('message' => 'importer failed', 'error.message' => '_some_error_')) + expect(Gitlab::ErrorTracking).to receive(:track_exception) + + expect { subject.perform(user.id, gist_hash, 'some_key') }.to raise_error(StandardError) + end + end + end +end -- GitLab