From befe18984f6c45494e316f78bfcabb28376ff8f2 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 7 May 2018 11:42:34 +0000 Subject: [PATCH 1/2] Merge branch 'live-trace-v2' into 'master' New CI Job live-trace architecture (v2) Closes #44935 See merge request gitlab-org/gitlab-ce!18169 --- .flayignore | 1 + app/models/ci/build.rb | 1 + app/models/ci/build_trace_chunk.rb | 157 ++++ app/workers/all_queues.yml | 1 + .../ci/build_trace_chunk_flush_worker.rb | 12 + changelogs/unreleased/live-trace-v2.yml | 5 + ...0326202229_create_ci_build_trace_chunks.rb | 17 + ...i_build_trace_chunks_raw_data_for_mysql.rb | 13 + ...i_build_trace_chunks_raw_data_for_mysql.rb | 9 + db/schema.rb | 10 + doc/administration/job_traces.md | 95 +++ lib/api/runner.rb | 17 +- lib/gitlab/ci/trace.rb | 32 +- lib/gitlab/ci/trace/chunked_io.rb | 231 ++++++ lib/gitlab/ci/trace/stream.rb | 11 +- lib/tasks/migrate/add_limits_mysql.rake | 2 + .../projects/jobs_controller_spec.rb | 3 +- spec/factories/ci/build_trace_chunks.rb | 7 + spec/features/projects/jobs_spec.rb | 46 +- spec/lib/gitlab/ci/trace/chunked_io_spec.rb | 383 +++++++++ spec/lib/gitlab/ci/trace/stream_spec.rb | 547 ++++++++----- spec/lib/gitlab/ci/trace_spec.rb | 547 +------------ spec/models/ci/build_trace_chunk_spec.rb | 381 +++++++++ spec/requests/api/runner_spec.rb | 51 +- spec/services/ci/retry_build_service_spec.rb | 2 +- spec/spec_helper.rb | 19 +- spec/support/chunked_io/chunked_io_helpers.rb | 11 + spec/support/redis/redis_helpers.rb | 18 + .../ci_trace_shared_examples.rb | 741 ++++++++++++++++++ 29 files changed, 2556 insertions(+), 814 deletions(-) create mode 100644 app/models/ci/build_trace_chunk.rb create mode 100644 app/workers/ci/build_trace_chunk_flush_worker.rb create mode 100644 changelogs/unreleased/live-trace-v2.yml create mode 100644 db/migrate/20180326202229_create_ci_build_trace_chunks.rb create mode 100644 db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb create mode 100644 db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb create mode 100644 lib/gitlab/ci/trace/chunked_io.rb create mode 100644 spec/factories/ci/build_trace_chunks.rb create mode 100644 spec/lib/gitlab/ci/trace/chunked_io_spec.rb create mode 100644 spec/models/ci/build_trace_chunk_spec.rb create mode 100644 spec/support/chunked_io/chunked_io_helpers.rb create mode 100644 spec/support/redis/redis_helpers.rb create mode 100644 spec/support/shared_examples/ci_trace_shared_examples.rb diff --git a/.flayignore b/.flayignore index 553ef46322b41c..8f19c162945827 100644 --- a/.flayignore +++ b/.flayignore @@ -16,6 +16,7 @@ lib/gitlab/background_migration/normalize_ldap_extern_uids_range.rb lib/gitlab/background_migration/* app/models/project_services/kubernetes_service.rb lib/gitlab/workhorse.rb +lib/gitlab/ci/trace/chunked_io.rb ee/db/**/* ee/app/serializers/ee/merge_request_widget_entity.rb diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index b29f731df5c5be..03221b53aab8bb 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -23,6 +23,7 @@ class Build < CommitStatus has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_many :trace_sections, class_name: 'Ci::BuildTraceSection' + has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # 
rubocop:disable Cop/ActiveRecordDependent has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id diff --git a/app/models/ci/build_trace_chunk.rb b/app/models/ci/build_trace_chunk.rb new file mode 100644 index 00000000000000..d9e923daa71c46 --- /dev/null +++ b/app/models/ci/build_trace_chunk.rb @@ -0,0 +1,157 @@ +module Ci + class BuildTraceChunk < ActiveRecord::Base + extend Gitlab::Ci::Model + + belongs_to :build, class_name: "Ci::Build", foreign_key: :build_id + + after_destroy :redis_delete_data, if: :redis? + + default_value_for :data_store, :redis + + WriteError = Class.new(StandardError) + + CHUNK_SIZE = 128.kilobytes + CHUNK_REDIS_TTL = 1.week + WRITE_LOCK_RETRY = 10 + WRITE_LOCK_SLEEP = 0.01.seconds + WRITE_LOCK_TTL = 1.minute + + enum data_store: { + redis: 1, + db: 2 + } + + ## + # Data is memoized for optimizing #size and #end_offset + def data + @data ||= get_data.to_s + end + + def truncate(offset = 0) + raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0 + return if offset == size # Skip the following process as it doesn't affect anything + + self.append("", offset) + end + + def append(new_data, offset) + raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0 + raise ArgumentError, 'Chunk size overflow' if CHUNK_SIZE < (offset + new_data.bytesize) + + set_data(data.byteslice(0, offset) + new_data) + end + + def size + data&.bytesize.to_i + end + + def start_offset + chunk_index * CHUNK_SIZE + end + + def end_offset + start_offset + size + end + + def range + (start_offset...end_offset) + end + + def use_database! + in_lock do + break if db? + break unless size > 0 + + self.update!(raw_data: data, data_store: :db) + redis_delete_data + end + end + + private + + def get_data + if redis? + redis_data + elsif db? + raw_data + else + raise 'Unsupported data store' + end&.force_encoding(Encoding::BINARY) # Redis/Database return UTF-8 string as default + end + + def set_data(value) + raise ArgumentError, 'too much data' if value.bytesize > CHUNK_SIZE + + in_lock do + if redis? + redis_set_data(value) + elsif db? + self.raw_data = value + else + raise 'Unsupported data store' + end + + @data = value + + save! if changed? + end + + schedule_to_db if full? + end + + def schedule_to_db + return if db? + + Ci::BuildTraceChunkFlushWorker.perform_async(id) + end + + def full? + size == CHUNK_SIZE + end + + def redis_data + Gitlab::Redis::SharedState.with do |redis| + redis.get(redis_data_key) + end + end + + def redis_set_data(data) + Gitlab::Redis::SharedState.with do |redis| + redis.set(redis_data_key, data, ex: CHUNK_REDIS_TTL) + end + end + + def redis_delete_data + Gitlab::Redis::SharedState.with do |redis| + redis.del(redis_data_key) + end + end + + def redis_data_key + "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}" + end + + def redis_lock_key + "trace_write:#{build_id}:chunks:#{chunk_index}" + end + + def in_lock + lease = Gitlab::ExclusiveLease.new(redis_lock_key, timeout: WRITE_LOCK_TTL) + retry_count = 0 + + until uuid = lease.try_obtain + # Keep trying until we obtain the lease. To prevent hammering Redis too + # much we'll wait for a bit between retries. + sleep(WRITE_LOCK_SLEEP) + break if WRITE_LOCK_RETRY < (retry_count += 1) + end + + raise WriteError, 'Failed to obtain write lock' unless uuid + + self.reload if self.persisted? 
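+      # The reload above picks up any state (e.g. data_store, raw_data) changed
+      # by a competing writer while we waited for the lease; the caller's block
+      # then runs while we still hold the lock.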
+ return yield + ensure + Gitlab::ExclusiveLease.cancel(redis_lock_key, uuid) + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 7ab9fcb5bbd2fb..f7df14c9feaabc 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -52,6 +52,7 @@ - pipeline_creation:create_pipeline - pipeline_creation:run_pipeline_schedule - pipeline_background:archive_trace +- pipeline_background:ci_build_trace_chunk_flush - pipeline_default:build_coverage - pipeline_default:build_trace_sections - pipeline_default:pipeline_metrics diff --git a/app/workers/ci/build_trace_chunk_flush_worker.rb b/app/workers/ci/build_trace_chunk_flush_worker.rb new file mode 100644 index 00000000000000..218d6688bd9e7a --- /dev/null +++ b/app/workers/ci/build_trace_chunk_flush_worker.rb @@ -0,0 +1,12 @@ +module Ci + class BuildTraceChunkFlushWorker + include ApplicationWorker + include PipelineBackgroundQueue + + def perform(build_trace_chunk_id) + ::Ci::BuildTraceChunk.find_by(id: build_trace_chunk_id).try do |build_trace_chunk| + build_trace_chunk.use_database! + end + end + end +end diff --git a/changelogs/unreleased/live-trace-v2.yml b/changelogs/unreleased/live-trace-v2.yml new file mode 100644 index 00000000000000..875a66bc565296 --- /dev/null +++ b/changelogs/unreleased/live-trace-v2.yml @@ -0,0 +1,5 @@ +--- +title: New CI Job live-trace architecture +merge_request: 18169 +author: +type: changed diff --git a/db/migrate/20180326202229_create_ci_build_trace_chunks.rb b/db/migrate/20180326202229_create_ci_build_trace_chunks.rb new file mode 100644 index 00000000000000..fb3f5786e856c5 --- /dev/null +++ b/db/migrate/20180326202229_create_ci_build_trace_chunks.rb @@ -0,0 +1,17 @@ +class CreateCiBuildTraceChunks < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + create_table :ci_build_trace_chunks, id: :bigserial do |t| + t.integer :build_id, null: false + t.integer :chunk_index, null: false + t.integer :data_store, null: false + t.binary :raw_data + + t.foreign_key :ci_builds, column: :build_id, on_delete: :cascade + t.index [:build_id, :chunk_index], unique: true + end + end +end diff --git a/db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb b/db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb new file mode 100644 index 00000000000000..0f2734853e6b66 --- /dev/null +++ b/db/migrate/20180406204716_add_limits_ci_build_trace_chunks_raw_data_for_mysql.rb @@ -0,0 +1,13 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. +require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql') + +class AddLimitsCiBuildTraceChunksRawDataForMysql < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def up + LimitsCiBuildTraceChunksRawDataForMysql.new.up + end +end diff --git a/db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb b/db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb new file mode 100644 index 00000000000000..e1771912c3cd08 --- /dev/null +++ b/db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql.rb @@ -0,0 +1,9 @@ +class LimitsCiBuildTraceChunksRawDataForMysql < ActiveRecord::Migration + def up + return unless Gitlab::Database.mysql? 
+
+    # MySQL needs the MEDIUMTEXT type (up to 16MB) rather than TEXT (up to 64KB),
+    # because 'raw_data' is always capped at Ci::BuildTraceChunk::CHUNK_SIZE (128KB),
+    # which exceeds the TEXT limit.
+    change_column :ci_build_trace_chunks, :raw_data, :binary, limit: 16.megabytes - 1 # MEDIUMTEXT
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 699d2cd161020f..7b3a50be1411b6 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -342,6 +342,15 @@
 
   add_index "chat_teams", ["namespace_id"], name: "index_chat_teams_on_namespace_id", unique: true, using: :btree
 
+  create_table "ci_build_trace_chunks", id: :bigserial, force: :cascade do |t|
+    t.integer "build_id", null: false
+    t.integer "chunk_index", null: false
+    t.integer "data_store", null: false
+    t.binary "raw_data"
+  end
+
+  add_index "ci_build_trace_chunks", ["build_id", "chunk_index"], name: "index_ci_build_trace_chunks_on_build_id_and_chunk_index", unique: true, using: :btree
+
   create_table "ci_build_trace_section_names", force: :cascade do |t|
     t.integer "project_id", null: false
     t.string "name", null: false
@@ -2723,6 +2732,7 @@
   add_foreign_key "boards", "namespaces", column: "group_id", name: "fk_1e9a074a35", on_delete: :cascade
   add_foreign_key "boards", "projects", name: "fk_f15266b5f9", on_delete: :cascade
   add_foreign_key "chat_teams", "namespaces", on_delete: :cascade
+  add_foreign_key "ci_build_trace_chunks", "ci_builds", column: "build_id", on_delete: :cascade
   add_foreign_key "ci_build_trace_section_names", "projects", on_delete: :cascade
   add_foreign_key "ci_build_trace_sections", "ci_build_trace_section_names", column: "section_name_id", name: "fk_264e112c66", on_delete: :cascade
   add_foreign_key "ci_build_trace_sections", "ci_builds", column: "build_id", name: "fk_4ebe41f502", on_delete: :cascade
diff --git a/doc/administration/job_traces.md b/doc/administration/job_traces.md
index 84a1ffeec98ba6..f0b2054a7f3f33 100644
--- a/doc/administration/job_traces.md
+++ b/doc/administration/job_traces.md
@@ -40,3 +40,98 @@ To change the location where the job logs will be stored, follow the steps below
 
 [reconfigure gitlab]: restart_gitlab.md#omnibus-gitlab-reconfigure "How to reconfigure Omnibus GitLab"
 [restart gitlab]: restart_gitlab.md#installations-from-source "How to restart GitLab"
+
+## New live trace architecture
+
+> [Introduced][ce-18169] in GitLab 10.4.
+
+> **Notes**:
+- This feature is still in Beta. It could affect GitLab.com and on-premises instances, and in the worst-case scenario traces could be lost.
+- Performance improvements are still being discussed in [an issue](https://gitlab.com/gitlab-org/gitlab-ce/issues/46097).
+- This feature is off by default. See below for how to enable or disable it.
+
+**What is "live trace"?**
+
+A live trace is the job trace the runner sends while the job is still running; you can see it in the job page UI.
+Live traces are archived once the job finishes.
+
+**What is the new architecture?**
+
+Previously, when GitLab Runner sent a job trace to GitLab Rails, the trace was saved to file storage as a text file.
+This was a problem for the [Cloud Native-compatible GitLab application](https://gitlab.com/gitlab-com/migration/issues/23), where GitLab had to rely on file storage.
+
+The new live trace architecture stores chunks of traces in Redis and the database instead of file storage.
+Redis is used as first-class storage and holds up to 128KB per chunk. Once a chunk is full, it is flushed to the database. Afterwards, the data in Redis and the database is archived to object storage.
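+
+To make the chunking concrete, here is a minimal, illustrative sketch of the offset arithmetic; the constant mirrors `Ci::BuildTraceChunk::CHUNK_SIZE`, and the helper methods mirror the private `chunk_index`/`chunk_offset` computations in `ChunkedIO`. The standalone methods exist only for this example:
+
+```ruby
+# Illustrative only: plain-Ruby version of the chunk offset arithmetic.
+CHUNK_SIZE = 128 * 1024 # 128KB, as in Ci::BuildTraceChunk::CHUNK_SIZE
+
+# Map an absolute byte position in a trace to the chunk that holds it
+# and to the offset within that chunk.
+def chunk_index(pos)
+  pos / CHUNK_SIZE
+end
+
+def chunk_offset(pos)
+  pos % CHUNK_SIZE
+end
+
+pos = 300 * 1024  # byte 307,200 of a trace
+chunk_index(pos)  # => 2 (the third chunk)
+chunk_offset(pos) # => 45056 (position inside chunk 2)
+```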
+
+Here is the detailed data flow:
+
+1. GitLab Runner picks a job from GitLab-Rails
+1. GitLab Runner sends a piece of trace to GitLab-Rails
+1. GitLab-Rails appends the data to Redis
+1. Once the data in Redis reaches 128KB, it is flushed to the database
+1. Steps 2 to 4 are repeated until the job is finished
+1. Once the job is finished, GitLab-Rails schedules a Sidekiq worker to archive the trace
+1. The Sidekiq worker archives the trace to object storage and cleans up the trace in Redis and the database
+
+**How to check if it's on or off?**
+
+```ruby
+Feature.enabled?('ci_enable_live_trace')
+```
+
+**How to enable?**
+
+```ruby
+Feature.enable('ci_enable_live_trace')
+```
+
+>**Note:**
+The transition period is handled gracefully. New traces are generated with the new architecture, while ongoing live traces stay on the legacy architecture (i.e. ongoing live traces won't be forcibly regenerated with the new architecture).
+
+**How to disable?**
+
+```ruby
+Feature.disable('ci_enable_live_trace')
+```
+
+>**Note:**
+The transition period is handled gracefully. New traces are generated with the legacy architecture, while ongoing live traces stay on the new architecture (i.e. ongoing live traces won't be forcibly regenerated with the legacy architecture).
+
+**Redis namespace:**
+
+`Gitlab::Redis::SharedState`
+
+**Potential impact:**
+
+- This feature could incur data loss:
+  - Case 1: When all data in Redis is accidentally flushed.
+    - Ongoing live traces can be recovered by re-sending traces (this is supported by all versions of GitLab Runner).
+    - Finished jobs whose live traces have not been archived yet will lose the last part (~128KB) of trace data.
+  - Case 2: When Sidekiq workers fail to archive (e.g. a bug prevents the archiving process, Sidekiq inconsistency, etc.):
+    - Currently, all trace data in Redis is deleted after one week. If the Sidekiq workers can't finish by the expiry date, that part of the trace data is lost.
+- This feature could consume all memory on the Redis instance. With 1,000 concurrent jobs, up to 128MB (128KB × 1,000) is consumed.
+- This feature could increase database replication lag. An `INSERT` is generated to indicate that a trace chunk exists, and an `UPDATE` with up to 128KB of data is issued once a chunk is flushed.
+- and so on
+
+**How to test?**
+
+We're currently evaluating this feature on dev.gitlab.org and staging.gitlab.com to verify it. Here is the list of tests/measurements:
+
+- Features:
+  - Live traces should be visible on job pages
+  - Archived traces should be visible on job pages
+  - Live traces should be archived to object storage
+  - Live traces should be cleaned up after archiving
+  - etc
+- Performance:
+  - Schedule 1,000 to 10,000 jobs and let GitLab Runners process them concurrently. Measure memory pressure, IO load, etc. (see the sketch after this list)
+  - etc
+- Failover:
+  - Simulate Redis outage
+  - etc
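+
+As a rough way to measure the Redis side during such tests, one could sum the sizes of the trace chunk keys from a Rails console. This is an illustrative sketch only; the key pattern matches `Ci::BuildTraceChunk#redis_data_key` above, and `scan_each`/`strlen` are standard redis-rb commands:
+
+```ruby
+# Illustrative only: estimate memory held by live trace chunks in Redis.
+total_bytes = 0
+
+Gitlab::Redis::SharedState.with do |redis|
+  redis.scan_each(match: 'gitlab:ci:trace:*:chunks:*') do |key|
+    total_bytes += redis.strlen(key) # bytes stored in this chunk
+  end
+end
+
+puts "Live trace data in Redis: #{total_bytes / 1024} KB"
+```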
+
+**How to verify the correctness?**
+
+- TBD
+
+[ce-18169]: https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/18169
diff --git a/lib/api/runner.rb b/lib/api/runner.rb
index 67896ae1fc5557..cd7d6603171bdf 100644
--- a/lib/api/runner.rb
+++ b/lib/api/runner.rb
@@ -153,9 +153,20 @@ class Runner < Grape::API
           content_range = request.headers['Content-Range']
           content_range = content_range.split('-')
 
-          stream_size = job.trace.append(request.body.read, content_range[0].to_i)
-          if stream_size < 0
-            break error!('416 Range Not Satisfiable', 416, { 'Range' => "0-#{-stream_size}" })
+          # TODO:
+          # It seems that `Content-Range` as formatted by the runner is wrong:
+          # `byte_end` should point to the final byte, but it points to byte+1.
+          # That means we have to calculate the end of the body ourselves,
+          # as we cannot use `content_range[1]`.
+          # Issue: https://gitlab.com/gitlab-org/gitlab-runner/issues/3275
+
+          body_data = request.body.read
+          body_start = content_range[0].to_i
+          body_end = body_start + body_data.bytesize
+
+          stream_size = job.trace.append(body_data, body_start)
+          unless stream_size == body_end
+            break error!('416 Range Not Satisfiable', 416, { 'Range' => "0-#{stream_size}" })
           end
 
           status 202
diff --git a/lib/gitlab/ci/trace.rb b/lib/gitlab/ci/trace.rb
index 47b67930c6d188..f46d1d39ea77dd 100644
--- a/lib/gitlab/ci/trace.rb
+++ b/lib/gitlab/ci/trace.rb
@@ -36,16 +36,16 @@ def extract_sections
       end
 
       def set(data)
-        write do |stream|
+        write('w+b') do |stream|
           data = job.hide_secrets(data)
           stream.set(data)
         end
       end
 
       def append(data, offset)
-        write do |stream|
+        write('a+b') do |stream|
           current_length = stream.size
-          break -current_length unless current_length == offset
+          break current_length unless current_length == offset
 
           data = job.hide_secrets(data)
           stream.append(data, offset)
@@ -54,13 +54,15 @@ def append(data, offset)
       end
 
       def exist?
-        trace_artifact&.exists? || current_path.present? || old_trace.present?
+        trace_artifact&.exists? || job.trace_chunks.any? || current_path.present? || old_trace.present?
       end
 
       def read
         stream = Gitlab::Ci::Trace::Stream.new do
           if trace_artifact
             trace_artifact.open
+          elsif job.trace_chunks.any?
+            Gitlab::Ci::Trace::ChunkedIO.new(job)
           elsif current_path
             File.open(current_path, "rb")
           elsif old_trace
@@ -73,9 +75,15 @@ def read
         stream&.close
       end
 
-      def write
+      def write(mode)
         stream = Gitlab::Ci::Trace::Stream.new do
-          File.open(ensure_path, "a+b")
+          if current_path
+            File.open(current_path, mode)
+          elsif Feature.enabled?('ci_enable_live_trace')
+            Gitlab::Ci::Trace::ChunkedIO.new(job)
+          else
+            File.open(ensure_path, mode)
+          end
         end
 
         yield(stream).tap do
@@ -92,6 +100,7 @@ def erase!
           FileUtils.rm(trace_path, force: true)
         end
 
+        job.trace_chunks.destroy_all
         job.erase_old_trace!
       end
 
@@ -99,7 +108,12 @@ def archive!
         raise ArchiveError, 'Already archived' if trace_artifact
         raise ArchiveError, 'Job is not finished yet' unless job.complete?
 
-        if current_path
+        if job.trace_chunks.any?
+          Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream|
+            archive_stream!(stream)
+            stream.destroy!
+          end
+        elsif current_path
           File.open(current_path) do |stream|
             archive_stream!(stream)
             FileUtils.rm(current_path)
@@ -116,7 +130,7 @@
def archive_stream!(stream) clone_file!(stream, JobArtifactUploader.workhorse_upload_path) do |clone_path| - create_job_trace!(job, clone_path) + create_build_trace!(job, clone_path) end end @@ -132,7 +146,7 @@ def clone_file!(src_stream, temp_dir) end end - def create_job_trace!(job, path) + def create_build_trace!(job, path) File.open(path) do |stream| job.create_job_artifacts_trace!( project: job.project, diff --git a/lib/gitlab/ci/trace/chunked_io.rb b/lib/gitlab/ci/trace/chunked_io.rb new file mode 100644 index 00000000000000..cd3d13644110c5 --- /dev/null +++ b/lib/gitlab/ci/trace/chunked_io.rb @@ -0,0 +1,231 @@ +## +# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html) +# source: https://gitlab.com/snippets/1685610 +module Gitlab + module Ci + class Trace + class ChunkedIO + CHUNK_SIZE = ::Ci::BuildTraceChunk::CHUNK_SIZE + + FailedToGetChunkError = Class.new(StandardError) + + attr_reader :build + attr_reader :tell, :size + attr_reader :chunk, :chunk_range + + alias_method :pos, :tell + + def initialize(build, &block) + @build = build + @chunks_cache = [] + @tell = 0 + @size = calculate_size + yield self if block_given? + end + + def close + # no-op + end + + def binmode + # no-op + end + + def binmode? + true + end + + def seek(pos, where = IO::SEEK_SET) + new_pos = + case where + when IO::SEEK_END + size + pos + when IO::SEEK_SET + pos + when IO::SEEK_CUR + tell + pos + else + -1 + end + + raise ArgumentError, 'new position is outside of file' if new_pos < 0 || new_pos > size + + @tell = new_pos + end + + def eof? + tell == size + end + + def each_line + until eof? + line = readline + break if line.nil? + + yield(line) + end + end + + def read(length = nil, outbuf = "") + out = "" + + length ||= size - tell + + until length <= 0 || eof? + data = chunk_slice_from_offset + break if data.empty? + + chunk_bytes = [CHUNK_SIZE - chunk_offset, length].min + chunk_data = data.byteslice(0, chunk_bytes) + + out << chunk_data + @tell += chunk_data.bytesize + length -= chunk_data.bytesize + end + + # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality + if outbuf + outbuf.slice!(0, outbuf.bytesize) + outbuf << out + end + + out + end + + def readline + out = "" + + until eof? + data = chunk_slice_from_offset + new_line = data.index("\n") + + if !new_line.nil? + out << data[0..new_line] + @tell += new_line + 1 + break + else + out << data + @tell += data.bytesize + end + end + + out + end + + def write(data) + start_pos = tell + + while tell < start_pos + data.bytesize + # get slice from current offset till the end where it falls into chunk + chunk_bytes = CHUNK_SIZE - chunk_offset + chunk_data = data.byteslice(tell - start_pos, chunk_bytes) + + # append data to chunk, overwriting from that point + ensure_chunk.append(chunk_data, chunk_offset) + + # move offsets within buffer + @tell += chunk_data.bytesize + @size = [size, tell].max + end + + tell - start_pos + ensure + invalidate_chunk_cache + end + + def truncate(offset) + raise ArgumentError, 'Outside of file' if offset > size || offset < 0 + return if offset == size # Skip the following process as it doesn't affect anything + + @tell = offset + @size = offset + + # remove all next chunks + trace_chunks.where('chunk_index > ?', chunk_index).destroy_all + + # truncate current chunk + current_chunk.truncate(chunk_offset) + ensure + invalidate_chunk_cache + end + + def flush + # no-op + end + + def present? + true + end + + def destroy! 
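+          # Remove every persisted chunk for this build; offsets are reset and
+          # the in-memory chunk cache is invalidated in the ensure block below.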
+ trace_chunks.destroy_all + @tell = @size = 0 + ensure + invalidate_chunk_cache + end + + private + + ## + # The below methods are not implemented in IO class + # + def in_range? + @chunk_range&.include?(tell) + end + + def chunk_slice_from_offset + unless in_range? + current_chunk.tap do |chunk| + raise FailedToGetChunkError unless chunk + + @chunk = chunk.data + @chunk_range = chunk.range + end + end + + @chunk[chunk_offset..CHUNK_SIZE] + end + + def chunk_offset + tell % CHUNK_SIZE + end + + def chunk_index + tell / CHUNK_SIZE + end + + def chunk_start + chunk_index * CHUNK_SIZE + end + + def chunk_end + [chunk_start + CHUNK_SIZE, size].min + end + + def invalidate_chunk_cache + @chunks_cache = [] + end + + def current_chunk + @chunks_cache[chunk_index] ||= trace_chunks.find_by(chunk_index: chunk_index) + end + + def build_chunk + @chunks_cache[chunk_index] = ::Ci::BuildTraceChunk.new(build: build, chunk_index: chunk_index) + end + + def ensure_chunk + current_chunk || build_chunk + end + + def trace_chunks + ::Ci::BuildTraceChunk.where(build: build) + end + + def calculate_size + trace_chunks.order(chunk_index: :desc).first.try(&:end_offset).to_i + end + end + end + end +end diff --git a/lib/gitlab/ci/trace/stream.rb b/lib/gitlab/ci/trace/stream.rb index 187ad8b833a4d8..a71040e5e564f4 100644 --- a/lib/gitlab/ci/trace/stream.rb +++ b/lib/gitlab/ci/trace/stream.rb @@ -39,6 +39,8 @@ def limit(last_bytes = LIMIT_SIZE) end def append(data, offset) + data = data.force_encoding(Encoding::BINARY) + stream.truncate(offset) stream.seek(0, IO::SEEK_END) stream.write(data) @@ -46,8 +48,11 @@ def append(data, offset) end def set(data) - truncate(0) + data = data.force_encoding(Encoding::BINARY) + + stream.seek(0, IO::SEEK_SET) stream.write(data) + stream.truncate(data.bytesize) stream.flush() end @@ -127,11 +132,11 @@ def reverse_line buf += debris debris, *lines = buf.each_line.to_a lines.reverse_each do |line| - yield(line.force_encoding('UTF-8')) + yield(line.force_encoding(Encoding.default_external)) end end - yield(debris.force_encoding('UTF-8')) unless debris.empty? + yield(debris.force_encoding(Encoding.default_external)) unless debris.empty? 
end def read_backward(length) diff --git a/lib/tasks/migrate/add_limits_mysql.rake b/lib/tasks/migrate/add_limits_mysql.rake index 151f42a2222942..c6204f89de4cbb 100644 --- a/lib/tasks/migrate/add_limits_mysql.rake +++ b/lib/tasks/migrate/add_limits_mysql.rake @@ -1,6 +1,7 @@ require Rails.root.join('db/migrate/limits_to_mysql') require Rails.root.join('db/migrate/markdown_cache_limits_to_mysql') require Rails.root.join('db/migrate/merge_request_diff_file_limits_to_mysql') +require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql') desc "GitLab | Add limits to strings in mysql database" task add_limits_mysql: :environment do @@ -8,4 +9,5 @@ task add_limits_mysql: :environment do LimitsToMysql.new.up MarkdownCacheLimitsToMysql.new.up MergeRequestDiffFileLimitsToMysql.new.up + LimitsCiBuildTraceChunksRawDataForMysql.new.up end diff --git a/spec/controllers/projects/jobs_controller_spec.rb b/spec/controllers/projects/jobs_controller_spec.rb index b9a979044fe89d..2281cb420d9e15 100644 --- a/spec/controllers/projects/jobs_controller_spec.rb +++ b/spec/controllers/projects/jobs_controller_spec.rb @@ -1,7 +1,7 @@ # coding: utf-8 require 'spec_helper' -describe Projects::JobsController do +describe Projects::JobsController, :clean_gitlab_redis_shared_state do include ApiHelpers include HttpIOHelpers @@ -10,6 +10,7 @@ let(:user) { create(:user) } before do + stub_feature_flags(ci_enable_live_trace: true) stub_not_protect_default_branch end diff --git a/spec/factories/ci/build_trace_chunks.rb b/spec/factories/ci/build_trace_chunks.rb new file mode 100644 index 00000000000000..c0b9a25bfe8e7f --- /dev/null +++ b/spec/factories/ci/build_trace_chunks.rb @@ -0,0 +1,7 @@ +FactoryBot.define do + factory :ci_build_trace_chunk, class: Ci::BuildTraceChunk do + build factory: :ci_build + chunk_index 0 + data_store :redis + end +end diff --git a/spec/features/projects/jobs_spec.rb b/spec/features/projects/jobs_spec.rb index 94f10dc3043ede..540d72c92a007d 100644 --- a/spec/features/projects/jobs_spec.rb +++ b/spec/features/projects/jobs_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' require 'tempfile' -feature 'Jobs' do +feature 'Jobs', :clean_gitlab_redis_shared_state do let(:user) { create(:user) } let(:user_access_level) { :developer } let(:project) { create(:project, :repository) } @@ -282,7 +282,7 @@ it 'loads job trace' do expect(page).to have_content 'BUILD TRACE' - job.trace.write do |stream| + job.trace.write('a+b') do |stream| stream.append(' and more trace', 11) end @@ -604,48 +604,6 @@ end end - context 'storage form' do - let(:existing_file) { Tempfile.new('existing-trace-file').path } - - before do - job.run! 
- end - - context 'when job has trace in file', :js do - let(:paths) do - [existing_file] - end - - before do - allow_any_instance_of(Gitlab::Ci::Trace) - .to receive(:paths) - .and_return([existing_file]) - end - - it 'sends the right headers' do - requests = inspect_requests(inject_headers: { 'X-Sendfile-Type' => 'X-Sendfile' }) do - visit raw_project_job_path(project, job) - end - expect(requests.first.response_headers['Content-Type']).to eq('text/plain; charset=utf-8') - expect(requests.first.response_headers['X-Sendfile']).to eq(existing_file) - end - end - - context 'when job has trace in the database', :js do - before do - allow_any_instance_of(Gitlab::Ci::Trace) - .to receive(:paths) - .and_return([]) - - visit project_job_path(project, job) - end - - it 'sends the right headers' do - expect(page).not_to have_selector('.js-raw-link-controller') - end - end - end - context "when visiting old URL" do let(:raw_job_url) do raw_project_job_path(project, job) diff --git a/spec/lib/gitlab/ci/trace/chunked_io_spec.rb b/spec/lib/gitlab/ci/trace/chunked_io_spec.rb new file mode 100644 index 00000000000000..6259b952adda0f --- /dev/null +++ b/spec/lib/gitlab/ci/trace/chunked_io_spec.rb @@ -0,0 +1,383 @@ +require 'spec_helper' + +describe Gitlab::Ci::Trace::ChunkedIO, :clean_gitlab_redis_cache do + include ChunkedIOHelpers + + set(:build) { create(:ci_build, :running) } + let(:chunked_io) { described_class.new(build) } + + before do + stub_feature_flags(ci_enable_live_trace: true) + end + + context "#initialize" do + context 'when a chunk exists' do + before do + build.trace.set('ABC') + end + + it { expect(chunked_io.size).to eq(3) } + end + + context 'when two chunks exist' do + before do + stub_buffer_size(4) + build.trace.set('ABCDEF') + end + + it { expect(chunked_io.size).to eq(6) } + end + + context 'when no chunks exists' do + it { expect(chunked_io.size).to eq(0) } + end + end + + context "#seek" do + subject { chunked_io.seek(pos, where) } + + before do + build.trace.set(sample_trace_raw) + end + + context 'when moves pos to end of the file' do + let(:pos) { 0 } + let(:where) { IO::SEEK_END } + + it { is_expected.to eq(sample_trace_raw.bytesize) } + end + + context 'when moves pos to middle of the file' do + let(:pos) { sample_trace_raw.bytesize / 2 } + let(:where) { IO::SEEK_SET } + + it { is_expected.to eq(pos) } + end + + context 'when moves pos around' do + it 'matches the result' do + expect(chunked_io.seek(0)).to eq(0) + expect(chunked_io.seek(100, IO::SEEK_CUR)).to eq(100) + expect { chunked_io.seek(sample_trace_raw.bytesize + 1, IO::SEEK_CUR) } + .to raise_error('new position is outside of file') + end + end + end + + context "#eof?" do + subject { chunked_io.eof? 
} + + before do + build.trace.set(sample_trace_raw) + end + + context 'when current pos is at end of the file' do + before do + chunked_io.seek(sample_trace_raw.bytesize, IO::SEEK_SET) + end + + it { is_expected.to be_truthy } + end + + context 'when current pos is not at end of the file' do + before do + chunked_io.seek(0, IO::SEEK_SET) + end + + it { is_expected.to be_falsey } + end + end + + context "#each_line" do + let(:string_io) { StringIO.new(sample_trace_raw) } + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it 'yields lines' do + expect { |b| chunked_io.each_line(&b) } + .to yield_successive_args(*string_io.each_line.to_a) + end + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it 'calls get_chunk only once' do + expect_any_instance_of(Gitlab::Ci::Trace::ChunkedIO) + .to receive(:current_chunk).once.and_call_original + + chunked_io.each_line { |line| } + end + end + end + + context "#read" do + subject { chunked_io.read(length) } + + context 'when read the whole size' do + let(:length) { nil } + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it { is_expected.to eq(sample_trace_raw) } + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it { is_expected.to eq(sample_trace_raw) } + end + end + + context 'when read only first 100 bytes' do + let(:length) { 100 } + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to eq(sample_trace_raw.byteslice(0, length)) + end + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to eq(sample_trace_raw.byteslice(0, length)) + end + end + end + + context 'when tries to read oversize' do + let(:length) { sample_trace_raw.bytesize + 1000 } + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to eq(sample_trace_raw) + end + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to eq(sample_trace_raw) + end + end + end + + context 'when tries to read 0 bytes' do + let(:length) { 0 } + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to be_empty + end + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it 'reads a trace' do + is_expected.to be_empty + end + end + end + end + + context "#readline" do + subject { chunked_io.readline } + + let(:string_io) { StringIO.new(sample_trace_raw) } + + shared_examples 'all 
line matching' do + it do + (0...sample_trace_raw.lines.count).each do + expect(chunked_io.readline).to eq(string_io.readline) + end + end + end + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it_behaves_like 'all line matching' + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it_behaves_like 'all line matching' + end + + context 'when pos is at middle of the file' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + + chunked_io.seek(chunked_io.size / 2) + string_io.seek(string_io.size / 2) + end + + it 'reads from pos' do + expect(chunked_io.readline).to eq(string_io.readline) + end + end + end + + context "#write" do + subject { chunked_io.write(data) } + + let(:data) { sample_trace_raw } + + context 'when data does not exist' do + shared_examples 'writes a trace' do + it do + is_expected.to eq(data.bytesize) + + chunked_io.seek(0, IO::SEEK_SET) + expect(chunked_io.read).to eq(data) + end + end + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(data.bytesize / 2) + end + + it_behaves_like 'writes a trace' + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(data.bytesize * 2) + end + + it_behaves_like 'writes a trace' + end + end + + context 'when data already exists' do + let(:exist_data) { 'exist data' } + + shared_examples 'appends a trace' do + it do + chunked_io.seek(0, IO::SEEK_END) + is_expected.to eq(data.bytesize) + + chunked_io.seek(0, IO::SEEK_SET) + expect(chunked_io.read).to eq(exist_data + data) + end + end + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(exist_data) + end + + it_behaves_like 'appends a trace' + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(exist_data) + end + + it_behaves_like 'appends a trace' + end + end + end + + context "#truncate" do + let(:offset) { 10 } + + context 'when data does not exist' do + shared_examples 'truncates a trace' do + it do + chunked_io.truncate(offset) + + chunked_io.seek(0, IO::SEEK_SET) + expect(chunked_io.read).to eq(sample_trace_raw.byteslice(0, offset)) + end + end + + context 'when buffer size is smaller than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize / 2) + build.trace.set(sample_trace_raw) + end + + it_behaves_like 'truncates a trace' + end + + context 'when buffer size is larger than file size' do + before do + stub_buffer_size(sample_trace_raw.bytesize * 2) + build.trace.set(sample_trace_raw) + end + + it_behaves_like 'truncates a trace' + end + end + end + + context "#destroy!" do + subject { chunked_io.destroy! 
} + + before do + build.trace.set(sample_trace_raw) + end + + it 'deletes' do + expect { subject }.to change { chunked_io.size } + .from(sample_trace_raw.bytesize).to(0) + + expect(Ci::BuildTraceChunk.where(build: build).count).to eq(0) + end + end +end diff --git a/spec/lib/gitlab/ci/trace/stream_spec.rb b/spec/lib/gitlab/ci/trace/stream_spec.rb index e5555546fa8bac..4f49958dd33dea 100644 --- a/spec/lib/gitlab/ci/trace/stream_spec.rb +++ b/spec/lib/gitlab/ci/trace/stream_spec.rb @@ -1,6 +1,12 @@ require 'spec_helper' -describe Gitlab::Ci::Trace::Stream do +describe Gitlab::Ci::Trace::Stream, :clean_gitlab_redis_cache do + set(:build) { create(:ci_build, :running) } + + before do + stub_feature_flags(ci_enable_live_trace: true) + end + describe 'delegates' do subject { described_class.new { nil } } @@ -11,337 +17,470 @@ it { is_expected.to delegate_method(:path).to(:stream) } it { is_expected.to delegate_method(:truncate).to(:stream) } it { is_expected.to delegate_method(:valid?).to(:stream).as(:present?) } - it { is_expected.to delegate_method(:file?).to(:path).as(:present?) } end describe '#limit' do - let(:stream) do - described_class.new do - StringIO.new((1..8).to_a.join("\n")) + shared_examples_for 'limits' do + it 'if size is larger we start from beginning' do + stream.limit(20) + + expect(stream.tell).to eq(0) end - end - it 'if size is larger we start from beginning' do - stream.limit(20) + it 'if size is smaller we start from the end' do + stream.limit(2) - expect(stream.tell).to eq(0) - end + expect(stream.raw).to eq("8") + end - it 'if size is smaller we start from the end' do - stream.limit(2) + context 'when the trace contains ANSI sequence and Unicode' do + let(:stream) do + described_class.new do + File.open(expand_fixture_path('trace/ansi-sequence-and-unicode')) + end + end - expect(stream.raw).to eq("8") - end + it 'forwards to the next linefeed, case 1' do + stream.limit(7) - context 'when the trace contains ANSI sequence and Unicode' do - let(:stream) do - described_class.new do - File.open(expand_fixture_path('trace/ansi-sequence-and-unicode')) + result = stream.raw + + expect(result).to eq('') + expect(result.encoding).to eq(Encoding.default_external) end - end - it 'forwards to the next linefeed, case 1' do - stream.limit(7) + it 'forwards to the next linefeed, case 2' do + stream.limit(29) - result = stream.raw + result = stream.raw - expect(result).to eq('') - expect(result.encoding).to eq(Encoding.default_external) - end + expect(result).to eq("\e[01;32m許功蓋\e[0m\n") + expect(result.encoding).to eq(Encoding.default_external) + end - it 'forwards to the next linefeed, case 2' do - stream.limit(29) + # See https://gitlab.com/gitlab-org/gitlab-ce/issues/30796 + it 'reads in binary, output as Encoding.default_external' do + stream.limit(52) - result = stream.raw + result = stream.html - expect(result).to eq("\e[01;32m許功蓋\e[0m\n") - expect(result.encoding).to eq(Encoding.default_external) + expect(result).to eq("ヾ(´༎ຶД༎ຶ`)ノ
許功蓋
") + expect(result.encoding).to eq(Encoding.default_external) + end end + end - # See https://gitlab.com/gitlab-org/gitlab-ce/issues/30796 - it 'reads in binary, output as Encoding.default_external' do - stream.limit(52) + context 'when stream is StringIO' do + let(:stream) do + described_class.new do + StringIO.new((1..8).to_a.join("\n")) + end + end - result = stream.html + it_behaves_like 'limits' + end - expect(result).to eq("ヾ(´༎ຶД༎ຶ`)ノ
許功蓋
") - expect(result.encoding).to eq(Encoding.default_external) + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write((1..8).to_a.join("\n")) + chunked_io.seek(0, IO::SEEK_SET) + end + end end + + it_behaves_like 'limits' end end describe '#append' do - let(:tempfile) { Tempfile.new } + shared_examples_for 'appends' do + it "truncates and append content" do + stream.append("89", 4) + stream.seek(0) - let(:stream) do - described_class.new do - tempfile.write("12345678") - tempfile.rewind - tempfile + expect(stream.size).to eq(6) + expect(stream.raw).to eq("123489") end - end - after do - tempfile.unlink - end + it 'appends in binary mode' do + '😺'.force_encoding('ASCII-8BIT').each_char.with_index do |byte, offset| + stream.append(byte, offset) + end - it "truncates and append content" do - stream.append("89", 4) - stream.seek(0) + stream.seek(0) - expect(stream.size).to eq(6) - expect(stream.raw).to eq("123489") + expect(stream.size).to eq(4) + expect(stream.raw).to eq('😺') + end end - it 'appends in binary mode' do - '😺'.force_encoding('ASCII-8BIT').each_char.with_index do |byte, offset| - stream.append(byte, offset) + context 'when stream is Tempfile' do + let(:tempfile) { Tempfile.new } + + let(:stream) do + described_class.new do + tempfile.write("12345678") + tempfile.rewind + tempfile + end + end + + after do + tempfile.unlink end - stream.seek(0) + it_behaves_like 'appends' + end - expect(stream.size).to eq(4) - expect(stream.raw).to eq('😺') + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write('12345678') + chunked_io.seek(0, IO::SEEK_SET) + end + end + end + + it_behaves_like 'appends' end end describe '#set' do - let(:stream) do - described_class.new do - StringIO.new("12345678") + shared_examples_for 'sets' do + before do + stream.set("8901") + end + + it "overwrite content" do + stream.seek(0) + + expect(stream.size).to eq(4) + expect(stream.raw).to eq("8901") end end - before do - stream.set("8901") + context 'when stream is StringIO' do + let(:stream) do + described_class.new do + StringIO.new("12345678") + end + end + + it_behaves_like 'sets' end - it "overwrite content" do - stream.seek(0) + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write('12345678') + chunked_io.seek(0, IO::SEEK_SET) + end + end + end - expect(stream.size).to eq(4) - expect(stream.raw).to eq("8901") + it_behaves_like 'sets' end end describe '#raw' do - let(:path) { __FILE__ } - let(:lines) { File.readlines(path) } - let(:stream) do - described_class.new do - File.open(path) + shared_examples_for 'sets' do + it 'returns all contents if last_lines is not specified' do + result = stream.raw + + expect(result).to eq(lines.join) + expect(result.encoding).to eq(Encoding.default_external) end - end - it 'returns all contents if last_lines is not specified' do - result = stream.raw + context 'limit max lines' do + before do + # specifying BUFFER_SIZE forces to seek backwards + allow(described_class).to receive(:BUFFER_SIZE) + .and_return(2) + end - expect(result).to eq(lines.join) - expect(result.encoding).to eq(Encoding.default_external) - end + it 'returns last few lines' do + result = stream.raw(last_lines: 2) - context 'limit max lines' do - before do - # specifying BUFFER_SIZE forces 
to seek backwards - allow(described_class).to receive(:BUFFER_SIZE) - .and_return(2) + expect(result).to eq(lines.last(2).join) + expect(result.encoding).to eq(Encoding.default_external) + end + + it 'returns everything if trying to get too many lines' do + result = stream.raw(last_lines: lines.size * 2) + + expect(result).to eq(lines.join) + expect(result.encoding).to eq(Encoding.default_external) + end end + end - it 'returns last few lines' do - result = stream.raw(last_lines: 2) + let(:path) { __FILE__ } + let(:lines) { File.readlines(path) } - expect(result).to eq(lines.last(2).join) - expect(result.encoding).to eq(Encoding.default_external) + context 'when stream is File' do + let(:stream) do + described_class.new do + File.open(path) + end end - it 'returns everything if trying to get too many lines' do - result = stream.raw(last_lines: lines.size * 2) + it_behaves_like 'sets' + end - expect(result).to eq(lines.join) - expect(result.encoding).to eq(Encoding.default_external) + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write(File.binread(path)) + chunked_io.seek(0, IO::SEEK_SET) + end + end end + + it_behaves_like 'sets' end end describe '#html_with_state' do - let(:stream) do - described_class.new do - StringIO.new("1234") + shared_examples_for 'html_with_states' do + it 'returns html content with state' do + result = stream.html_with_state + + expect(result.html).to eq("1234") end - end - it 'returns html content with state' do - result = stream.html_with_state + context 'follow-up state' do + let!(:last_result) { stream.html_with_state } - expect(result.html).to eq("1234") - end + before do + stream.append("5678", 4) + stream.seek(0) + end - context 'follow-up state' do - let!(:last_result) { stream.html_with_state } + it "returns appended trace" do + result = stream.html_with_state(last_result.state) - before do - stream.append("5678", 4) - stream.seek(0) + expect(result.append).to be_truthy + expect(result.html).to eq("5678") + end + end + end + + context 'when stream is StringIO' do + let(:stream) do + described_class.new do + StringIO.new("1234") + end end - it "returns appended trace" do - result = stream.html_with_state(last_result.state) + it_behaves_like 'html_with_states' + end - expect(result.append).to be_truthy - expect(result.html).to eq("5678") + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write("1234") + chunked_io.seek(0, IO::SEEK_SET) + end + end end + + it_behaves_like 'html_with_states' end end describe '#html' do - let(:stream) do - described_class.new do - StringIO.new("12\n34\n56") + shared_examples_for 'htmls' do + it "returns html" do + expect(stream.html).to eq("12
34
56") + end + + it "returns html for last line only" do + expect(stream.html(last_lines: 1)).to eq("56") end end - it "returns html" do - expect(stream.html).to eq("12
34
56") + context 'when stream is StringIO' do + let(:stream) do + described_class.new do + StringIO.new("12\n34\n56") + end + end + + it_behaves_like 'htmls' end - it "returns html for last line only" do - expect(stream.html(last_lines: 1)).to eq("56") + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write("12\n34\n56") + chunked_io.seek(0, IO::SEEK_SET) + end + end + end + + it_behaves_like 'htmls' end end describe '#extract_coverage' do - let(:stream) do - described_class.new do - StringIO.new(data) - end - end + shared_examples_for 'extract_coverages' do + context 'valid content & regex' do + let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered' } + let(:regex) { '\(\d+.\d+\%\) covered' } - subject { stream.extract_coverage(regex) } + it { is_expected.to eq("98.29") } + end - context 'valid content & regex' do - let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered' } - let(:regex) { '\(\d+.\d+\%\) covered' } + context 'valid content & bad regex' do + let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } + let(:regex) { 'very covered' } - it { is_expected.to eq("98.29") } - end + it { is_expected.to be_nil } + end - context 'valid content & bad regex' do - let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } - let(:regex) { 'very covered' } + context 'no coverage content & regex' do + let(:data) { 'No coverage for today :sad:' } + let(:regex) { '\(\d+.\d+\%\) covered' } - it { is_expected.to be_nil } - end + it { is_expected.to be_nil } + end - context 'no coverage content & regex' do - let(:data) { 'No coverage for today :sad:' } - let(:regex) { '\(\d+.\d+\%\) covered' } + context 'multiple results in content & regex' do + let(:data) do + <<~HEREDOC + (98.39%) covered + (98.29%) covered + HEREDOC + end - it { is_expected.to be_nil } - end + let(:regex) { '\(\d+.\d+\%\) covered' } - context 'multiple results in content & regex' do - let(:data) do - <<~HEREDOC - (98.39%) covered - (98.29%) covered - HEREDOC + it 'returns the last matched coverage' do + is_expected.to eq("98.29") + end end - let(:regex) { '\(\d+.\d+\%\) covered' } + context 'when BUFFER_SIZE is smaller than stream.size' do + let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } + let(:regex) { '\(\d+.\d+\%\) covered' } - it 'returns the last matched coverage' do - is_expected.to eq("98.29") + before do + stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', 5) + end + + it { is_expected.to eq("98.29") } end - end - context 'when BUFFER_SIZE is smaller than stream.size' do - let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } - let(:regex) { '\(\d+.\d+\%\) covered' } + context 'when regex is multi-byte char' do + let(:data) { '95.0 ゴッドファット\n' } + let(:regex) { '\d+\.\d+ ゴッドファット' } - before do - stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', 5) + before do + stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', 5) + end + + it { is_expected.to eq('95.0') } end - it { is_expected.to eq("98.29") } - end + context 'when BUFFER_SIZE is equal to stream.size' do + let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } + let(:regex) { '\(\d+.\d+\%\) covered' } - context 'when regex is multi-byte char' do - let(:data) { '95.0 ゴッドファット\n' } - let(:regex) { '\d+\.\d+ ゴッドファット' } + before do + stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', data.length) + end - before do - stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', 5) + it { is_expected.to eq("98.29") } end - it { 
is_expected.to eq('95.0') } - end - - context 'when BUFFER_SIZE is equal to stream.size' do - let(:data) { 'Coverage 1033 / 1051 LOC (98.29%) covered\n' } - let(:regex) { '\(\d+.\d+\%\) covered' } + context 'using a regex capture' do + let(:data) { 'TOTAL 9926 3489 65%' } + let(:regex) { 'TOTAL\s+\d+\s+\d+\s+(\d{1,3}\%)' } - before do - stub_const('Gitlab::Ci::Trace::Stream::BUFFER_SIZE', data.length) + it { is_expected.to eq("65") } end - it { is_expected.to eq("98.29") } - end + context 'malicious regexp' do + let(:data) { malicious_text } + let(:regex) { malicious_regexp } - context 'using a regex capture' do - let(:data) { 'TOTAL 9926 3489 65%' } - let(:regex) { 'TOTAL\s+\d+\s+\d+\s+(\d{1,3}\%)' } + include_examples 'malicious regexp' + end - it { is_expected.to eq("65") } - end + context 'multi-line data with rooted regexp' do + let(:data) { "\n65%\n" } + let(:regex) { '^(\d+)\%$' } - context 'malicious regexp' do - let(:data) { malicious_text } - let(:regex) { malicious_regexp } + it { is_expected.to eq('65') } + end - include_examples 'malicious regexp' - end + context 'long line' do + let(:data) { 'a' * 80000 + '100%' + 'a' * 80000 } + let(:regex) { '\d+\%' } - context 'multi-line data with rooted regexp' do - let(:data) { "\n65%\n" } - let(:regex) { '^(\d+)\%$' } + it { is_expected.to eq('100') } + end - it { is_expected.to eq('65') } - end + context 'many lines' do + let(:data) { "foo\n" * 80000 + "100%\n" + "foo\n" * 80000 } + let(:regex) { '\d+\%' } - context 'long line' do - let(:data) { 'a' * 80000 + '100%' + 'a' * 80000 } - let(:regex) { '\d+\%' } + it { is_expected.to eq('100') } + end - it { is_expected.to eq('100') } - end + context 'empty regex' do + let(:data) { 'foo' } + let(:regex) { '' } - context 'many lines' do - let(:data) { "foo\n" * 80000 + "100%\n" + "foo\n" * 80000 } - let(:regex) { '\d+\%' } + it 'skips processing' do + expect(stream).not_to receive(:read) - it { is_expected.to eq('100') } - end + is_expected.to be_nil + end + end - context 'empty regex' do - let(:data) { 'foo' } - let(:regex) { '' } + context 'nil regex' do + let(:data) { 'foo' } + let(:regex) { nil } - it 'skips processing' do - expect(stream).not_to receive(:read) + it 'skips processing' do + expect(stream).not_to receive(:read) - is_expected.to be_nil + is_expected.to be_nil + end end end - context 'nil regex' do - let(:data) { 'foo' } - let(:regex) { nil } + subject { stream.extract_coverage(regex) } - it 'skips processing' do - expect(stream).not_to receive(:read) + context 'when stream is StringIO' do + let(:stream) do + described_class.new do + StringIO.new(data) + end + end + + it_behaves_like 'extract_coverages' + end - is_expected.to be_nil + context 'when stream is ChunkedIO' do + let(:stream) do + described_class.new do + Gitlab::Ci::Trace::ChunkedIO.new(build).tap do |chunked_io| + chunked_io.write(data) + chunked_io.seek(0, IO::SEEK_SET) + end + end end + + it_behaves_like 'extract_coverages' end end end diff --git a/spec/lib/gitlab/ci/trace_spec.rb b/spec/lib/gitlab/ci/trace_spec.rb index 6a9c6442282ad2..e9d755c2021a1d 100644 --- a/spec/lib/gitlab/ci/trace_spec.rb +++ b/spec/lib/gitlab/ci/trace_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Gitlab::Ci::Trace do +describe Gitlab::Ci::Trace, :clean_gitlab_redis_cache do let(:build) { create(:ci_build) } let(:trace) { described_class.new(build) } @@ -9,552 +9,19 @@ it { expect(trace).to delegate_method(:old_trace).to(:job) } end - describe '#html' do + context 'when live trace feature is disabled' do before do - 
trace.set("12\n34") + stub_feature_flags(ci_enable_live_trace: false) end - it "returns formatted html" do - expect(trace.html).to eq("12
34") - end - - it "returns last line of formatted html" do - expect(trace.html(last_lines: 1)).to eq("34") - end - end - - describe '#raw' do - before do - trace.set("12\n34") - end - - it "returns raw output" do - expect(trace.raw).to eq("12\n34") - end - - it "returns last line of raw output" do - expect(trace.raw(last_lines: 1)).to eq("34") - end - end - - describe '#extract_coverage' do - let(:regex) { '\(\d+.\d+\%\) covered' } - - context 'matching coverage' do - before do - trace.set('Coverage 1033 / 1051 LOC (98.29%) covered') - end - - it "returns valid coverage" do - expect(trace.extract_coverage(regex)).to eq("98.29") - end - end - - context 'no coverage' do - before do - trace.set('No coverage') - end - - it 'returs nil' do - expect(trace.extract_coverage(regex)).to be_nil - end - end - end - - describe '#extract_sections' do - let(:log) { 'No sections' } - let(:sections) { trace.extract_sections } - - before do - trace.set(log) - end - - context 'no sections' do - it 'returs []' do - expect(trace.extract_sections).to eq([]) - end - end - - context 'multiple sections available' do - let(:log) { File.read(expand_fixture_path('trace/trace_with_sections')) } - let(:sections_data) do - [ - { name: 'prepare_script', lines: 2, duration: 3.seconds }, - { name: 'get_sources', lines: 4, duration: 1.second }, - { name: 'restore_cache', lines: 0, duration: 0.seconds }, - { name: 'download_artifacts', lines: 0, duration: 0.seconds }, - { name: 'build_script', lines: 2, duration: 1.second }, - { name: 'after_script', lines: 0, duration: 0.seconds }, - { name: 'archive_cache', lines: 0, duration: 0.seconds }, - { name: 'upload_artifacts', lines: 0, duration: 0.seconds } - ] - end - - it "returns valid sections" do - expect(sections).not_to be_empty - expect(sections.size).to eq(sections_data.size), - "expected #{sections_data.size} sections, got #{sections.size}" - - buff = StringIO.new(log) - sections.each_with_index do |s, i| - expected = sections_data[i] - - expect(s[:name]).to eq(expected[:name]) - expect(s[:date_end] - s[:date_start]).to eq(expected[:duration]) - - buff.seek(s[:byte_start], IO::SEEK_SET) - length = s[:byte_end] - s[:byte_start] - lines = buff.read(length).count("\n") - expect(lines).to eq(expected[:lines]) - end - end - end - - context 'logs contains "section_start"' do - let(:log) { "section_start:1506417476:a_section\r\033[0Klooks like a section_start:invalid\nsection_end:1506417477:a_section\r\033[0K"} - - it "returns only one section" do - expect(sections).not_to be_empty - expect(sections.size).to eq(1) - - section = sections[0] - expect(section[:name]).to eq('a_section') - expect(section[:byte_start]).not_to eq(section[:byte_end]), "got an empty section" - end - end - - context 'missing section_end' do - let(:log) { "section_start:1506417476:a_section\r\033[0KSome logs\nNo section_end\n"} - - it "returns no sections" do - expect(sections).to be_empty - end - end - - context 'missing section_start' do - let(:log) { "Some logs\nNo section_start\nsection_end:1506417476:a_section\r\033[0K"} - - it "returns no sections" do - expect(sections).to be_empty - end - end - - context 'inverted section_start section_end' do - let(:log) { "section_end:1506417476:a_section\r\033[0Klooks like a section_start:invalid\nsection_start:1506417477:a_section\r\033[0K"} - - it "returns no sections" do - expect(sections).to be_empty - end - end - end - - describe '#set' do - before do - trace.set("12") - end - - it "returns trace" do - expect(trace.raw).to eq("12") - end - - context 
'overwrite trace' do - before do - trace.set("34") - end - - it "returns new trace" do - expect(trace.raw).to eq("34") - end - end - - context 'runners token' do - let(:token) { 'my_secret_token' } - - before do - build.project.update(runners_token: token) - trace.set(token) - end - - it "hides token" do - expect(trace.raw).not_to include(token) - end - end - - context 'hides build token' do - let(:token) { 'my_secret_token' } - - before do - build.update(token: token) - trace.set(token) - end - - it "hides token" do - expect(trace.raw).not_to include(token) - end - end + it_behaves_like 'trace with disabled live trace feature' end - describe '#append' do + context 'when live trace feature is enabled' do before do - trace.set("1234") - end - - it "returns correct trace" do - expect(trace.append("56", 4)).to eq(6) - expect(trace.raw).to eq("123456") - end - - context 'tries to append trace at different offset' do - it "fails with append" do - expect(trace.append("56", 2)).to eq(-4) - expect(trace.raw).to eq("1234") - end - end - - context 'runners token' do - let(:token) { 'my_secret_token' } - - before do - build.project.update(runners_token: token) - trace.append(token, 0) - end - - it "hides token" do - expect(trace.raw).not_to include(token) - end - end - - context 'build token' do - let(:token) { 'my_secret_token' } - - before do - build.update(token: token) - trace.append(token, 0) - end - - it "hides token" do - expect(trace.raw).not_to include(token) - end - end - end - - describe '#read' do - shared_examples 'read successfully with IO' do - it 'yields with source' do - trace.read do |stream| - expect(stream).to be_a(Gitlab::Ci::Trace::Stream) - expect(stream.stream).to be_a(IO) - end - end - end - - shared_examples 'read successfully with StringIO' do - it 'yields with source' do - trace.read do |stream| - expect(stream).to be_a(Gitlab::Ci::Trace::Stream) - expect(stream.stream).to be_a(StringIO) - end - end - end - - shared_examples 'failed to read' do - it 'yields without source' do - trace.read do |stream| - expect(stream).to be_a(Gitlab::Ci::Trace::Stream) - expect(stream.stream).to be_nil - end - end - end - - context 'when trace artifact exists' do - before do - create(:ci_job_artifact, :trace, job: build) - end - - it_behaves_like 'read successfully with IO' - end - - context 'when current_path (with project_id) exists' do - before do - expect(trace).to receive(:default_path) { expand_fixture_path('trace/sample_trace') } - end - - it_behaves_like 'read successfully with IO' - end - - context 'when current_path (with project_ci_id) exists' do - before do - expect(trace).to receive(:deprecated_path) { expand_fixture_path('trace/sample_trace') } - end - - it_behaves_like 'read successfully with IO' - end - - context 'when db trace exists' do - before do - build.send(:write_attribute, :trace, "data") - end - - it_behaves_like 'read successfully with StringIO' - end - - context 'when no sources exist' do - it_behaves_like 'failed to read' - end - end - - describe 'trace handling' do - subject { trace.exist? } - - context 'trace does not exist' do - it { expect(trace.exist?).to be(false) } - end - - context 'when trace artifact exists' do - before do - create(:ci_job_artifact, :trace, job: build) - end - - it { is_expected.to be_truthy } - - context 'when the trace artifact has been erased' do - before do - trace.erase! 
- end - - it { is_expected.to be_falsy } - - it 'removes associations' do - expect(Ci::JobArtifact.exists?(job_id: build.id, file_type: :trace)).to be_falsy - end - end - end - - context 'new trace path is used' do - before do - trace.send(:ensure_directory) - - File.open(trace.send(:default_path), "w") do |file| - file.write("data") - end - end - - it "trace exist" do - expect(trace.exist?).to be(true) - end - - it "can be erased" do - trace.erase! - expect(trace.exist?).to be(false) - end - end - - context 'deprecated path' do - let(:path) { trace.send(:deprecated_path) } - - context 'with valid ci_id' do - before do - build.project.update(ci_id: 1000) - - FileUtils.mkdir_p(File.dirname(path)) - - File.open(path, "w") do |file| - file.write("data") - end - end - - it "trace exist" do - expect(trace.exist?).to be(true) - end - - it "can be erased" do - trace.erase! - expect(trace.exist?).to be(false) - end - end - - context 'without valid ci_id' do - it "does not return deprecated path" do - expect(path).to be_nil - end - end - end - - context 'stored in database' do - before do - build.send(:write_attribute, :trace, "data") - end - - it "trace exist" do - expect(trace.exist?).to be(true) - end - - it "can be erased" do - trace.erase! - expect(trace.exist?).to be(false) - end - - it "returns database data" do - expect(trace.raw).to eq("data") - end - end - end - - describe '#archive!' do - subject { trace.archive! } - - shared_examples 'archive trace file' do - it do - expect { subject }.to change { Ci::JobArtifact.count }.by(1) - - build.reload - expect(build.trace.exist?).to be_truthy - expect(build.job_artifacts_trace.file.exists?).to be_truthy - expect(build.job_artifacts_trace.file.filename).to eq('job.log') - expect(File.exist?(src_path)).to be_falsy - expect(src_checksum) - .to eq(Digest::SHA256.file(build.job_artifacts_trace.file.path).hexdigest) - expect(build.job_artifacts_trace.file_sha256).to eq(src_checksum) - end - end - - shared_examples 'source trace file stays intact' do |error:| - it do - expect { subject }.to raise_error(error) - - build.reload - expect(build.trace.exist?).to be_truthy - expect(build.job_artifacts_trace).to be_nil - expect(File.exist?(src_path)).to be_truthy - end - end - - shared_examples 'archive trace in database' do - it do - expect { subject }.to change { Ci::JobArtifact.count }.by(1) - - build.reload - expect(build.trace.exist?).to be_truthy - expect(build.job_artifacts_trace.file.exists?).to be_truthy - expect(build.job_artifacts_trace.file.filename).to eq('job.log') - expect(build.old_trace).to be_nil - expect(src_checksum) - .to eq(Digest::SHA256.file(build.job_artifacts_trace.file.path).hexdigest) - expect(build.job_artifacts_trace.file_sha256).to eq(src_checksum) - end - end - - shared_examples 'source trace in database stays intact' do |error:| - it do - expect { subject }.to raise_error(error) - - build.reload - expect(build.trace.exist?).to be_truthy - expect(build.job_artifacts_trace).to be_nil - expect(build.old_trace).to eq(trace_content) - end - end - - context 'when job does not have trace artifact' do - context 'when trace file stored in default path' do - let!(:build) { create(:ci_build, :success, :trace_live) } - let!(:src_path) { trace.read { |s| s.path } } - let!(:src_checksum) { Digest::SHA256.file(src_path).hexdigest } - - it_behaves_like 'archive trace file' - - context 'when failed to create clone file' do - before do - allow(IO).to receive(:copy_stream).and_return(0) - end - - it_behaves_like 'source trace file stays 
intact', error: Gitlab::Ci::Trace::ArchiveError - end - - context 'when failed to create job artifact record' do - before do - allow_any_instance_of(Ci::JobArtifact).to receive(:save).and_return(false) - allow_any_instance_of(Ci::JobArtifact).to receive_message_chain(:errors, :full_messages) - .and_return(%w[Error Error]) - end - - it_behaves_like 'source trace file stays intact', error: ActiveRecord::RecordInvalid - end - end - - context 'when trace is stored in database' do - let(:build) { create(:ci_build, :success) } - let(:trace_content) { 'Sample trace' } - let!(:src_checksum) { Digest::SHA256.hexdigest(trace_content) } - - before do - build.update_column(:trace, trace_content) - end - - it_behaves_like 'archive trace in database' - - context 'when failed to create clone file' do - before do - allow(IO).to receive(:copy_stream).and_return(0) - end - - it_behaves_like 'source trace in database stays intact', error: Gitlab::Ci::Trace::ArchiveError - end - - context 'when failed to create job artifact record' do - before do - allow_any_instance_of(Ci::JobArtifact).to receive(:save).and_return(false) - allow_any_instance_of(Ci::JobArtifact).to receive_message_chain(:errors, :full_messages) - .and_return(%w[Error Error]) - end - - it_behaves_like 'source trace in database stays intact', error: ActiveRecord::RecordInvalid - end - - context 'when there is a validation error on Ci::Build' do - before do - allow_any_instance_of(Ci::Build).to receive(:save).and_return(false) - allow_any_instance_of(Ci::Build).to receive_message_chain(:errors, :full_messages) - .and_return(%w[Error Error]) - end - - context "when erase old trace with 'save'" do - before do - build.send(:write_attribute, :trace, nil) - build.save - end - - it 'old trace is not deleted' do - build.reload - expect(build.trace.raw).to eq(trace_content) - end - end - - it_behaves_like 'archive trace in database' - end - end + stub_feature_flags(ci_enable_live_trace: true) end - context 'when job has trace artifact' do - before do - create(:ci_job_artifact, :trace, job: build) - end - - it 'does not archive' do - expect_any_instance_of(described_class).not_to receive(:archive_stream!) - expect { subject }.to raise_error('Already archived') - expect(build.job_artifacts_trace.file.exists?).to be_truthy - end - end - - context 'when job is not finished yet' do - let!(:build) { create(:ci_build, :running, :trace_live) } - - it 'does not archive' do - expect_any_instance_of(described_class).not_to receive(:archive_stream!) 
- expect { subject }.to raise_error('Job is not finished yet') - expect(build.trace.exist?).to be_truthy - end - end + it_behaves_like 'trace with enabled live trace feature' end end diff --git a/spec/models/ci/build_trace_chunk_spec.rb b/spec/models/ci/build_trace_chunk_spec.rb new file mode 100644 index 00000000000000..46d09dff52cb90 --- /dev/null +++ b/spec/models/ci/build_trace_chunk_spec.rb @@ -0,0 +1,381 @@ +require 'spec_helper' + +describe Ci::BuildTraceChunk, :clean_gitlab_redis_shared_state do + set(:build) { create(:ci_build, :running) } + let(:chunk_index) { 0 } + let(:data_store) { :redis } + let(:raw_data) { nil } + + let(:build_trace_chunk) do + described_class.new(build: build, chunk_index: chunk_index, data_store: data_store, raw_data: raw_data) + end + + before do + stub_feature_flags(ci_enable_live_trace: true) + end + + describe 'CHUNK_SIZE' do + it 'chunk size cannot be changed without special care' do + expect(described_class::CHUNK_SIZE).to eq(128.kilobytes) + end + end + + describe '#data' do + subject { build_trace_chunk.data } + + context 'when data_store is redis' do + let(:data_store) { :redis } + + before do + build_trace_chunk.send(:redis_set_data, 'Sample data in redis') + end + + it { is_expected.to eq('Sample data in redis') } + end + + context 'when data_store is database' do + let(:data_store) { :db } + let(:raw_data) { 'Sample data in db' } + + it { is_expected.to eq('Sample data in db') } + end + + context 'when data_store is invalid' do + before do + build_trace_chunk.send(:write_attribute, :data_store, -1) + end + + it { expect { subject }.to raise_error('Unsupported data store') } + end + end + + describe '#set_data' do + subject { build_trace_chunk.send(:set_data, value) } + + let(:value) { 'Sample data' } + + context 'when value bytesize is bigger than CHUNK_SIZE' do + let(:value) { 'a' * (described_class::CHUNK_SIZE + 1) } + + it { expect { subject }.to raise_error('too much data') } + end + + context 'when data_store is redis' do + let(:data_store) { :redis } + + it do + expect(build_trace_chunk.send(:redis_data)).to be_nil + + subject + + expect(build_trace_chunk.send(:redis_data)).to eq(value) + end + + context 'when the chunk is full' do + let(:value) { 'a' * described_class::CHUNK_SIZE } + + it 'schedules stashing data' do + expect(Ci::BuildTraceChunkFlushWorker).to receive(:perform_async).once + + subject + end + end + end + + context 'when data_store is database' do + let(:data_store) { :db } + + it 'sets data' do + expect(build_trace_chunk.raw_data).to be_nil + + subject + + expect(build_trace_chunk.raw_data).to eq(value) + expect(build_trace_chunk.persisted?).to be_truthy + end + + context 'when raw_data is not changed' do + it 'does not execute UPDATE' do + expect(build_trace_chunk.raw_data).to be_nil + build_trace_chunk.save!
+ + # First set + expect(ActiveRecord::QueryRecorder.new { subject }.count).to be > 0 + expect(build_trace_chunk.raw_data).to eq(value) + expect(build_trace_chunk.persisted?).to be_truthy + + # Second set + build_trace_chunk.reload + expect(ActiveRecord::QueryRecorder.new { subject }.count).to be(0) + end + end + + context 'when the chunk is full' do + it 'does not schedule stashing data' do + expect(Ci::BuildTraceChunkFlushWorker).not_to receive(:perform_async) + + subject + end + end + end + + context 'when data_store is invalid' do + before do + build_trace_chunk.send(:write_attribute, :data_store, -1) + end + + it { expect { subject }.to raise_error('Unsupported data store') } + end + end + + describe '#truncate' do + subject { build_trace_chunk.truncate(offset) } + + shared_examples_for 'truncates' do + context 'when offset is negative' do + let(:offset) { -1 } + + it { expect { subject }.to raise_error('Offset is out of range') } + end + + context 'when offset is bigger than data size' do + let(:offset) { data.bytesize + 1 } + + it { expect { subject }.to raise_error('Offset is out of range') } + end + + context 'when offset is 10' do + let(:offset) { 10 } + + it 'truncates' do + subject + + expect(build_trace_chunk.data).to eq(data.byteslice(0, offset)) + end + end + end + + context 'when data_store is redis' do + let(:data_store) { :redis } + let(:data) { 'Sample data in redis' } + + before do + build_trace_chunk.send(:redis_set_data, data) + end + + it_behaves_like 'truncates' + end + + context 'when data_store is database' do + let(:data_store) { :db } + let(:raw_data) { 'Sample data in db' } + let(:data) { raw_data } + + it_behaves_like 'truncates' + end + end + + describe '#append' do + subject { build_trace_chunk.append(new_data, offset) } + + let(:new_data) { 'Sample new data' } + let(:offset) { 0 } + let(:total_data) { data + new_data } + + shared_examples_for 'appends' do + context 'when offset is negative' do + let(:offset) { -1 } + + it { expect { subject }.to raise_error('Offset is out of range') } + end + + context 'when offset is bigger than data size' do + let(:offset) { data.bytesize + 1 } + + it { expect { subject }.to raise_error('Offset is out of range') } + end + + context 'when new data size is bigger than CHUNK_SIZE' do + let(:new_data) { 'a' * (described_class::CHUNK_SIZE + 1) } + + it { expect { subject }.to raise_error('Chunk size overflow') } + end + + context 'when offset is EOF' do + let(:offset) { data.bytesize } + + it 'appends' do + subject + + expect(build_trace_chunk.data).to eq(total_data) + end + end + + context 'when offset is 10' do + let(:offset) { 10 } + + it 'appends' do + subject + + expect(build_trace_chunk.data).to eq(data.byteslice(0, offset) + new_data) + end + end + end + + context 'when data_store is redis' do + let(:data_store) { :redis } + let(:data) { 'Sample data in redis' } + + before do + build_trace_chunk.send(:redis_set_data, data) + end + + it_behaves_like 'appends' + end + + context 'when data_store is database' do + let(:data_store) { :db } + let(:raw_data) { 'Sample data in db' } + let(:data) { raw_data } + + it_behaves_like 'appends' + end + end + + describe '#size' do + subject { build_trace_chunk.size } + + context 'when data_store is redis' do + let(:data_store) { :redis } + + context 'when data exists' do + let(:data) { 'Sample data in redis' } + + before do + build_trace_chunk.send(:redis_set_data, data) + end + + it { is_expected.to eq(data.bytesize) } + end + + context 'when data does not exist' do + it { is_expected.to eq(0) } + 
end + end + + context 'when data_store is database' do + let(:data_store) { :db } + + context 'when data exists' do + let(:raw_data) { 'Sample data in db' } + let(:data) { raw_data } + + it { is_expected.to eq(data.bytesize) } + end + + context 'when data does not exist' do + it { is_expected.to eq(0) } + end + end + end + + describe '#use_database!' do + subject { build_trace_chunk.use_database! } + + context 'when data_store is redis' do + let(:data_store) { :redis } + + context 'when data exists' do + let(:data) { 'Sample data in redis' } + + before do + build_trace_chunk.send(:redis_set_data, data) + end + + it 'stashes the data' do + expect(build_trace_chunk.data_store).to eq('redis') + expect(build_trace_chunk.send(:redis_data)).to eq(data) + expect(build_trace_chunk.raw_data).to be_nil + + subject + + expect(build_trace_chunk.data_store).to eq('db') + expect(build_trace_chunk.send(:redis_data)).to be_nil + expect(build_trace_chunk.raw_data).to eq(data) + end + end + + context 'when data does not exist' do + it 'does not call UPDATE' do + expect(ActiveRecord::QueryRecorder.new { subject }.count).to eq(0) + end + end + end + + context 'when data_store is database' do + let(:data_store) { :db } + + it 'does not call UPDATE' do + expect(ActiveRecord::QueryRecorder.new { subject }.count).to eq(0) + end + end + end + + describe 'ExclusiveLock' do + before do + allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { nil } + stub_const('Ci::BuildTraceChunk::WRITE_LOCK_RETRY', 1) + end + + it 'raises an error' do + expect { build_trace_chunk.append('ABC', 0) }.to raise_error('Failed to obtain write lock') + end + end + + describe 'deletes data in redis after a parent record is destroyed' do + let(:project) { create(:project) } + + before do + pipeline = create(:ci_pipeline, project: project) + create(:ci_build, :running, :trace_live, pipeline: pipeline, project: project) + create(:ci_build, :running, :trace_live, pipeline: pipeline, project: project) + create(:ci_build, :running, :trace_live, pipeline: pipeline, project: project) + end + + shared_examples_for 'deletes all build_trace_chunk and data in redis' do + it do + Gitlab::Redis::SharedState.with do |redis| + expect(redis.scan_each(match: "gitlab:ci:trace:*:chunks:*").to_a.size).to eq(3) + end + + expect(described_class.count).to eq(3) + + subject + + expect(described_class.count).to eq(0) + + Gitlab::Redis::SharedState.with do |redis| + expect(redis.scan_each(match: "gitlab:ci:trace:*:chunks:*").to_a.size).to eq(0) + end + end + end + + context 'when traces are archived' do + let(:subject) do + project.builds.each do |build| + build.success! + end + end + + it_behaves_like 'deletes all build_trace_chunk and data in redis' + end + + context 'when project is destroyed' do + let(:subject) do + project.destroy!
+ end + + it_behaves_like 'deletes all build_trace_chunk and data in redis' + end + end +end diff --git a/spec/requests/api/runner_spec.rb b/spec/requests/api/runner_spec.rb index 0bdc19293d5797..3e324b575d7e2e 100644 --- a/spec/requests/api/runner_spec.rb +++ b/spec/requests/api/runner_spec.rb @@ -1,11 +1,13 @@ require 'spec_helper' -describe API::Runner do +describe API::Runner, :clean_gitlab_redis_shared_state do include StubGitlabCalls + include RedisHelpers let(:registration_token) { 'abcdefg123456' } before do + stub_feature_flags(ci_enable_live_trace: true) stub_gitlab_calls stub_application_setting(runners_registration_token: registration_token) allow_any_instance_of(Ci::Runner).to receive(:cache_attributes) @@ -884,6 +886,49 @@ def update_job(token = job.token, **params) expect(response.status).to eq(403) end end + + context 'when trace is patched' do + before do + patch_the_trace + end + + it 'has valid trace' do + expect(response.status).to eq(202) + expect(job.reload.trace.raw).to eq 'BUILD TRACE appended appended' + end + + context 'when redis data are flushed' do + before do + redis_shared_state_cleanup! + end + + it 'has empty trace' do + expect(job.reload.trace.raw).to eq '' + end + + context 'when we perform partial patch' do + before do + patch_the_trace('hello', headers.merge({ 'Content-Range' => "28-32/5" })) + end + + it 'returns an error' do + expect(response.status).to eq(416) + expect(response.header['Range']).to eq('0-0') + end + end + + context 'when we resend full trace' do + before do + patch_the_trace('BUILD TRACE appended appended hello', headers.merge({ 'Content-Range' => "0-34/35" })) + end + + it 'succeeds with updating trace' do + expect(response.status).to eq(202) + expect(job.reload.trace.raw).to eq 'BUILD TRACE appended appended hello' + end + end + end + end end context 'when Runner makes a force-patch' do @@ -900,7 +945,7 @@ def update_job(token = job.token, **params) end context 'when content-range start is too big' do - let(:headers_with_range) { headers.merge({ 'Content-Range' => '15-20' }) } + let(:headers_with_range) { headers.merge({ 'Content-Range' => '15-20/6' }) } it 'gets 416 error response with range headers' do expect(response.status).to eq 416 @@ -910,7 +955,7 @@ def update_job(token = job.token, **params) end context 'when content-range start is too small' do - let(:headers_with_range) { headers.merge({ 'Content-Range' => '8-20' }) } + let(:headers_with_range) { headers.merge({ 'Content-Range' => '8-20/13' }) } it 'gets 416 error response with range headers' do expect(response.status).to eq 416 diff --git a/spec/services/ci/retry_build_service_spec.rb b/spec/services/ci/retry_build_service_spec.rb index a312ea15e63234..50104ebf970c31 100644 --- a/spec/services/ci/retry_build_service_spec.rb +++ b/spec/services/ci/retry_build_service_spec.rb @@ -32,7 +32,7 @@ runner_id tag_taggings taggings tags trigger_request_id user_id auto_canceled_by_id retried failure_reason sourced_pipelines artifacts_file_store artifacts_metadata_store - metadata].freeze # EE + metadata trace_chunks].freeze # EE shared_examples 'build duplication' do let(:another_pipeline) { create(:ci_empty_pipeline, project: project) } diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 958113a28d439e..40356e090ef591 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -93,6 +93,7 @@ config.include WaitForRequests, :js config.include LiveDebugger, :js config.include MigrationsHelpers, :migration + config.include RedisHelpers # EE only START config.include 
EE::LicenseHelpers @@ -165,21 +166,27 @@ end config.around(:each, :clean_gitlab_redis_cache) do |example| - Gitlab::Redis::Cache.with(&:flushall) + redis_cache_cleanup! example.run - Gitlab::Redis::Cache.with(&:flushall) + redis_cache_cleanup! end config.around(:each, :clean_gitlab_redis_shared_state) do |example| - Gitlab::Redis::SharedState.with(&:flushall) - Sidekiq.redis(&:flushall) + redis_shared_state_cleanup! example.run - Gitlab::Redis::SharedState.with(&:flushall) - Sidekiq.redis(&:flushall) + redis_shared_state_cleanup! + end + + config.around(:each, :clean_gitlab_redis_queues) do |example| + redis_queues_cleanup! + + example.run + + redis_queues_cleanup! end # The :each scope runs "inside" the example, so this hook ensures the DB is in the diff --git a/spec/support/chunked_io/chunked_io_helpers.rb b/spec/support/chunked_io/chunked_io_helpers.rb new file mode 100644 index 00000000000000..fec1f9515632f7 --- /dev/null +++ b/spec/support/chunked_io/chunked_io_helpers.rb @@ -0,0 +1,11 @@ +module ChunkedIOHelpers + def sample_trace_raw + @sample_trace_raw ||= File.read(expand_fixture_path('trace/sample_trace')) + .force_encoding(Encoding::BINARY) + end + + def stub_buffer_size(size) + stub_const('Ci::BuildTraceChunk::CHUNK_SIZE', size) + stub_const('Gitlab::Ci::Trace::ChunkedIO::CHUNK_SIZE', size) + end +end diff --git a/spec/support/redis/redis_helpers.rb b/spec/support/redis/redis_helpers.rb new file mode 100644 index 00000000000000..0457e8487d8cd9 --- /dev/null +++ b/spec/support/redis/redis_helpers.rb @@ -0,0 +1,18 @@ +module RedisHelpers + # config/README.md + + # Usage: performance enhancement + def redis_cache_cleanup! + Gitlab::Redis::Cache.with(&:flushall) + end + + # Usage: Sidekiq, Mailroom, CI Runner, Workhorse, push services + def redis_queues_cleanup! + Gitlab::Redis::Queues.with(&:flushall) + end + + # Usage: session state, rate limiting + def redis_shared_state_cleanup! + Gitlab::Redis::SharedState.with(&:flushall) + end +end diff --git a/spec/support/shared_examples/ci_trace_shared_examples.rb b/spec/support/shared_examples/ci_trace_shared_examples.rb new file mode 100644 index 00000000000000..21c6f3c829fe79 --- /dev/null +++ b/spec/support/shared_examples/ci_trace_shared_examples.rb @@ -0,0 +1,741 @@ +shared_examples_for 'common trace features' do + describe '#html' do + before do + trace.set("12\n34") + end + + it "returns formatted html" do + expect(trace.html).to eq("12<br>
34") + end + + it "returns last line of formatted html" do + expect(trace.html(last_lines: 1)).to eq("34") + end + end + + describe '#raw' do + before do + trace.set("12\n34") + end + + it "returns raw output" do + expect(trace.raw).to eq("12\n34") + end + + it "returns last line of raw output" do + expect(trace.raw(last_lines: 1)).to eq("34") + end + end + + describe '#extract_coverage' do + let(:regex) { '\(\d+.\d+\%\) covered' } + + context 'matching coverage' do + before do + trace.set('Coverage 1033 / 1051 LOC (98.29%) covered') + end + + it "returns valid coverage" do + expect(trace.extract_coverage(regex)).to eq("98.29") + end + end + + context 'no coverage' do + before do + trace.set('No coverage') + end + + it 'returs nil' do + expect(trace.extract_coverage(regex)).to be_nil + end + end + end + + describe '#extract_sections' do + let(:log) { 'No sections' } + let(:sections) { trace.extract_sections } + + before do + trace.set(log) + end + + context 'no sections' do + it 'returs []' do + expect(trace.extract_sections).to eq([]) + end + end + + context 'multiple sections available' do + let(:log) { File.read(expand_fixture_path('trace/trace_with_sections')) } + let(:sections_data) do + [ + { name: 'prepare_script', lines: 2, duration: 3.seconds }, + { name: 'get_sources', lines: 4, duration: 1.second }, + { name: 'restore_cache', lines: 0, duration: 0.seconds }, + { name: 'download_artifacts', lines: 0, duration: 0.seconds }, + { name: 'build_script', lines: 2, duration: 1.second }, + { name: 'after_script', lines: 0, duration: 0.seconds }, + { name: 'archive_cache', lines: 0, duration: 0.seconds }, + { name: 'upload_artifacts', lines: 0, duration: 0.seconds } + ] + end + + it "returns valid sections" do + expect(sections).not_to be_empty + expect(sections.size).to eq(sections_data.size), + "expected #{sections_data.size} sections, got #{sections.size}" + + buff = StringIO.new(log) + sections.each_with_index do |s, i| + expected = sections_data[i] + + expect(s[:name]).to eq(expected[:name]) + expect(s[:date_end] - s[:date_start]).to eq(expected[:duration]) + + buff.seek(s[:byte_start], IO::SEEK_SET) + length = s[:byte_end] - s[:byte_start] + lines = buff.read(length).count("\n") + expect(lines).to eq(expected[:lines]) + end + end + end + + context 'logs contains "section_start"' do + let(:log) { "section_start:1506417476:a_section\r\033[0Klooks like a section_start:invalid\nsection_end:1506417477:a_section\r\033[0K"} + + it "returns only one section" do + expect(sections).not_to be_empty + expect(sections.size).to eq(1) + + section = sections[0] + expect(section[:name]).to eq('a_section') + expect(section[:byte_start]).not_to eq(section[:byte_end]), "got an empty section" + end + end + + context 'missing section_end' do + let(:log) { "section_start:1506417476:a_section\r\033[0KSome logs\nNo section_end\n"} + + it "returns no sections" do + expect(sections).to be_empty + end + end + + context 'missing section_start' do + let(:log) { "Some logs\nNo section_start\nsection_end:1506417476:a_section\r\033[0K"} + + it "returns no sections" do + expect(sections).to be_empty + end + end + + context 'inverted section_start section_end' do + let(:log) { "section_end:1506417476:a_section\r\033[0Klooks like a section_start:invalid\nsection_start:1506417477:a_section\r\033[0K"} + + it "returns no sections" do + expect(sections).to be_empty + end + end + end + + describe '#set' do + before do + trace.set("12") + end + + it "returns trace" do + expect(trace.raw).to eq("12") + end + + context 
'overwrite trace' do + before do + trace.set("34") + end + + it "returns new trace" do + expect(trace.raw).to eq("34") + end + end + + context 'runners token' do + let(:token) { 'my_secret_token' } + + before do + build.project.update(runners_token: token) + trace.set(token) + end + + it "hides token" do + expect(trace.raw).not_to include(token) + end + end + + context 'hides build token' do + let(:token) { 'my_secret_token' } + + before do + build.update(token: token) + trace.set(token) + end + + it "hides token" do + expect(trace.raw).not_to include(token) + end + end + end + + describe '#append' do + before do + trace.set("1234") + end + + it "returns correct trace" do + expect(trace.append("56", 4)).to eq(6) + expect(trace.raw).to eq("123456") + end + + context 'tries to append trace at different offset' do + it "fails with append" do + expect(trace.append("56", 2)).to eq(4) + expect(trace.raw).to eq("1234") + end + end + + context 'runners token' do + let(:token) { 'my_secret_token' } + + before do + build.project.update(runners_token: token) + trace.append(token, 0) + end + + it "hides token" do + expect(trace.raw).not_to include(token) + end + end + + context 'build token' do + let(:token) { 'my_secret_token' } + + before do + build.update(token: token) + trace.append(token, 0) + end + + it "hides token" do + expect(trace.raw).not_to include(token) + end + end + end +end + +shared_examples_for 'trace with disabled live trace feature' do + it_behaves_like 'common trace features' + + describe '#read' do + shared_examples 'read successfully with IO' do + it 'yields with source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_a(IO) + end + end + end + + shared_examples 'read successfully with StringIO' do + it 'yields with source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_a(StringIO) + end + end + end + + shared_examples 'failed to read' do + it 'yields without source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_nil + end + end + end + + context 'when trace artifact exists' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it_behaves_like 'read successfully with IO' + end + + context 'when current_path (with project_id) exists' do + before do + expect(trace).to receive(:default_path) { expand_fixture_path('trace/sample_trace') } + end + + it_behaves_like 'read successfully with IO' + end + + context 'when current_path (with project_ci_id) exists' do + before do + expect(trace).to receive(:deprecated_path) { expand_fixture_path('trace/sample_trace') } + end + + it_behaves_like 'read successfully with IO' + end + + context 'when db trace exists' do + before do + build.send(:write_attribute, :trace, "data") + end + + it_behaves_like 'read successfully with StringIO' + end + + context 'when no sources exist' do + it_behaves_like 'failed to read' + end + end + + describe 'trace handling' do + subject { trace.exist? } + + context 'trace does not exist' do + it { expect(trace.exist?).to be(false) } + end + + context 'when trace artifact exists' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it { is_expected.to be_truthy } + + context 'when the trace artifact has been erased' do + before do + trace.erase! 
+ end + + it { is_expected.to be_falsy } + + it 'removes associations' do + expect(Ci::JobArtifact.exists?(job_id: build.id, file_type: :trace)).to be_falsy + end + end + end + + context 'new trace path is used' do + before do + trace.send(:ensure_directory) + + File.open(trace.send(:default_path), "w") do |file| + file.write("data") + end + end + + it "trace exists" do + expect(trace.exist?).to be(true) + end + + it "can be erased" do + trace.erase! + expect(trace.exist?).to be(false) + end + end + + context 'deprecated path' do + let(:path) { trace.send(:deprecated_path) } + + context 'with valid ci_id' do + before do + build.project.update(ci_id: 1000) + + FileUtils.mkdir_p(File.dirname(path)) + + File.open(path, "w") do |file| + file.write("data") + end + end + + it "trace exists" do + expect(trace.exist?).to be(true) + end + + it "can be erased" do + trace.erase! + expect(trace.exist?).to be(false) + end + end + + context 'without valid ci_id' do + it "does not return deprecated path" do + expect(path).to be_nil + end + end + end + + context 'stored in database' do + before do + build.send(:write_attribute, :trace, "data") + end + + it "trace exists" do + expect(trace.exist?).to be(true) + end + + it "can be erased" do + trace.erase! + expect(trace.exist?).to be(false) + end + + it "returns database data" do + expect(trace.raw).to eq("data") + end + end + end + + describe '#archive!' do + subject { trace.archive! } + + shared_examples 'archive trace file' do + it do + expect { subject }.to change { Ci::JobArtifact.count }.by(1) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace.file.exists?).to be_truthy + expect(build.job_artifacts_trace.file.filename).to eq('job.log') + expect(File.exist?(src_path)).to be_falsy + expect(src_checksum) + .to eq(Digest::SHA256.file(build.job_artifacts_trace.file.path).hexdigest) + expect(build.job_artifacts_trace.file_sha256).to eq(src_checksum) + end + end + + shared_examples 'source trace file stays intact' do |error:| + it do + expect { subject }.to raise_error(error) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace).to be_nil + expect(File.exist?(src_path)).to be_truthy + end + end + + shared_examples 'archive trace in database' do + it do + expect { subject }.to change { Ci::JobArtifact.count }.by(1) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace.file.exists?).to be_truthy + expect(build.job_artifacts_trace.file.filename).to eq('job.log') + expect(build.old_trace).to be_nil + expect(src_checksum) + .to eq(Digest::SHA256.file(build.job_artifacts_trace.file.path).hexdigest) + expect(build.job_artifacts_trace.file_sha256).to eq(src_checksum) + end + end + + shared_examples 'source trace in database stays intact' do |error:| + it do + expect { subject }.to raise_error(error) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace).to be_nil + expect(build.old_trace).to eq(trace_content) + end + end + + context 'when job does not have trace artifact' do + context 'when trace file stored in default path' do + let!(:build) { create(:ci_build, :success, :trace_live) } + let!(:src_path) { trace.read { |s| s.path } } + let!(:src_checksum) { Digest::SHA256.file(src_path).hexdigest } + + it_behaves_like 'archive trace file' + + context 'when failed to create clone file' do + before do + allow(IO).to receive(:copy_stream).and_return(0) + end + + it_behaves_like 'source trace file stays 
intact', error: Gitlab::Ci::Trace::ArchiveError + end + + context 'when failed to create job artifact record' do + before do + allow_any_instance_of(Ci::JobArtifact).to receive(:save).and_return(false) + allow_any_instance_of(Ci::JobArtifact).to receive_message_chain(:errors, :full_messages) + .and_return(%w[Error Error]) + end + + it_behaves_like 'source trace file stays intact', error: ActiveRecord::RecordInvalid + end + end + + context 'when trace is stored in database' do + let(:build) { create(:ci_build, :success) } + let(:trace_content) { 'Sample trace' } + let(:src_checksum) { Digest::SHA256.hexdigest(trace_content) } + + before do + build.update_column(:trace, trace_content) + end + + it_behaves_like 'archive trace in database' + + context 'when failed to create clone file' do + before do + allow(IO).to receive(:copy_stream).and_return(0) + end + + it_behaves_like 'source trace in database stays intact', error: Gitlab::Ci::Trace::ArchiveError + end + + context 'when failed to create job artifact record' do + before do + allow_any_instance_of(Ci::JobArtifact).to receive(:save).and_return(false) + allow_any_instance_of(Ci::JobArtifact).to receive_message_chain(:errors, :full_messages) + .and_return(%w[Error Error]) + end + + it_behaves_like 'source trace in database stays intact', error: ActiveRecord::RecordInvalid + end + + context 'when there is a validation error on Ci::Build' do + before do + allow_any_instance_of(Ci::Build).to receive(:save).and_return(false) + allow_any_instance_of(Ci::Build).to receive_message_chain(:errors, :full_messages) + .and_return(%w[Error Error]) + end + + context "when erase old trace with 'save'" do + before do + build.send(:write_attribute, :trace, nil) + build.save + end + + it 'old trace is not deleted' do + build.reload + expect(build.trace.raw).to eq(trace_content) + end + end + + it_behaves_like 'archive trace in database' + end + end + end + + context 'when job has trace artifact' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it 'does not archive' do + expect_any_instance_of(described_class).not_to receive(:archive_stream!) + expect { subject }.to raise_error('Already archived') + expect(build.job_artifacts_trace.file.exists?).to be_truthy + end + end + + context 'when job is not finished yet' do + let!(:build) { create(:ci_build, :running, :trace_live) } + + it 'does not archive' do + expect_any_instance_of(described_class).not_to receive(:archive_stream!) 
+ expect { subject }.to raise_error('Job is not finished yet') + expect(build.trace.exist?).to be_truthy + end + end + end +end + +shared_examples_for 'trace with enabled live trace feature' do + it_behaves_like 'common trace features' + + describe '#read' do + shared_examples 'read successfully with IO' do + it 'yields with source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_a(IO) + end + end + end + + shared_examples 'read successfully with ChunkedIO' do + it 'yields with source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_a(Gitlab::Ci::Trace::ChunkedIO) + end + end + end + + shared_examples 'failed to read' do + it 'yields without source' do + trace.read do |stream| + expect(stream).to be_a(Gitlab::Ci::Trace::Stream) + expect(stream.stream).to be_nil + end + end + end + + context 'when trace artifact exists' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it_behaves_like 'read successfully with IO' + end + + context 'when live trace exists' do + before do + Gitlab::Ci::Trace::ChunkedIO.new(build) do |stream| + stream.write('abc') + end + end + + it_behaves_like 'read successfully with ChunkedIO' + end + + context 'when no sources exist' do + it_behaves_like 'failed to read' + end + end + + describe 'trace handling' do + subject { trace.exist? } + + context 'trace does not exist' do + it { expect(trace.exist?).to be(false) } + end + + context 'when trace artifact exists' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it { is_expected.to be_truthy } + + context 'when the trace artifact has been erased' do + before do + trace.erase! + end + + it { is_expected.to be_falsy } + + it 'removes associations' do + expect(Ci::JobArtifact.exists?(job_id: build.id, file_type: :trace)).to be_falsy + end + end + end + + context 'stored in live trace' do + before do + Gitlab::Ci::Trace::ChunkedIO.new(build) do |stream| + stream.write('abc') + end + end + + it "trace exists" do + expect(trace.exist?).to be(true) + end + + it "can be erased" do + trace.erase! + expect(trace.exist?).to be(false) + expect(Ci::BuildTraceChunk.where(build: build)).not_to be_exist + end + + it "returns live trace data" do + expect(trace.raw).to eq("abc") + end + end + end + + describe '#archive!' do + subject { trace.archive! 
} + + shared_examples 'archive trace file in ChunkedIO' do + it do + expect { subject }.to change { Ci::JobArtifact.count }.by(1) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace.file.exists?).to be_truthy + expect(build.job_artifacts_trace.file.filename).to eq('job.log') + expect(Ci::BuildTraceChunk.where(build: build)).not_to be_exist + expect(src_checksum) + .to eq(Digest::SHA256.file(build.job_artifacts_trace.file.path).hexdigest) + expect(build.job_artifacts_trace.file_sha256).to eq(src_checksum) + end + end + + shared_examples 'source trace in ChunkedIO stays intact' do |error:| + it do + expect { subject }.to raise_error(error) + + build.reload + expect(build.trace.exist?).to be_truthy + expect(build.job_artifacts_trace).to be_nil + Gitlab::Ci::Trace::ChunkedIO.new(build) do |stream| + expect(stream.read).to eq(trace_raw) + end + end + end + + context 'when job does not have trace artifact' do + context 'when trace is stored in ChunkedIO' do + let!(:build) { create(:ci_build, :success, :trace_live) } + let!(:trace_raw) { build.trace.raw } + let!(:src_checksum) { Digest::SHA256.hexdigest(trace_raw) } + + it_behaves_like 'archive trace file in ChunkedIO' + + context 'when failed to create clone file' do + before do + allow(IO).to receive(:copy_stream).and_return(0) + end + + it_behaves_like 'source trace in ChunkedIO stays intact', error: Gitlab::Ci::Trace::ArchiveError + end + + context 'when failed to create job artifact record' do + before do + allow_any_instance_of(Ci::JobArtifact).to receive(:save).and_return(false) + allow_any_instance_of(Ci::JobArtifact).to receive_message_chain(:errors, :full_messages) + .and_return(%w[Error Error]) + end + + it_behaves_like 'source trace in ChunkedIO stays intact', error: ActiveRecord::RecordInvalid + end + end + end + + context 'when job has trace artifact' do + before do + create(:ci_job_artifact, :trace, job: build) + end + + it 'does not archive' do + expect_any_instance_of(described_class).not_to receive(:archive_stream!) + expect { subject }.to raise_error('Already archived') + expect(build.job_artifacts_trace.file.exists?).to be_truthy + end + end + + context 'when job is not finished yet' do + let!(:build) { create(:ci_build, :running, :trace_live) } + + it 'does not archive' do + expect_any_instance_of(described_class).not_to receive(:archive_stream!) 
+ expect { subject }.to raise_error('Job is not finished yet') + expect(build.trace.exist?).to be_truthy + end + end + end +end -- GitLab From 21bbed2df1552857f3702cafc1619b2d8e022a38 Mon Sep 17 00:00:00 2001 From: Grzegorz Bizon Date: Mon, 7 May 2018 12:01:32 +0000 Subject: [PATCH 2/2] Merge branch 'live-trace-v2-efficient-destroy-all' into 'master' Live trace: Use efficient destroy all (for `dependent: :destory` problem) See merge request gitlab-org/gitlab-ce!18575 --- app/models/ci/build.rb | 2 +- app/models/ci/build_trace_chunk.rb | 55 +++++++---- app/models/concerns/fast_destroy_all.rb | 91 +++++++++++++++++++ app/models/project.rb | 5 + .../live-trace-v2-efficient-destroy-all.yml | 5 + lib/gitlab/ci/trace.rb | 2 +- lib/gitlab/ci/trace/chunked_io.rb | 4 +- spec/lib/gitlab/import_export/all_models.yml | 1 + spec/models/ci/build_trace_chunk_spec.rb | 15 +++ .../shared_examples/fast_destroy_all.rb | 38 ++++++++ 10 files changed, 198 insertions(+), 20 deletions(-) create mode 100644 app/models/concerns/fast_destroy_all.rb create mode 100644 changelogs/unreleased/live-trace-v2-efficient-destroy-all.yml create mode 100644 spec/support/shared_examples/fast_destroy_all.rb diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 03221b53aab8bb..1ecd365a8074ae 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -23,7 +23,7 @@ class Build < CommitStatus has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_many :trace_sections, class_name: 'Ci::BuildTraceSection' - has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent + has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id diff --git a/app/models/ci/build_trace_chunk.rb b/app/models/ci/build_trace_chunk.rb index d9e923daa71c46..4856f10846c882 100644 --- a/app/models/ci/build_trace_chunk.rb +++ b/app/models/ci/build_trace_chunk.rb @@ -1,11 +1,10 @@ module Ci class BuildTraceChunk < ActiveRecord::Base + include FastDestroyAll extend Gitlab::Ci::Model belongs_to :build, class_name: "Ci::Build", foreign_key: :build_id - after_destroy :redis_delete_data, if: :redis? - default_value_for :data_store, :redis WriteError = Class.new(StandardError) @@ -21,6 +20,38 @@ class BuildTraceChunk < ActiveRecord::Base db: 2 } + class << self + def redis_data_key(build_id, chunk_index) + "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}" + end + + def redis_data_keys + redis.pluck(:build_id, :chunk_index).map do |data| + redis_data_key(data.first, data.second) + end + end + + def redis_delete_data(keys) + return if keys.empty? + + Gitlab::Redis::SharedState.with do |redis| + redis.del(keys) + end + end + + ## + # FastDestroyAll concerns + def begin_fast_destroy + redis_data_keys + end + + ## + # FastDestroyAll concerns + def finalize_fast_destroy(keys) + redis_delete_data(keys) + end + end + ## # Data is memoized for optimizing #size and #end_offset def data @@ -63,7 +94,7 @@ def use_database! 
break unless size > 0 self.update!(raw_data: data, data_store: :db) - redis_delete_data + self.class.redis_delete_data([redis_data_key]) end end @@ -121,22 +152,14 @@ def redis_set_data(data) end end - def redis_delete_data - Gitlab::Redis::SharedState.with do |redis| - redis.del(redis_data_key) - end - end - def redis_data_key - "gitlab:ci:trace:#{build_id}:chunks:#{chunk_index}" - end - - def redis_lock_key - "trace_write:#{build_id}:chunks:#{chunk_index}" + self.class.redis_data_key(build_id, chunk_index) end def in_lock - lease = Gitlab::ExclusiveLease.new(redis_lock_key, timeout: WRITE_LOCK_TTL) + write_lock_key = "trace_write:#{build_id}:chunks:#{chunk_index}" + + lease = Gitlab::ExclusiveLease.new(write_lock_key, timeout: WRITE_LOCK_TTL) retry_count = 0 until uuid = lease.try_obtain @@ -151,7 +174,7 @@ def in_lock self.reload if self.persisted? return yield ensure - Gitlab::ExclusiveLease.cancel(redis_lock_key, uuid) + Gitlab::ExclusiveLease.cancel(write_lock_key, uuid) end end end diff --git a/app/models/concerns/fast_destroy_all.rb b/app/models/concerns/fast_destroy_all.rb new file mode 100644 index 00000000000000..7ea042c6742b87 --- /dev/null +++ b/app/models/concerns/fast_destroy_all.rb @@ -0,0 +1,91 @@ +## +# This module is for replacing `dependent: :destroy` and `before_destroy` hooks. +# +# In general, `destroy_all` is inefficient because it calls each callback with `DELETE` queries i.e. O(n), whereas, +# `delete_all` is efficient as it deletes all rows with a single `DELETE` query. +# +# Using `delete_all` is our best practice; however, +# if external data (e.g. ObjectStorage, FileStorage or Redis) are associated with database records, +# it is difficult to accomplish this. +# +# This module defines a format to use `delete_all` and delete associated external data. +# Here is an example +# +# Situation +# - `Project` has many `Ci::BuildTraceChunk` through `Ci::Build` +# - `Ci::BuildTraceChunk` stores associated data in Redis, so it relies on `dependent: :destroy` and `before_destroy` for the deletion +# +# How to use +# - Define `use_fast_destroy :build_trace_chunks` in `Project` model. +# - Define `begin_fast_destroy` and `finalize_fast_destroy(params)` in `Ci::BuildTraceChunk` model. +# - Use `fast_destroy_all` instead of `destroy` and `destroy_all` +# - Remove `dependent: :destroy` and `before_destroy` as they are no longer needed +# +# Expectation +# - When a project is `destroy`ed, the associated trace_chunks will be deleted by `delete_all`, +# and the associated data will be removed, too. +# - When `fast_destroy_all` is called, it performs the same way. +module FastDestroyAll + extend ActiveSupport::Concern + + ForbiddenActionError = Class.new(StandardError) + + included do + before_destroy do + raise ForbiddenActionError, '`destroy` and `destroy_all` are forbidden. Please use `fast_destroy_all`' + end + end + + class_methods do + ## + # This method deletes rows and associated external data efficiently + # + # This method can replace `destroy` and `destroy_all` without needing an `after_destroy` hook + def fast_destroy_all + params = begin_fast_destroy + + delete_all + + finalize_fast_destroy(params) + end + + ## + # This method returns identifiers to delete associated external data (e.g. 
file paths, redis keys) + # + # This method must be defined in a fast destroyable model + def begin_fast_destroy + raise NotImplementedError + end + + ## + # This method deletes associated external data with the identifiers returned by `begin_fast_destroy` + # + # This method must be defined in a fast destroyable model + def finalize_fast_destroy(params) + raise NotImplementedError + end + end + + module Helpers + extend ActiveSupport::Concern + + class_methods do + ## + # This method is to be defined on models which have fast destroyable models as children, + # and lets us avoid using the `dependent: :destroy` hook + def use_fast_destroy(relation) + before_destroy(prepend: true) do + perform_fast_destroy(public_send(relation)) # rubocop:disable GitlabSecurity/PublicSend + end + end + end + + def perform_fast_destroy(subject) + params = subject.begin_fast_destroy + + run_after_commit do + subject.finalize_fast_destroy(params) + end + end + end +end diff --git a/app/models/project.rb b/app/models/project.rb index abe71052e4bf3e..8746b7330d33ed 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -22,6 +22,7 @@ class Project < ActiveRecord::Base include DeploymentPlatform include ::Gitlab::Utils::StrongMemoize include ChronicDurationAttribute + include FastDestroyAll::Helpers # EE specific modules prepend EE::Project @@ -85,6 +86,9 @@ class Project < ActiveRecord::Base after_update :update_forks_visibility_level before_destroy :remove_private_deploy_keys + + use_fast_destroy :build_trace_chunks + after_destroy -> { run_after_commit { remove_pages } } after_destroy :remove_exports @@ -232,6 +236,7 @@ class Project < ActiveRecord::Base # still using `dependent: :destroy` here. has_many :builds, class_name: 'Ci::Build', inverse_of: :project, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent has_many :build_trace_section_names, class_name: 'Ci::BuildTraceSectionName' + has_many :build_trace_chunks, class_name: 'Ci::BuildTraceChunk', through: :builds, source: :trace_chunks has_many :runner_projects, class_name: 'Ci::RunnerProject' has_many :runners, through: :runner_projects, source: :runner, class_name: 'Ci::Runner' has_many :variables, class_name: 'Ci::Variable' diff --git a/changelogs/unreleased/live-trace-v2-efficient-destroy-all.yml b/changelogs/unreleased/live-trace-v2-efficient-destroy-all.yml new file mode 100644 index 00000000000000..ab22739b73dc8b --- /dev/null +++ b/changelogs/unreleased/live-trace-v2-efficient-destroy-all.yml @@ -0,0 +1,5 @@ +--- +title: Destroy build_chunks efficiently with FastDestroyAll module +merge_request: 18575 +author: +type: performance diff --git a/lib/gitlab/ci/trace.rb b/lib/gitlab/ci/trace.rb index f46d1d39ea77dd..fe15fabc2e8e21 100644 --- a/lib/gitlab/ci/trace.rb +++ b/lib/gitlab/ci/trace.rb @@ -100,7 +100,7 @@ def erase! FileUtils.rm(trace_path, force: true) end - job.trace_chunks.destroy_all + job.trace_chunks.fast_destroy_all job.erase_old_trace! end diff --git a/lib/gitlab/ci/trace/chunked_io.rb b/lib/gitlab/ci/trace/chunked_io.rb index cd3d13644110c5..bfe0c2a2c26f42 100644 --- a/lib/gitlab/ci/trace/chunked_io.rb +++ b/lib/gitlab/ci/trace/chunked_io.rb @@ -141,7 +141,7 @@ def truncate(offset) @size = offset # remove all next chunks - trace_chunks.where('chunk_index > ?', chunk_index).destroy_all + trace_chunks.where('chunk_index > ?', chunk_index).fast_destroy_all # truncate current chunk current_chunk.truncate(chunk_offset) @@ -158,7 +158,7 @@ def present? end def destroy! 
- trace_chunks.destroy_all + trace_chunks.fast_destroy_all @tell = @size = 0 ensure invalidate_chunk_cache diff --git a/spec/lib/gitlab/import_export/all_models.yml b/spec/lib/gitlab/import_export/all_models.yml index ffbd0b51c372a7..e55c09d25ba2ac 100644 --- a/spec/lib/gitlab/import_export/all_models.yml +++ b/spec/lib/gitlab/import_export/all_models.yml @@ -313,6 +313,7 @@ project: - sourced_pipelines - members_and_requesters - build_trace_section_names +- build_trace_chunks - root_of_fork_network - fork_network_member - fork_network diff --git a/spec/models/ci/build_trace_chunk_spec.rb b/spec/models/ci/build_trace_chunk_spec.rb index 46d09dff52cb90..cbcf1e55979a0e 100644 --- a/spec/models/ci/build_trace_chunk_spec.rb +++ b/spec/models/ci/build_trace_chunk_spec.rb @@ -14,6 +14,21 @@ stub_feature_flags(ci_enable_live_trace: true) end + context 'FastDestroyAll' do + let(:parent) { create(:project) } + let(:pipeline) { create(:ci_pipeline, project: parent) } + let(:build) { create(:ci_build, :running, :trace_live, pipeline: pipeline, project: parent) } + let(:subjects) { build.trace_chunks } + + it_behaves_like 'fast destroyable' + + def external_data_counter + Gitlab::Redis::SharedState.with do |redis| + redis.scan_each(match: "gitlab:ci:trace:*:chunks:*").to_a.size + end + end + end + describe 'CHUNK_SIZE' do it 'chunk size cannot be changed without special care' do expect(described_class::CHUNK_SIZE).to eq(128.kilobytes) diff --git a/spec/support/shared_examples/fast_destroy_all.rb b/spec/support/shared_examples/fast_destroy_all.rb new file mode 100644 index 00000000000000..5448ddcfe335d3 --- /dev/null +++ b/spec/support/shared_examples/fast_destroy_all.rb @@ -0,0 +1,38 @@ +shared_examples_for 'fast destroyable' do + describe 'Forbid #destroy and #destroy_all' do + it 'does not delete database rows and associated external data' do + expect(external_data_counter).to be > 0 + expect(subjects.count).to be > 0 + + expect { subjects.first.destroy }.to raise_error('`destroy` and `destroy_all` are forbidden. Please use `fast_destroy_all`') + expect { subjects.destroy_all }.to raise_error('`destroy` and `destroy_all` are forbidden. Please use `fast_destroy_all`') + + expect(subjects.count).to be > 0 + expect(external_data_counter).to be > 0 + end + end + + describe '.fast_destroy_all' do + it 'deletes database rows and associated external data' do + expect(external_data_counter).to be > 0 + expect(subjects.count).to be > 0 + + expect { subjects.fast_destroy_all }.not_to raise_error + + expect(subjects.count).to eq(0) + expect(external_data_counter).to eq(0) + end + end + + describe '.use_fast_destroy' do + it 'performs cascading delete with fast_destroy_all' do + expect(external_data_counter).to be > 0 + expect(subjects.count).to be > 0 + + expect { parent.destroy }.not_to raise_error + + expect(subjects.count).to eq(0) + expect(external_data_counter).to eq(0) + end + end +end -- GitLab
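
For readers skimming the patch, a minimal, framework-free sketch of the FastDestroyAll control flow may help. This is plain Ruby with no Rails or Redis; `Chunk` and `ExternalStore` are hypothetical stand-ins for illustration only, not classes from this merge request:

# Sketch of the two-phase deletion pattern introduced by FastDestroyAll:
# 1) collect identifiers for external data while rows still exist,
# 2) delete all rows in one bulk operation (no per-row callbacks),
# 3) purge the external data using the identifiers collected in step 1.
class ExternalStore
  @data = {}

  class << self
    attr_reader :data

    def set(key, value)
      data[key] = value
    end

    def delete(keys)
      keys.each { |key| data.delete(key) }
    end
  end
end

class Chunk
  ROWS = [] # stands in for the database table

  attr_reader :key

  def initialize(key, payload)
    @key = key
    ExternalStore.set(key, payload) # external data lives outside the "table"
    ROWS << self
  end

  # Phase 1: snapshot the external identifiers before the rows disappear.
  def self.begin_fast_destroy
    ROWS.map(&:key)
  end

  # Phase 3: purge external data using the snapshot.
  def self.finalize_fast_destroy(keys)
    ExternalStore.delete(keys)
  end

  def self.fast_destroy_all
    keys = begin_fast_destroy
    ROWS.clear # stands in for delete_all: one bulk deletion, O(1) queries
    finalize_fast_destroy(keys)
  end
end

Chunk.new('trace:1:0', 'a' * 128)
Chunk.new('trace:1:1', 'b' * 128)

Chunk.fast_destroy_all
puts ExternalStore.data.empty? # => true

The ordering is the point: identifiers are captured while rows still exist, the rows go away in a single bulk operation, and only then is the external data purged, mirroring `begin_fast_destroy` / `delete_all` / `finalize_fast_destroy` in the concern above.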
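
The Redis keys removed by `redis_delete_data` follow the `gitlab:ci:trace:<build_id>:chunks:<chunk_index>` scheme defined by `redis_data_key`. As a rough illustration only, assuming the plain redis-rb gem and a local Redis instance rather than GitLab's `Gitlab::Redis::SharedState` wrapper, a bulk cleanup of such keys could look like:

require 'redis' # redis-rb gem, assumed available

redis = Redis.new(url: ENV.fetch('REDIS_URL', 'redis://localhost:6379'))

# SCAN iterates incrementally over matching keys without blocking the
# server the way KEYS would.
keys = redis.scan_each(match: 'gitlab:ci:trace:*:chunks:*').to_a

# DEL accepts multiple keys, so a single round trip removes them all,
# the same batching idea as redis_delete_data(keys) in the model.
redis.del(*keys) unless keys.empty?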