From 00d4fc63c4c60208821207ba64d349c42758634a Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Tue, 29 Sep 2020 14:50:31 -0700 Subject: [PATCH 01/10] Check for default_max_patch_size changes If the value is updated by an admin, we may _want_ to display the highlighted lines on a page, but will be unable to deliver them as the cached version might have been set as empty ([]) when the file PREVIOUSLY was considered "too large". When one of these situations is detected - the file is unreadable from the cache, and would've violated the default value but NOT violate the current value, go ahead and manually update the cache with it. --- lib/gitlab/diff/highlight_cache.rb | 42 ++++++++++++++++++-- spec/lib/gitlab/diff/highlight_cache_spec.rb | 19 +++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index 0eb22e6b3cbd8c..a215e7dfca01e9 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -20,10 +20,23 @@ def initialize(diff_collection) # - Assigns DiffFile#highlighted_diff_lines for cached files # def decorate(diff_file) - if content = read_file(diff_file) - diff_file.highlighted_diff_lines = content.map do |line| - Gitlab::Diff::Line.safe_init_from_hash(line) - end + content = read_file(diff_file) + + return [] unless content + + if content.empty? && recache_due_to_size?(diff_file) + # If the file is missing from the cache and there's reason to believe + # it is uncached due to a size issue around changing the values for + # max patch size, manually populate the hash and then set the value. + # + new_cache_content = {} + new_cache_content[diff_file.file_path] = diff_file.highlighted_diff_lines.map(&:to_hash) + + write_to_redis_hash(new_cache_content) + + set_highlighted_diff_lines(diff_file, read_file(diff_file)) + else + set_highlighted_diff_lines(diff_file, content) end end @@ -58,6 +71,27 @@ def key private + def set_highlighted_diff_lines(diff_file, content) + diff_file.highlighted_diff_lines = content.map do |line| + Gitlab::Diff::Line.safe_init_from_hash(line) + end + end + + def recache_due_to_size?(diff_file) + diff_file_class = diff_file.diff.class + default_max_patch_bytes = diff_file_class::DEFAULT_MAX_PATCH_BYTES + diff_bytesize = diff_file.diff.diff.bytesize + + current_patch_safe_limit_bytes = diff_file_class.patch_safe_limit_bytes + default_patch_safe_limit_bytes = default_max_patch_bytes / 10 + + # If the diff is >= than the default limit, but less than the current + # limit, it is likely uncached due to having hit the default limit, + # making it eligible for recalculating. + # + diff_bytesize >= default_patch_safe_limit_bytes && diff_bytesize < current_patch_safe_limit_bytes + end + def cacheable_files strong_memoize(:cacheable_files) do diff_files.select { |file| cacheable?(file) && read_file(file).nil? } diff --git a/spec/lib/gitlab/diff/highlight_cache_spec.rb b/spec/lib/gitlab/diff/highlight_cache_spec.rb index 7e926f86096826..f94005af5204b8 100644 --- a/spec/lib/gitlab/diff/highlight_cache_spec.rb +++ b/spec/lib/gitlab/diff/highlight_cache_spec.rb @@ -73,6 +73,25 @@ expect(rich_texts).to all(be_html_safe) end + + context "when diff_file is uncached due to default_max_patch_bytes change" do + before do + expect(cache).to receive(:read_file).at_least(:once).and_return([]) + expect(cache).to receive(:recache_due_to_size?).with(diff_file).and_return(true) + end + + it "manually writes highlighted lines to the cache" do + expect(cache).to receive(:write_to_redis_hash).and_call_original + + cache.decorate(diff_file) + end + + it "assigns highlighted diff lines to the DiffFile" do + expect(diff_file.highlighted_diff_lines.size).to be > 5 + + cache.decorate(diff_file) + end + end end shared_examples 'caches missing entries' do -- GitLab From 24751f272366262e4901d731478f106b0a9087e8 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Wed, 30 Sep 2020 15:52:57 +0000 Subject: [PATCH 02/10] Apply 1 suggestion(s) to 1 file(s) --- lib/gitlab/diff/highlight_cache.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index a215e7dfca01e9..15ab77d216e46d 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -89,7 +89,7 @@ def recache_due_to_size?(diff_file) # limit, it is likely uncached due to having hit the default limit, # making it eligible for recalculating. # - diff_bytesize >= default_patch_safe_limit_bytes && diff_bytesize < current_patch_safe_limit_bytes + diff_bytesize.between?(default_patch_safe_limit_bytes, current_patch_safe_limit_bytes) end def cacheable_files -- GitLab From 971b1499e96c9d297f3208dde3ced8fa8f7540f9 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Wed, 30 Sep 2020 08:51:20 -0700 Subject: [PATCH 03/10] Remove 1 piece of abstraction --- lib/gitlab/diff/highlight_cache.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index 15ab77d216e46d..e51999ea67da10 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -79,11 +79,10 @@ def set_highlighted_diff_lines(diff_file, content) def recache_due_to_size?(diff_file) diff_file_class = diff_file.diff.class - default_max_patch_bytes = diff_file_class::DEFAULT_MAX_PATCH_BYTES diff_bytesize = diff_file.diff.diff.bytesize current_patch_safe_limit_bytes = diff_file_class.patch_safe_limit_bytes - default_patch_safe_limit_bytes = default_max_patch_bytes / 10 + default_patch_safe_limit_bytes = diff_file_class::DEFAULT_MAX_PATCH_BYTES / 10 # If the diff is >= than the default limit, but less than the current # limit, it is likely uncached due to having hit the default limit, -- GitLab From 078f6e88f6aec9a0b3af2d2bb6c0260f1d1885ce Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Wed, 30 Sep 2020 09:11:15 -0700 Subject: [PATCH 04/10] Nit picky cleanup of comment formatting --- spec/lib/gitlab/diff/highlight_cache_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/lib/gitlab/diff/highlight_cache_spec.rb b/spec/lib/gitlab/diff/highlight_cache_spec.rb index f94005af5204b8..329661eddf8d79 100644 --- a/spec/lib/gitlab/diff/highlight_cache_spec.rb +++ b/spec/lib/gitlab/diff/highlight_cache_spec.rb @@ -43,7 +43,8 @@ describe '#decorate' do # Manually creates a Diff::File object to avoid triggering the cache on - # the FileCollection::MergeRequestDiff + # the FileCollection::MergeRequestDiff + # let(:diff_file) do diffs = merge_request.diffs raw_diff = diffs.diffable.raw_diffs(diffs.diff_options.merge(paths: ['CHANGELOG'])).first -- GitLab From 093dea6578b16c4815ec08f81c8e7888f6b927a1 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Wed, 30 Sep 2020 10:09:31 -0700 Subject: [PATCH 05/10] Improve tests by exercising private method --- spec/lib/gitlab/diff/highlight_cache_spec.rb | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/spec/lib/gitlab/diff/highlight_cache_spec.rb b/spec/lib/gitlab/diff/highlight_cache_spec.rb index 329661eddf8d79..46e3fb91b88b0d 100644 --- a/spec/lib/gitlab/diff/highlight_cache_spec.rb +++ b/spec/lib/gitlab/diff/highlight_cache_spec.rb @@ -78,7 +78,15 @@ context "when diff_file is uncached due to default_max_patch_bytes change" do before do expect(cache).to receive(:read_file).at_least(:once).and_return([]) - expect(cache).to receive(:recache_due_to_size?).with(diff_file).and_return(true) + + # Stub out the application's default and current patch size limits. We + # want them to be different, and the diff file to be sized between + # the 2 values. + # + diff_file_size_kb = (diff_file.diff.diff.bytesize * 10) + + stub_const("#{diff_file.diff.class}::DEFAULT_MAX_PATCH_BYTES", diff_file_size_kb - 1 ) + expect(diff_file.diff.class).to receive(:patch_safe_limit_bytes).and_return(diff_file_size_kb + 1) end it "manually writes highlighted lines to the cache" do -- GitLab From a47b4b0c0ac1d8092da6502a4cac2031f52c2326 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Thu, 1 Oct 2020 14:51:01 -0700 Subject: [PATCH 06/10] Add #diff_bytesize for accessing bytesize of diff --- lib/gitlab/git/diff.rb | 8 ++++++-- spec/lib/gitlab/git/diff_spec.rb | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index 09a49b6c1ca647..7f98dd1da02a1a 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -174,9 +174,13 @@ def line_count @line_count ||= Util.count_lines(@diff) end + def diff_bytesize + @diff_bytesize ||= @diff.bytesize + end + def too_large? if @too_large.nil? - @too_large = @diff.bytesize >= self.class.patch_hard_limit_bytes + @too_large = diff_bytesize >= self.class.patch_hard_limit_bytes else @too_large end @@ -194,7 +198,7 @@ def too_large! def collapsed? return @collapsed if defined?(@collapsed) - @collapsed = !expanded && @diff.bytesize >= self.class.patch_safe_limit_bytes + @collapsed = !expanded && diff_bytesize >= self.class.patch_safe_limit_bytes end def collapse! diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 117c519e98de6d..2284e9149d8a1a 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -291,6 +291,14 @@ end end + describe "#diff_bytesize" do + let(:diff) { described_class.new(gitaly_diff) } + + it "returns the size of the diff in bytes" do + expect(diff.diff_bytesize).to eq(diff.diff.bytesize) + end + end + describe '#too_large?' do it 'returns true for a diff that is too large' do diff = described_class.new(diff: 'a' * 204800) -- GitLab From 7510b1aa18addc281008b3b3874f3ed5c273ff92 Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Thu, 1 Oct 2020 14:51:29 -0700 Subject: [PATCH 07/10] Use a let to define test value --- spec/lib/gitlab/git/diff_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/lib/gitlab/git/diff_spec.rb b/spec/lib/gitlab/git/diff_spec.rb index 2284e9149d8a1a..980a52bb61e867 100644 --- a/spec/lib/gitlab/git/diff_spec.rb +++ b/spec/lib/gitlab/git/diff_spec.rb @@ -284,9 +284,9 @@ end describe '#line_count' do - it 'returns the correct number of lines' do - diff = described_class.new(gitaly_diff) + let(:diff) { described_class.new(gitaly_diff) } + it 'returns the correct number of lines' do expect(diff.line_count).to eq(7) end end -- GitLab From 47099c8993f7931cbd29a99018cfd89fd8ffb8ba Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Thu, 1 Oct 2020 14:56:18 -0700 Subject: [PATCH 08/10] Take advantage of new #diff_bytesize method --- lib/gitlab/diff/highlight_cache.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index e51999ea67da10..5074ae8d289e83 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -79,7 +79,6 @@ def set_highlighted_diff_lines(diff_file, content) def recache_due_to_size?(diff_file) diff_file_class = diff_file.diff.class - diff_bytesize = diff_file.diff.diff.bytesize current_patch_safe_limit_bytes = diff_file_class.patch_safe_limit_bytes default_patch_safe_limit_bytes = diff_file_class::DEFAULT_MAX_PATCH_BYTES / 10 @@ -88,7 +87,10 @@ def recache_due_to_size?(diff_file) # limit, it is likely uncached due to having hit the default limit, # making it eligible for recalculating. # - diff_bytesize.between?(default_patch_safe_limit_bytes, current_patch_safe_limit_bytes) + diff_file.diff.diff_bytesize.between?( + default_patch_safe_limit_bytes, + current_patch_safe_limit_bytes + ) end def cacheable_files -- GitLab From b75449641981b0f28cff62782bb06f43826b6a1c Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Thu, 1 Oct 2020 15:23:35 -0700 Subject: [PATCH 09/10] Allow #patch_safe_limit_bytes to accept limit param --- lib/gitlab/git/diff.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gitlab/git/diff.rb b/lib/gitlab/git/diff.rb index 7f98dd1da02a1a..78c47023c081fa 100644 --- a/lib/gitlab/git/diff.rb +++ b/lib/gitlab/git/diff.rb @@ -120,8 +120,8 @@ def binary_message(old_path, new_path) # default. # # Patches surpassing this limit should still be persisted in the database. - def patch_safe_limit_bytes - patch_hard_limit_bytes / 10 + def patch_safe_limit_bytes(limit = patch_hard_limit_bytes) + limit / 10 end # Returns the limit for a single diff file (patch). -- GitLab From 5d0179582c0e978967ab1c58b1008608ff57408c Mon Sep 17 00:00:00 2001 From: Kerri Miller Date: Thu, 1 Oct 2020 15:24:20 -0700 Subject: [PATCH 10/10] Pull default limit value from DiffFile --- lib/gitlab/diff/highlight_cache.rb | 2 +- spec/lib/gitlab/diff/highlight_cache_spec.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/gitlab/diff/highlight_cache.rb b/lib/gitlab/diff/highlight_cache.rb index 5074ae8d289e83..e873e9c17d57cb 100644 --- a/lib/gitlab/diff/highlight_cache.rb +++ b/lib/gitlab/diff/highlight_cache.rb @@ -81,7 +81,7 @@ def recache_due_to_size?(diff_file) diff_file_class = diff_file.diff.class current_patch_safe_limit_bytes = diff_file_class.patch_safe_limit_bytes - default_patch_safe_limit_bytes = diff_file_class::DEFAULT_MAX_PATCH_BYTES / 10 + default_patch_safe_limit_bytes = diff_file_class.patch_safe_limit_bytes(diff_file_class::DEFAULT_MAX_PATCH_BYTES) # If the diff is >= than the default limit, but less than the current # limit, it is likely uncached due to having hit the default limit, diff --git a/spec/lib/gitlab/diff/highlight_cache_spec.rb b/spec/lib/gitlab/diff/highlight_cache_spec.rb index 46e3fb91b88b0d..f6810d7a966704 100644 --- a/spec/lib/gitlab/diff/highlight_cache_spec.rb +++ b/spec/lib/gitlab/diff/highlight_cache_spec.rb @@ -87,6 +87,10 @@ stub_const("#{diff_file.diff.class}::DEFAULT_MAX_PATCH_BYTES", diff_file_size_kb - 1 ) expect(diff_file.diff.class).to receive(:patch_safe_limit_bytes).and_return(diff_file_size_kb + 1) + expect(diff_file.diff.class) + .to receive(:patch_safe_limit_bytes) + .with(diff_file.diff.class::DEFAULT_MAX_PATCH_BYTES) + .and_call_original end it "manually writes highlighted lines to the cache" do -- GitLab