From f4da54c3fa25c93cbf916a4dabe1e75956a025e6 Mon Sep 17 00:00:00 2001 From: Joe Woodward Date: Tue, 15 Oct 2024 23:57:04 +0100 Subject: [PATCH] View correct content when blob is a symlink When we create a symlink, git stores this as a blob with mode 120000 and the data will be the path to the linked file. Prior to this change we would render the path to the linked file in the UI when it would make more sense to render the linked file. Now when we visit a symlink we see the correct content e.g. blob_a is README.md blob_b is a symlink to ./README.md Viewing blob_b now displays the contents of README.md from blob_a. Changelog: added Co-authored-by: Joe Woodward --- app/controllers/projects/blob_controller.rb | 1 + app/graphql/resolvers/blobs_resolver.rb | 1 + app/graphql/types/repository/blob_type.rb | 5 +- app/models/blob.rb | 85 ++++-- spec/graphql/resolvers/blobs_resolver_spec.rb | 183 ++++++++++- spec/models/blob_spec.rb | 284 +++++++++++++++++- spec/support/helpers/fake_blob_helpers.rb | 20 +- 7 files changed, 542 insertions(+), 37 deletions(-) diff --git a/app/controllers/projects/blob_controller.rb b/app/controllers/projects/blob_controller.rb index 8e9625f25dd84c..092d24b7b4b807 100644 --- a/app/controllers/projects/blob_controller.rb +++ b/app/controllers/projects/blob_controller.rb @@ -143,6 +143,7 @@ def blob return unless commit @blob = @repository.blob_at(commit.id, @path) + @blob.tap(&:load_symlinked_data!) if @blob end strong_memoize_attr :blob diff --git a/app/graphql/resolvers/blobs_resolver.rb b/app/graphql/resolvers/blobs_resolver.rb index 0b159a69d1fe9b..38f8293b09859b 100644 --- a/app/graphql/resolvers/blobs_resolver.rb +++ b/app/graphql/resolvers/blobs_resolver.rb @@ -41,6 +41,7 @@ def resolve(paths:, ref:, ref_type:) repository.blobs_at(paths.map { |path| [ref, path] }).tap do |blobs| blobs.each do |blob| blob.ref_type = ref_type + blob.load_symlinked_data! end end end diff --git a/app/graphql/types/repository/blob_type.rb b/app/graphql/types/repository/blob_type.rb index ba3608cafa572c..b26b95281c3c37 100644 --- a/app/graphql/types/repository/blob_type.rb +++ b/app/graphql/types/repository/blob_type.rb @@ -56,8 +56,9 @@ class BlobType < BaseObject field :base64_encoded_blob, GraphQL::Types::String, null: true, experiment: { milestone: '17.1' }, description: 'Content of blob is encoded base64. Returns `null` if the `unicode_escaped_data` feature flag is disabled.' - field :raw_text_blob, GraphQL::Types::String, null: true, method: :text_only_data, - description: 'Raw content of the blob, if the blob is text data.' + field :raw_text_blob, GraphQL::Types::String, null: true, + description: 'Raw content of the blob, if the blob is text data.', + calls_gitaly: true field :stored_externally, GraphQL::Types::Boolean, null: true, method: :stored_externally?, description: "Whether the blob's content is stored externally (for instance, in LFS)." diff --git a/app/models/blob.rb b/app/models/blob.rb index 8909abe2ebb391..e242e4b6b2beec 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -8,6 +8,7 @@ class Blob < SimpleDelegator include BlobActiveModel MODE_SYMLINK = '120000' # The STRING 120000 is the git-reported octal filemode for a symlink + MODE_NORMAL = '100644' # The STRING 100644 is the git-reported octal filemode for a normal file MODE_EXECUTABLE = '100755' # The STRING 100755 is the git-reported octal filemode for an executable file CACHE_TIME = 60 # Cache raw blobs referred to by a (mutable) ref for 1 minute @@ -113,16 +114,16 @@ def inspect "#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>" end - # Returns the data of the blob. - # - # If the blob is a text based blob the content is converted to UTF-8 and any - # invalid byte sequences are replaced. def data - if binary_in_repo? - super - else - @data ||= super.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) - end + @data ||= process_data(super) + end + + def size + @size ||= super + end + + def binary + @binary ||= super end def load_all_data! @@ -132,6 +133,31 @@ def load_all_data! end end + def load_symlinked_data! + return unless symlink? + + linked_blob = load_symlinked_blob + return unless linked_blob + + @data = process_data(linked_blob.data, linked_blob.binary?) + @size = linked_blob.size + @binary = linked_blob.binary? + end + + def load_symlinked_blob(traversals = 2, loaded_paths = [path]) + return unless symlink? + + return if linked_path.nil? || loaded_paths.include?(linked_path) + + linked_blob = repository.blob_at(commit_id, linked_path) + + return linked_blob unless linked_blob&.symlink? + + return if traversals == 0 + + linked_blob.load_symlinked_blob(traversals - 1, loaded_paths << linked_path) + end + def empty? raw_size == 0 end @@ -166,25 +192,32 @@ def raw_size # text-based rich blob viewer matched on the file's extension. Otherwise, this # depends on the type of the blob itself. def binary? - if stored_externally? - if rich_viewer - rich_viewer.binary? - elsif known_extension? - false - elsif _mime_type - _mime_type.binary? - else - true - end - else - binary_in_repo? - end + return @binary unless @binary.nil? + + return binary_in_repo? unless stored_externally? + + return rich_viewer.binary? if rich_viewer + return false if known_extension? + return _mime_type.binary? if _mime_type + + true end def symlink? mode == MODE_SYMLINK end + def linked_path + return unless symlink? + + relative_target_path = Pathname.new(data.strip).cleanpath + source_dir_path = Pathname.new(path).parent + absolute_target_path = source_dir_path.join(relative_target_path).to_s + + # Files that link to paths outside of the git folder aren't reachable. + absolute_target_path unless absolute_target_path.start_with?('.') + end + def executable? mode == MODE_EXECUTABLE end @@ -270,4 +303,12 @@ def viewer_class_from(classes) classes.find { |viewer_class| viewer_class.can_render?(self, verify_binary: verify_binary) } end + + # If the blob is a text based blob the content is converted to UTF-8 and any + # invalid byte sequences are replaced. + def process_data(unprocessed_data, binary = binary_in_repo?) + return unprocessed_data if binary + + unprocessed_data.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) + end end diff --git a/spec/graphql/resolvers/blobs_resolver_spec.rb b/spec/graphql/resolvers/blobs_resolver_spec.rb index 0d725f00d43193..d07e96ec67191a 100644 --- a/spec/graphql/resolvers/blobs_resolver_spec.rb +++ b/spec/graphql/resolvers/blobs_resolver_spec.rb @@ -5,6 +5,7 @@ RSpec.describe Resolvers::BlobsResolver, feature_category: :source_code_management do include GraphqlHelpers include RepoHelpers + include FakeBlobHelpers describe '.resolver_complexity' do it 'adds one per path being resolved' do @@ -22,7 +23,7 @@ let(:repository) { project.repository } let(:args) { { paths: paths, ref: ref } } let(:paths) { [] } - let(:ref) { nil } + let(:ref) { 'master' } subject(:resolve_blobs) { resolve(described_class, obj: repository, args: args, ctx: { current_user: user }) } @@ -52,6 +53,186 @@ is_expected.to contain_exactly(have_attributes(path: 'README.md')) end + context 'with a path to a symlinked file' do + let(:path) { 'text-to-symlink.txt' } + let(:paths) { [path] } + + context 'when the symlink has 2 hops' do + let(:path) { 'one' } + let(:intermediate_path) { 'two' } + let(:target_path) { 'three' } + let(:target_data) { 'target_data' } + let(:blob) do + fake_blob( + path: path, + data: "./#{intermediate_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob) do + fake_blob( + path: intermediate_path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:target_blob) do + fake_blob( + path: target_path, + data: target_data, + binary: false, + size: 150000, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at).with(ref, intermediate_path).and_return(intermediate_blob) + allow(repository).to receive(:blob_at).with(ref, target_path).and_return(target_blob) + end + + it 'proxies the size, data, and binary values from the target blob' do + is_expected.to contain_exactly( + have_attributes( + path: path, + data: target_blob.data, + size: target_blob.size, + binary: target_blob.binary? + ) + ) + end + end + + context 'when the symlink has 3 hops' do + let(:path) { 'one' } + let(:intermediate_path_1) { 'two' } + let(:intermediate_path_2) { 'three' } + let(:target_path) { 'four' } + let(:target_data) { 'target_data' } + let(:blob) do + fake_blob( + path: path, + data: "./#{intermediate_path_1}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob_1) do + fake_blob( + path: intermediate_path_1, + data: "./#{intermediate_path_2}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob_2) do + fake_blob( + path: intermediate_path_2, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:target_blob) do + fake_blob( + path: target_path, + data: target_data, + binary: false, + size: 150000, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at).with(ref, intermediate_path_1).and_return(intermediate_blob_1) + allow(repository).to receive(:blob_at).with(ref, intermediate_path_2).and_return(intermediate_blob_2) + allow(repository).to receive(:blob_at).with(ref, target_path).and_return(target_blob) + end + + it "returns the original blob's data" do + is_expected.to contain_exactly( + have_attributes( + path: path, + data: blob.data, + size: blob.size, + binary: blob.binary? + ) + ) + end + end + + context 'when the symlink has a circular reference' do + let(:path) { 'README' } + let(:target_path) { 'text-to-symlink.txt' } + let(:blob) do + fake_blob( + path: path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + end + + it 'returns the original blob' do + is_expected.to contain_exactly( + have_attributes(path: blob.path, data: blob.data, size: blob.size, binary: blob.binary?) + ) + end + end + + context 'when the symlink is self-referencing' do + let(:path) { 'README' } + let(:target_path) { 'README' } + let(:blob) do + fake_blob( + path: path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at) + end + + it 'returns the original blob' do + is_expected.to contain_exactly( + have_attributes(path: blob.path, data: blob.data, size: blob.size, binary: blob.binary?) + ) + expect(repository).not_to have_received(:blob_at) + end + end + end + context 'specifying a non-existent blob' do let(:paths) { ['non-existent'] } diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index 5173c5c5a5918f..6db92f8bd09c6c 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Blob do +RSpec.describe Blob, feature_category: :shared do include FakeBlobHelpers using RSpec::Parameterized::TableSyntax @@ -137,6 +137,15 @@ expect(blob.data).to eq("\n���") end end + + context 'when @data has been set' do + it 'overrides the underlying blob data' do + blob = fake_blob(binary: false, data: 'fizz buzz', container: container) + data = 'lorem ipsum' + blob.instance_variable_set(:@data, data) + expect(blob.data).to eq(data) + end + end end context 'with project' do @@ -158,6 +167,222 @@ end end + describe '#size' do + let_it_be(:project) { create(:project, :repository) } + + context 'when @size is set' do + it 'overrides the underlying blob size' do + blob = fake_blob(size: 1, container: project) + size = 1_000_000 + blob.instance_variable_set(:@size, size) + expect(blob.size).to eq(size) + end + end + end + + describe '#binary' do + let_it_be(:project) { create(:project, :repository) } + let(:blob_binary) { false } + let(:blob) { fake_blob(binary: blob_binary, container: project) } + + context 'when @binary is set' do + before do + blob.instance_variable_set(:@binary, !blob_binary) + end + + it 'overrides blob.binary' do + expect(blob.binary).to eq(!blob_binary) + end + + it 'overrides blob.binary?' do + expect(blob.binary?).to eq(!blob_binary) + end + end + end + + describe '#load_symlinked_data!' do + let_it_be(:project) { create(:project, :repository) } + + subject(:load_symlinked_data) { current_blob.load_symlinked_data! } + + context 'when the blob is not a symlink' do + let(:current_blob) { fake_blob(mode: Blob::MODE_NORMAL, container: project) } + + before do + allow(current_blob).to receive(:load_symlinked_blob) + end + + it 'does nothing' do + load_symlinked_data + expect(current_blob).not_to have_received(:load_symlinked_blob) + end + end + + context 'when the blob is a symlink' do + shared_examples 'linked_blob attributes are copied to current_blob' do + it 'assigns linked_blob.size to current_blob' do + expect(current_blob.size).to eq(linked_blob_size) + end + + it 'assigns linked_blob.binary to current_blob' do + expect(current_blob.binary).to eq(linked_blob_binary) + end + + context 'when linked_blob.data is binary' do + let(:linked_blob_binary) { true } + + it 'assigns linked_blob.data to current_blob' do + expect(current_blob.data).to eq(linked_blob_data) + end + end + + context 'when linked_blob.data is not binary' do + let(:linked_blob_binary) { false } + + it 'attempts to encode the data in UTF8 format' do + expect(current_blob.data).to eq(encoded_blob_data) + end + end + end + + let(:current_blob_path) { 'current_blob.txt' } + let(:current_blob_data) { first_linked_blob.path } + + let(:linked_blob_data) { "foo\xC2bar" } + let(:encoded_blob_data) { "foo�bar" } + + let(:current_blob_binary) { false } + let(:linked_blob_binary) { true } + + let(:current_blob_size) { 1 } + let(:linked_blob_size) { 1_000_000 } + + let(:current_blob) do + fake_blob( + path: current_blob_path, + mode: Blob::MODE_SYMLINK, + data: current_blob_data, + binary: current_blob_binary, + size: current_blob_size, + container: project + ) + end + + let(:linked_blob) do + fake_blob( + path: 'linked_blob.txt', + binary: linked_blob_binary, + data: linked_blob_data, + size: linked_blob_size, + container: project + ) + end + + before do + allow(project.repository) + .to receive(:blob_at) + .and_return(*blob_at_chain) + + load_symlinked_data + end + + context 'when not linked_blob.symlink?' do + let(:blob_at_chain) { [first_linked_blob] } + let(:first_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'when linked_blob.symlink?' do + let(:first_linked_blob) do + fake_blob( + path: 'connect.txt', + binary: false, + mode: Blob::MODE_SYMLINK, + data: second_linked_blob.path, + container: project + ) + end + + context 'and the next linked blob is not a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob] } + let(:second_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'but the next linked blob loops back to current_blob' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob] } + let(:second_linked_blob) do + fake_blob( + path: 'loop.txt', + binary: !current_blob_binary, + mode: Blob::MODE_SYMLINK, + data: current_blob_path, + container: project + ) + end + + it 'does not update current_blob.size' do + expect(current_blob.size).to eq(current_blob_size) + end + + it 'does not update current_blob.binary' do + expect(current_blob.binary).to eq(current_blob_binary) + end + + it 'does not update current_blob.data' do + expect(current_blob.data).to eq(current_blob_data) + end + end + + context 'when the next linked blob is a symlink' do + let(:second_linked_blob) do + fake_blob( + path: 'connect2.txt', + binary: false, + mode: Blob::MODE_SYMLINK, + data: third_linked_blob.path, + container: project + ) + end + + context 'and the next linked blob is not a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob, third_linked_blob] } + let(:third_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'and the next linked blob is a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob, third_linked_blob, linked_blob] } + let(:third_linked_blob) do + fake_blob( + path: 'third.txt', + binary: !current_blob_binary, + mode: Blob::MODE_SYMLINK, + data: linked_blob.path, + container: project + ) + end + + it 'does not update current_blob.size' do + expect(current_blob.size).to eq(current_blob_size) + end + + it 'does not update current_blob.binary' do + expect(current_blob.binary).to eq(current_blob_binary) + end + + it 'does not update current_blob.data' do + expect(current_blob.data).to eq(current_blob_data) + end + end + end + end + end + end + describe '#external_storage_error?' do subject { blob.external_storage_error? } @@ -243,27 +468,76 @@ describe '#symlink?' do it 'is true for symlinks' do - symlink_blob = fake_blob(path: 'file', mode: '120000') + symlink_blob = fake_blob(path: 'file', mode: Blob::MODE_SYMLINK) expect(symlink_blob.symlink?).to eq true end it 'is false for non-symlinks' do - non_symlink_blob = fake_blob(path: 'file', mode: '100755') + non_symlink_blob = fake_blob(path: 'file', mode: Blob::MODE_EXECUTABLE) expect(non_symlink_blob.symlink?).to eq false end end + describe '#linked_path' do + let(:base_dir) { 'app/models/' } + let(:path) { 'LINKED_README.md' } + let(:blob) { fake_blob(data: data, path: File.join(base_dir, path).to_s, mode: mode) } + let(:data) { '' } + + subject(:linked_path) { blob.linked_path } + + context 'when symlink?' do + let(:mode) { Blob::MODE_SYMLINK } + + context 'when path does not start with . or /' do + let(:data) { 'README.md' } + + it { is_expected.to eq('app/models/README.md') } + end + + context 'when path includes current dir dot (./)' do + let(:data) { './README.md' } + + it { is_expected.to eq('app/models/README.md') } + end + + context 'when path includes parent dir dots (../)' do + let(:data) { '../README.md' } + + it { is_expected.to eq('app/README.md') } + end + + context 'when path includes multiple parent dir dots (../)' do + let(:data) { '../../README.md' } + + it { is_expected.to eq('README.md') } + end + + context 'when linked path links outside the git repo' do + let(:data) { '../../../README.md' } + + it { is_expected.to be_nil } + end + end + + context 'when not symlink?' do + let(:mode) { Blob::MODE_EXECUTABLE } + + it { is_expected.to be_nil } + end + end + describe '#executable?' do it 'is true for executables' do - executable_blob = fake_blob(path: 'file', mode: '100755') + executable_blob = fake_blob(path: 'file', mode: Blob::MODE_EXECUTABLE) expect(executable_blob.executable?).to eq true end it 'is false for non-executables' do - non_executable_blob = fake_blob(path: 'file', mode: '100655') + non_executable_blob = fake_blob(path: 'file', mode: Blob::MODE_SYMLINK) expect(non_executable_blob.executable?).to eq false end diff --git a/spec/support/helpers/fake_blob_helpers.rb b/spec/support/helpers/fake_blob_helpers.rb index 47fb9a345a39fc..280c22c72e6379 100644 --- a/spec/support/helpers/fake_blob_helpers.rb +++ b/spec/support/helpers/fake_blob_helpers.rb @@ -4,14 +4,24 @@ module FakeBlobHelpers class FakeBlob include BlobLike - attr_reader :path, :size, :data, :lfs_oid, :lfs_size, :mode - - def initialize(path: 'file.txt', size: 1.kilobyte, data: 'foo', binary: false, lfs: nil, mode: nil) + attr_reader :path, :lfs_oid, :lfs_size, :mode, :commit_id + attr_accessor :size, :data, :binary + + def initialize( + path: 'file.txt', + size: 1.kilobyte, + data: 'foo', + binary: false, + lfs: nil, + mode: nil, + commit_id: "11111111" + ) @path = path @size = size @data = data @binary = binary @mode = mode + @commit_id = commit_id @lfs_pointer = lfs.present? if @lfs_pointer @@ -26,10 +36,6 @@ def id "00000000" end - def commit_id - "11111111" - end - def binary_in_repo? @binary end -- GitLab