diff --git a/app/controllers/projects/blob_controller.rb b/app/controllers/projects/blob_controller.rb index 8e9625f25dd84c81a8c0d5bf3d53e20047abd9ea..092d24b7b4b807b3189e19bd292561897d8b0d2d 100644 --- a/app/controllers/projects/blob_controller.rb +++ b/app/controllers/projects/blob_controller.rb @@ -143,6 +143,7 @@ def blob return unless commit @blob = @repository.blob_at(commit.id, @path) + @blob.tap(&:load_symlinked_data!) if @blob end strong_memoize_attr :blob diff --git a/app/graphql/resolvers/blobs_resolver.rb b/app/graphql/resolvers/blobs_resolver.rb index 0b159a69d1fe9b8735634d0498e4a7acb3d6921c..38f8293b09859be6884da9b04c6de5d9cfde4827 100644 --- a/app/graphql/resolvers/blobs_resolver.rb +++ b/app/graphql/resolvers/blobs_resolver.rb @@ -41,6 +41,7 @@ def resolve(paths:, ref:, ref_type:) repository.blobs_at(paths.map { |path| [ref, path] }).tap do |blobs| blobs.each do |blob| blob.ref_type = ref_type + blob.load_symlinked_data! end end end diff --git a/app/graphql/types/repository/blob_type.rb b/app/graphql/types/repository/blob_type.rb index ba3608cafa572cda940311c4151b1dfb9c758fc3..b26b95281c3c37980b29d73c9a72715cddf0d667 100644 --- a/app/graphql/types/repository/blob_type.rb +++ b/app/graphql/types/repository/blob_type.rb @@ -56,8 +56,9 @@ class BlobType < BaseObject field :base64_encoded_blob, GraphQL::Types::String, null: true, experiment: { milestone: '17.1' }, description: 'Content of blob is encoded base64. Returns `null` if the `unicode_escaped_data` feature flag is disabled.' - field :raw_text_blob, GraphQL::Types::String, null: true, method: :text_only_data, - description: 'Raw content of the blob, if the blob is text data.' + field :raw_text_blob, GraphQL::Types::String, null: true, + description: 'Raw content of the blob, if the blob is text data.', + calls_gitaly: true field :stored_externally, GraphQL::Types::Boolean, null: true, method: :stored_externally?, description: "Whether the blob's content is stored externally (for instance, in LFS)." diff --git a/app/models/blob.rb b/app/models/blob.rb index 8909abe2ebb391a83dcafa6bdc2d3cb4e9c267bb..e242e4b6b2beec5e3819b1aad002f5f93af738ab 100644 --- a/app/models/blob.rb +++ b/app/models/blob.rb @@ -8,6 +8,7 @@ class Blob < SimpleDelegator include BlobActiveModel MODE_SYMLINK = '120000' # The STRING 120000 is the git-reported octal filemode for a symlink + MODE_NORMAL = '100644' # The STRING 100644 is the git-reported octal filemode for a normal file MODE_EXECUTABLE = '100755' # The STRING 100755 is the git-reported octal filemode for an executable file CACHE_TIME = 60 # Cache raw blobs referred to by a (mutable) ref for 1 minute @@ -113,16 +114,16 @@ def inspect "#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>" end - # Returns the data of the blob. - # - # If the blob is a text based blob the content is converted to UTF-8 and any - # invalid byte sequences are replaced. def data - if binary_in_repo? - super - else - @data ||= super.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) - end + @data ||= process_data(super) + end + + def size + @size ||= super + end + + def binary + @binary ||= super end def load_all_data! @@ -132,6 +133,31 @@ def load_all_data! end end + def load_symlinked_data! + return unless symlink? + + linked_blob = load_symlinked_blob + return unless linked_blob + + @data = process_data(linked_blob.data, linked_blob.binary?) + @size = linked_blob.size + @binary = linked_blob.binary? + end + + def load_symlinked_blob(traversals = 2, loaded_paths = [path]) + return unless symlink? + + return if linked_path.nil? || loaded_paths.include?(linked_path) + + linked_blob = repository.blob_at(commit_id, linked_path) + + return linked_blob unless linked_blob&.symlink? + + return if traversals == 0 + + linked_blob.load_symlinked_blob(traversals - 1, loaded_paths << linked_path) + end + def empty? raw_size == 0 end @@ -166,25 +192,32 @@ def raw_size # text-based rich blob viewer matched on the file's extension. Otherwise, this # depends on the type of the blob itself. def binary? - if stored_externally? - if rich_viewer - rich_viewer.binary? - elsif known_extension? - false - elsif _mime_type - _mime_type.binary? - else - true - end - else - binary_in_repo? - end + return @binary unless @binary.nil? + + return binary_in_repo? unless stored_externally? + + return rich_viewer.binary? if rich_viewer + return false if known_extension? + return _mime_type.binary? if _mime_type + + true end def symlink? mode == MODE_SYMLINK end + def linked_path + return unless symlink? + + relative_target_path = Pathname.new(data.strip).cleanpath + source_dir_path = Pathname.new(path).parent + absolute_target_path = source_dir_path.join(relative_target_path).to_s + + # Files that link to paths outside of the git folder aren't reachable. + absolute_target_path unless absolute_target_path.start_with?('.') + end + def executable? mode == MODE_EXECUTABLE end @@ -270,4 +303,12 @@ def viewer_class_from(classes) classes.find { |viewer_class| viewer_class.can_render?(self, verify_binary: verify_binary) } end + + # If the blob is a text based blob the content is converted to UTF-8 and any + # invalid byte sequences are replaced. + def process_data(unprocessed_data, binary = binary_in_repo?) + return unprocessed_data if binary + + unprocessed_data.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) + end end diff --git a/spec/graphql/resolvers/blobs_resolver_spec.rb b/spec/graphql/resolvers/blobs_resolver_spec.rb index 0d725f00d43193fa37c4d7c50608a38249843d63..d07e96ec67191ab8f7de39659f9476476dea7d9e 100644 --- a/spec/graphql/resolvers/blobs_resolver_spec.rb +++ b/spec/graphql/resolvers/blobs_resolver_spec.rb @@ -5,6 +5,7 @@ RSpec.describe Resolvers::BlobsResolver, feature_category: :source_code_management do include GraphqlHelpers include RepoHelpers + include FakeBlobHelpers describe '.resolver_complexity' do it 'adds one per path being resolved' do @@ -22,7 +23,7 @@ let(:repository) { project.repository } let(:args) { { paths: paths, ref: ref } } let(:paths) { [] } - let(:ref) { nil } + let(:ref) { 'master' } subject(:resolve_blobs) { resolve(described_class, obj: repository, args: args, ctx: { current_user: user }) } @@ -52,6 +53,186 @@ is_expected.to contain_exactly(have_attributes(path: 'README.md')) end + context 'with a path to a symlinked file' do + let(:path) { 'text-to-symlink.txt' } + let(:paths) { [path] } + + context 'when the symlink has 2 hops' do + let(:path) { 'one' } + let(:intermediate_path) { 'two' } + let(:target_path) { 'three' } + let(:target_data) { 'target_data' } + let(:blob) do + fake_blob( + path: path, + data: "./#{intermediate_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob) do + fake_blob( + path: intermediate_path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:target_blob) do + fake_blob( + path: target_path, + data: target_data, + binary: false, + size: 150000, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at).with(ref, intermediate_path).and_return(intermediate_blob) + allow(repository).to receive(:blob_at).with(ref, target_path).and_return(target_blob) + end + + it 'proxies the size, data, and binary values from the target blob' do + is_expected.to contain_exactly( + have_attributes( + path: path, + data: target_blob.data, + size: target_blob.size, + binary: target_blob.binary? + ) + ) + end + end + + context 'when the symlink has 3 hops' do + let(:path) { 'one' } + let(:intermediate_path_1) { 'two' } + let(:intermediate_path_2) { 'three' } + let(:target_path) { 'four' } + let(:target_data) { 'target_data' } + let(:blob) do + fake_blob( + path: path, + data: "./#{intermediate_path_1}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob_1) do + fake_blob( + path: intermediate_path_1, + data: "./#{intermediate_path_2}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:intermediate_blob_2) do + fake_blob( + path: intermediate_path_2, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + let(:target_blob) do + fake_blob( + path: target_path, + data: target_data, + binary: false, + size: 150000, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at).with(ref, intermediate_path_1).and_return(intermediate_blob_1) + allow(repository).to receive(:blob_at).with(ref, intermediate_path_2).and_return(intermediate_blob_2) + allow(repository).to receive(:blob_at).with(ref, target_path).and_return(target_blob) + end + + it "returns the original blob's data" do + is_expected.to contain_exactly( + have_attributes( + path: path, + data: blob.data, + size: blob.size, + binary: blob.binary? + ) + ) + end + end + + context 'when the symlink has a circular reference' do + let(:path) { 'README' } + let(:target_path) { 'text-to-symlink.txt' } + let(:blob) do + fake_blob( + path: path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + end + + it 'returns the original blob' do + is_expected.to contain_exactly( + have_attributes(path: blob.path, data: blob.data, size: blob.size, binary: blob.binary?) + ) + end + end + + context 'when the symlink is self-referencing' do + let(:path) { 'README' } + let(:target_path) { 'README' } + let(:blob) do + fake_blob( + path: path, + data: "./#{target_path}\n", + binary: false, + size: 10, + mode: Blob::MODE_SYMLINK, + commit_id: ref + ) + end + + before do + allow(repository).to receive(:blobs_at).with([[ref, path]]).and_return([blob]) + allow(repository).to receive(:blob_at) + end + + it 'returns the original blob' do + is_expected.to contain_exactly( + have_attributes(path: blob.path, data: blob.data, size: blob.size, binary: blob.binary?) + ) + expect(repository).not_to have_received(:blob_at) + end + end + end + context 'specifying a non-existent blob' do let(:paths) { ['non-existent'] } diff --git a/spec/models/blob_spec.rb b/spec/models/blob_spec.rb index 5173c5c5a5918f6962bd9fa79bfa0639f5d257d1..6db92f8bd09c6c5dadcf575bde2aa1e96ed66fda 100644 --- a/spec/models/blob_spec.rb +++ b/spec/models/blob_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Blob do +RSpec.describe Blob, feature_category: :shared do include FakeBlobHelpers using RSpec::Parameterized::TableSyntax @@ -137,6 +137,15 @@ expect(blob.data).to eq("\n���") end end + + context 'when @data has been set' do + it 'overrides the underlying blob data' do + blob = fake_blob(binary: false, data: 'fizz buzz', container: container) + data = 'lorem ipsum' + blob.instance_variable_set(:@data, data) + expect(blob.data).to eq(data) + end + end end context 'with project' do @@ -158,6 +167,222 @@ end end + describe '#size' do + let_it_be(:project) { create(:project, :repository) } + + context 'when @size is set' do + it 'overrides the underlying blob size' do + blob = fake_blob(size: 1, container: project) + size = 1_000_000 + blob.instance_variable_set(:@size, size) + expect(blob.size).to eq(size) + end + end + end + + describe '#binary' do + let_it_be(:project) { create(:project, :repository) } + let(:blob_binary) { false } + let(:blob) { fake_blob(binary: blob_binary, container: project) } + + context 'when @binary is set' do + before do + blob.instance_variable_set(:@binary, !blob_binary) + end + + it 'overrides blob.binary' do + expect(blob.binary).to eq(!blob_binary) + end + + it 'overrides blob.binary?' do + expect(blob.binary?).to eq(!blob_binary) + end + end + end + + describe '#load_symlinked_data!' do + let_it_be(:project) { create(:project, :repository) } + + subject(:load_symlinked_data) { current_blob.load_symlinked_data! } + + context 'when the blob is not a symlink' do + let(:current_blob) { fake_blob(mode: Blob::MODE_NORMAL, container: project) } + + before do + allow(current_blob).to receive(:load_symlinked_blob) + end + + it 'does nothing' do + load_symlinked_data + expect(current_blob).not_to have_received(:load_symlinked_blob) + end + end + + context 'when the blob is a symlink' do + shared_examples 'linked_blob attributes are copied to current_blob' do + it 'assigns linked_blob.size to current_blob' do + expect(current_blob.size).to eq(linked_blob_size) + end + + it 'assigns linked_blob.binary to current_blob' do + expect(current_blob.binary).to eq(linked_blob_binary) + end + + context 'when linked_blob.data is binary' do + let(:linked_blob_binary) { true } + + it 'assigns linked_blob.data to current_blob' do + expect(current_blob.data).to eq(linked_blob_data) + end + end + + context 'when linked_blob.data is not binary' do + let(:linked_blob_binary) { false } + + it 'attempts to encode the data in UTF8 format' do + expect(current_blob.data).to eq(encoded_blob_data) + end + end + end + + let(:current_blob_path) { 'current_blob.txt' } + let(:current_blob_data) { first_linked_blob.path } + + let(:linked_blob_data) { "foo\xC2bar" } + let(:encoded_blob_data) { "foo�bar" } + + let(:current_blob_binary) { false } + let(:linked_blob_binary) { true } + + let(:current_blob_size) { 1 } + let(:linked_blob_size) { 1_000_000 } + + let(:current_blob) do + fake_blob( + path: current_blob_path, + mode: Blob::MODE_SYMLINK, + data: current_blob_data, + binary: current_blob_binary, + size: current_blob_size, + container: project + ) + end + + let(:linked_blob) do + fake_blob( + path: 'linked_blob.txt', + binary: linked_blob_binary, + data: linked_blob_data, + size: linked_blob_size, + container: project + ) + end + + before do + allow(project.repository) + .to receive(:blob_at) + .and_return(*blob_at_chain) + + load_symlinked_data + end + + context 'when not linked_blob.symlink?' do + let(:blob_at_chain) { [first_linked_blob] } + let(:first_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'when linked_blob.symlink?' do + let(:first_linked_blob) do + fake_blob( + path: 'connect.txt', + binary: false, + mode: Blob::MODE_SYMLINK, + data: second_linked_blob.path, + container: project + ) + end + + context 'and the next linked blob is not a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob] } + let(:second_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'but the next linked blob loops back to current_blob' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob] } + let(:second_linked_blob) do + fake_blob( + path: 'loop.txt', + binary: !current_blob_binary, + mode: Blob::MODE_SYMLINK, + data: current_blob_path, + container: project + ) + end + + it 'does not update current_blob.size' do + expect(current_blob.size).to eq(current_blob_size) + end + + it 'does not update current_blob.binary' do + expect(current_blob.binary).to eq(current_blob_binary) + end + + it 'does not update current_blob.data' do + expect(current_blob.data).to eq(current_blob_data) + end + end + + context 'when the next linked blob is a symlink' do + let(:second_linked_blob) do + fake_blob( + path: 'connect2.txt', + binary: false, + mode: Blob::MODE_SYMLINK, + data: third_linked_blob.path, + container: project + ) + end + + context 'and the next linked blob is not a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob, third_linked_blob] } + let(:third_linked_blob) { linked_blob } + + it_behaves_like 'linked_blob attributes are copied to current_blob' + end + + context 'and the next linked blob is a symlink' do + let(:blob_at_chain) { [first_linked_blob, second_linked_blob, third_linked_blob, linked_blob] } + let(:third_linked_blob) do + fake_blob( + path: 'third.txt', + binary: !current_blob_binary, + mode: Blob::MODE_SYMLINK, + data: linked_blob.path, + container: project + ) + end + + it 'does not update current_blob.size' do + expect(current_blob.size).to eq(current_blob_size) + end + + it 'does not update current_blob.binary' do + expect(current_blob.binary).to eq(current_blob_binary) + end + + it 'does not update current_blob.data' do + expect(current_blob.data).to eq(current_blob_data) + end + end + end + end + end + end + describe '#external_storage_error?' do subject { blob.external_storage_error? } @@ -243,27 +468,76 @@ describe '#symlink?' do it 'is true for symlinks' do - symlink_blob = fake_blob(path: 'file', mode: '120000') + symlink_blob = fake_blob(path: 'file', mode: Blob::MODE_SYMLINK) expect(symlink_blob.symlink?).to eq true end it 'is false for non-symlinks' do - non_symlink_blob = fake_blob(path: 'file', mode: '100755') + non_symlink_blob = fake_blob(path: 'file', mode: Blob::MODE_EXECUTABLE) expect(non_symlink_blob.symlink?).to eq false end end + describe '#linked_path' do + let(:base_dir) { 'app/models/' } + let(:path) { 'LINKED_README.md' } + let(:blob) { fake_blob(data: data, path: File.join(base_dir, path).to_s, mode: mode) } + let(:data) { '' } + + subject(:linked_path) { blob.linked_path } + + context 'when symlink?' do + let(:mode) { Blob::MODE_SYMLINK } + + context 'when path does not start with . or /' do + let(:data) { 'README.md' } + + it { is_expected.to eq('app/models/README.md') } + end + + context 'when path includes current dir dot (./)' do + let(:data) { './README.md' } + + it { is_expected.to eq('app/models/README.md') } + end + + context 'when path includes parent dir dots (../)' do + let(:data) { '../README.md' } + + it { is_expected.to eq('app/README.md') } + end + + context 'when path includes multiple parent dir dots (../)' do + let(:data) { '../../README.md' } + + it { is_expected.to eq('README.md') } + end + + context 'when linked path links outside the git repo' do + let(:data) { '../../../README.md' } + + it { is_expected.to be_nil } + end + end + + context 'when not symlink?' do + let(:mode) { Blob::MODE_EXECUTABLE } + + it { is_expected.to be_nil } + end + end + describe '#executable?' do it 'is true for executables' do - executable_blob = fake_blob(path: 'file', mode: '100755') + executable_blob = fake_blob(path: 'file', mode: Blob::MODE_EXECUTABLE) expect(executable_blob.executable?).to eq true end it 'is false for non-executables' do - non_executable_blob = fake_blob(path: 'file', mode: '100655') + non_executable_blob = fake_blob(path: 'file', mode: Blob::MODE_SYMLINK) expect(non_executable_blob.executable?).to eq false end diff --git a/spec/support/helpers/fake_blob_helpers.rb b/spec/support/helpers/fake_blob_helpers.rb index 47fb9a345a39fce2d90f1681c3e3b8276de145e2..280c22c72e63799d350e3ab3b1c99d82788fa9e3 100644 --- a/spec/support/helpers/fake_blob_helpers.rb +++ b/spec/support/helpers/fake_blob_helpers.rb @@ -4,14 +4,24 @@ module FakeBlobHelpers class FakeBlob include BlobLike - attr_reader :path, :size, :data, :lfs_oid, :lfs_size, :mode - - def initialize(path: 'file.txt', size: 1.kilobyte, data: 'foo', binary: false, lfs: nil, mode: nil) + attr_reader :path, :lfs_oid, :lfs_size, :mode, :commit_id + attr_accessor :size, :data, :binary + + def initialize( + path: 'file.txt', + size: 1.kilobyte, + data: 'foo', + binary: false, + lfs: nil, + mode: nil, + commit_id: "11111111" + ) @path = path @size = size @data = data @binary = binary @mode = mode + @commit_id = commit_id @lfs_pointer = lfs.present? if @lfs_pointer @@ -26,10 +36,6 @@ def id "00000000" end - def commit_id - "11111111" - end - def binary_in_repo? @binary end