From e56855dc301182a80037a46a8e0762dbc401ec41 Mon Sep 17 00:00:00 2001 From: vbot Date: Mon, 6 Nov 2023 15:40:30 +0100 Subject: [PATCH 1/5] Snapshots: fix invalid tar header generation --- src/lib_store/unix/snapshots.ml | 103 ++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 39 deletions(-) diff --git a/src/lib_store/unix/snapshots.ml b/src/lib_store/unix/snapshots.ml index 1f564852a816..33f2400e3472 100644 --- a/src/lib_store/unix/snapshots.ml +++ b/src/lib_store/unix/snapshots.ml @@ -1138,8 +1138,17 @@ end = struct let really_write fd = Lwt_cstruct.(complete (write fd)) end - module HR = Tar.HeaderReader (Lwt) (Reader) - module HW = Tar.HeaderWriter (Lwt) (Writer) + module HR = struct + include Tar.HeaderReader (Lwt) (Reader) + + let read ic = read ~level:Posix ic + end + + module HW = struct + include Tar.HeaderWriter (Lwt) (Writer) + + let write oc = write ~level:Posix oc + end type file = {header : Tar.Header.t; data_ofs : Int64.t} @@ -1154,7 +1163,7 @@ end = struct let* fd = Lwt_unix.openfile file Unix.[O_WRONLY; O_CREAT] snapshot_rw_file_perm in - let data_pos = Int64.of_int Header.length in + let data_pos = Int64.of_int (Header.length * 3) in let* _ = Lwt_unix.LargeFile.lseek fd data_pos SEEK_SET in Lwt.return {current_pos = 0L; data_pos; fd} @@ -1168,44 +1177,54 @@ end = struct Lwt_unix.close t.fd (* Builds a tar header for the given sequence of bytes *) - let header_of_bytes ?level ~filename ~data_size (file : Lwt_unix.file_descr) : + let header_of_bytes ~filename ~data_size (file : Lwt_unix.file_descr) : Header.t Lwt.t = let open Lwt_syntax in - let level = - match level with None -> Tar.Header.V7 | Some level -> level - in - (* Use Posix by default instead of V7? *) let* stat = Lwt_unix.LargeFile.fstat file in - let* pwent = Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid in - let* grent = Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid in let file_mode = stat.Lwt_unix.LargeFile.st_perm in let user_id = stat.Lwt_unix.LargeFile.st_uid in let group_id = stat.Lwt_unix.LargeFile.st_gid in let mod_time = Int64.of_float stat.Lwt_unix.LargeFile.st_mtime in let link_indicator = Tar.Header.Link.Normal in let link_name = "" in - let uname = if level = V7 then "" else pwent.Lwt_unix.pw_name in - let gname = if level = V7 then "" else grent.Lwt_unix.gr_name in - let devmajor = - if level = Ustar then stat.Lwt_unix.LargeFile.st_dev else 0 - in - let devminor = - if level = Ustar then stat.Lwt_unix.LargeFile.st_rdev else 0 + let devmajor = 0 in + let devminor = 0 in + (* Enforce the extended header version (Posix aka pax). All tar + headers are then expected to be of size [Tar.Header.length * 3 + = 512B x 3]. It is only necessary to set a single field to + trigger this behavior in the [Tar] library. *) + let extended = + Some + { + Tar.Header.Extended.access_time = None; + charset = None; + comment = None; + group_id = None; + gname = None; + header_charset = None; + link_path = None; + mod_time = None; + path = None; + file_size = Some data_size; + user_id = None; + uname = None; + } in - Lwt.return - (Tar.Header.make - ~file_mode - ~user_id - ~group_id - ~mod_time - ~link_indicator - ~link_name - ~uname - ~gname - ~devmajor - ~devminor - filename - data_size) + let header = + Tar.Header.make + ~file_mode + ~user_id + ~group_id + ~mod_time + ~link_indicator + ~link_name + ~devmajor + ~devminor + filename + data_size + in + let header = {header with extended} in + Lwt.return header (* [finalize tar ~bytes_written ~filename] writes the header corresponding to the quantity of data given through @@ -1216,7 +1235,8 @@ end = struct let open Lwt_syntax in (* Build the header based of the bytes_written *) let* header = header_of_bytes ~filename ~data_size:bytes_written t.fd in - let header_length = Int64.of_int Header.length in + (* We are building extended headers which are 512B x 3. *) + let header_length = Int64.of_int (Header.length * 3) in (* Compute and right the adequate padding for finalizing a block data *) let c = Tar.Header.zero_padding header in let zero_padding = Cstruct.to_bytes c in @@ -1357,7 +1377,12 @@ end = struct let open_in ~file = let open Lwt_syntax in let* fd = Lwt_unix.openfile file Unix.[O_RDONLY] snapshot_ro_file_perm in - let data_pos = Int64.of_int Header.length in + (* We need to retrieve the first header's length. [Tar] will shift + the offset to the data location in the file: we can infer the + length from it. *) + let* _header = HR.read fd in + let* data_pos = Lwt_unix.LargeFile.lseek fd 0L SEEK_CUR in + let* _ = Lwt_unix.LargeFile.lseek fd 0L SEEK_SET in let files = None in Lwt.return {current_pos = 0L; data_pos; fd; files} @@ -1453,22 +1478,22 @@ end = struct may_update_files t acc ; Lwt.return_none | Ok hdr -> - (* Header length can be 1024 if extended *) + (* Header length are 512B x 3 when extended (which is + now the default). *) let* data_pos = Lwt_unix.LargeFile.lseek t.fd 0L SEEK_CUR in if hdr.file_name = filename then Lwt.return_some {header = hdr; data_ofs = data_pos} else - let header_length = Int64.sub data_pos pos in + let header_length = Int64.(sub data_pos pos) in let file_size = hdr.Tar.Header.file_size in let padding = Int64.of_int (Tar.Header.compute_zero_padding_length hdr) in - let next_header = - Int64.(add (add file_size padding) header_length) + let next_header_pos = + Int64.(add pos (add (add file_size padding) header_length)) in - let* _ = Lwt_unix.LargeFile.lseek t.fd next_header SEEK_SET in let h = {header = hdr; data_ofs = data_pos} in - loop (Int64.add pos next_header) (h :: acc) + loop next_header_pos (h :: acc) in loop 0L [] -- GitLab From 57e52f6003030360556026ca4b4a3fad96c1c2b8 Mon Sep 17 00:00:00 2001 From: vbot Date: Wed, 8 Nov 2023 12:51:55 +0100 Subject: [PATCH 2/5] Snapshots: bump snapshot version to 7 --- src/lib_store/unix/snapshots.ml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lib_store/unix/snapshots.ml b/src/lib_store/unix/snapshots.ml index 33f2400e3472..6688b974a129 100644 --- a/src/lib_store/unix/snapshots.ml +++ b/src/lib_store/unix/snapshots.ml @@ -596,17 +596,21 @@ module Version = struct * - 5: snapshot exported with new protocol tables and "à la GC" context (storage 2.0) * - 6: new context representation introduced by irmin 3.7.2 + * - 7: fix tar snapshots corrupted generation *) (* Used for old snapshot format versions *) let legacy_version = 4 - let current_version = 6 + let current_version = 7 (* List of versions that are supported *) let supported_versions = [ - (legacy_version, `Legacy_format); (5, `Legacy); (current_version, `Current); + (legacy_version, `Legacy_format); + (5, `Legacy); + (6, `Legacy); + (current_version, `Current); ] let is_supported version = -- GitLab From 19f84fa927b21ec62d963b711d49fb1cd916ec50 Mon Sep 17 00:00:00 2001 From: vbot Date: Wed, 8 Nov 2023 12:52:17 +0100 Subject: [PATCH 3/5] Snapshots: emit warning about potentially corrupted v6 tar snapshots --- src/lib_store/unix/snapshots.ml | 11 +++++++++++ src/lib_store/unix/snapshots_events.ml | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/lib_store/unix/snapshots.ml b/src/lib_store/unix/snapshots.ml index 6688b974a129..3c5c31ad5279 100644 --- a/src/lib_store/unix/snapshots.ml +++ b/src/lib_store/unix/snapshots.ml @@ -3084,6 +3084,8 @@ end module type IMPORTER = sig type t + val format : snapshot_format + val init : snapshot_path:string -> dst_store_dir:[`Store_dir] Naming.directory -> @@ -3142,6 +3144,8 @@ module Raw_importer : IMPORTER = struct dst_chain_dir : [`Chain_dir] Naming.directory; } + let format = Raw + let load_snapshot_header ~snapshot_path = let (module Loader) = (module Make_snapshot_loader (Raw_loader) : Snapshot_loader) @@ -3486,6 +3490,8 @@ module Tar_importer : IMPORTER = struct files : Onthefly.file list; } + let format = Tar + let load_snapshot_header ~snapshot_path = let (module Loader) = (module Make_snapshot_loader (Tar_loader) : Snapshot_loader) @@ -4278,6 +4284,11 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct got = snapshot_version; }) in + let*! () = + if snapshot_version <= 6 && Importer.format = Tar then + Event.(emit warn_tar_corruption snapshot_version) + else Lwt.return_unit + in let* () = let metadata_chain_name = Snapshot_metadata.get_chain_name snapshot_metadata diff --git a/src/lib_store/unix/snapshots_events.ml b/src/lib_store/unix/snapshots_events.ml index 6bc26c56b0aa..a9ab3dfa4376 100644 --- a/src/lib_store/unix/snapshots_events.ml +++ b/src/lib_store/unix/snapshots_events.ml @@ -144,4 +144,17 @@ module Event = struct will not be fully checked. It is not recommended to use this option \ with a snapshot downloaded from an untrusted source" () + + let warn_tar_corruption = + declare_1 + ~section + ~level:Warning + ~name:"warn_tar_corruption" + ~msg: + "Warning: the snapshot being imported is in version {version} and \ + using the tar format: some internal files might be corrupted. If the \ + snapshot import fails, please import using a snapshot that is a least \ + version 7." + ~pp1:Format.pp_print_int + ("version", Data_encoding.int31) end -- GitLab From 519d341e0d9272cf245acdd46dc39fb9fd0e5dbe Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Thu, 9 Nov 2023 10:08:29 +0100 Subject: [PATCH 4/5] Test: bump snapshot version to 7 --- tezt/tests/storage_snapshots.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tezt/tests/storage_snapshots.ml b/tezt/tests/storage_snapshots.ml index cf4953f4d437..b15e30471834 100644 --- a/tezt/tests/storage_snapshots.ml +++ b/tezt/tests/storage_snapshots.ml @@ -451,7 +451,10 @@ let test_info_command = in let expected_hash = JSON.(head |-> "hash" |> as_string) in let expected_level = head_level in - let expected_version = 6 in + (* This is expected to be updated as soon as a new snapshot version + is released (referring to the Snapshot.Version.current_version + from `lib_store/unix/snapshots`)*) + let expected_version = 7 in Log.info "Checks the human formatted output" ; (* Get the info line, which is the second line. *) let* () = -- GitLab From 7334f0a87973da0ed46c3cfcff12d89e2d35312c Mon Sep 17 00:00:00 2001 From: vbot Date: Wed, 8 Nov 2023 13:26:21 +0100 Subject: [PATCH 5/5] Changelog: add entry --- CHANGES.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 2e7d61c639b5..9e5efc8a03d6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -239,6 +239,12 @@ Node nodes. Also, improved the consistency of ``snapshot`` import errors messages (MR :gl:`!10138`) +- **Breaking change** Bumped the Octez snapshot version from ``6`` to + ``7`` which fixes the corrupted generation of tar rolling and full + snapshots. It is still possible to import previous version snapshots + but snapshots in version 7 are not retro-compatible with previous + octez versions (MR :gl:`!10785`). + Client ------ - Adding client commands to generate, open and verify a time-lock. -- GitLab