diff --git a/CHANGES.rst b/CHANGES.rst index 0ba861131877ba8808d1e08a33985ea13db8dc5b..eb70e350b37819277b9f1784449f3a1a03508758 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -57,6 +57,12 @@ Node was emitted every time a block was applied using an old protocol where its plugin was removed. +- **Breaking change** Bumped the Octez snapshot version from ``6`` to + ``7`` which fixes the corrupted generation of tar rolling and full + snapshots. It is still possible to import previous version snapshots + but snapshots in version 7 are not retro-compatible with previous + Octez versions (MR :gl:`!10785`). + Client ------ diff --git a/src/lib_store/unix/snapshots.ml b/src/lib_store/unix/snapshots.ml index a1318711542e88405afe7215b7f26ee2577719f7..eecce3b04991673aa10b39e5912f5c3d5cc1bb9a 100644 --- a/src/lib_store/unix/snapshots.ml +++ b/src/lib_store/unix/snapshots.ml @@ -596,17 +596,21 @@ module Version = struct * - 5: snapshot exported with new protocol tables and "à la GC" context (storage 2.0) * - 6: new context representation introduced by irmin 3.7.2 + * - 7: fix tar snapshots corrupted generation *) (* Used for old snapshot format versions *) let legacy_version = 4 - let current_version = 6 + let current_version = 7 (* List of versions that are supported *) let supported_versions = [ - (legacy_version, `Legacy_format); (5, `Legacy); (current_version, `Current); + (legacy_version, `Legacy_format); + (5, `Legacy); + (6, `Legacy); + (current_version, `Current); ] let is_supported version = @@ -1138,8 +1142,17 @@ end = struct let really_write fd = Lwt_cstruct.(complete (write fd)) end - module HR = Tar.HeaderReader (Lwt) (Reader) - module HW = Tar.HeaderWriter (Lwt) (Writer) + module HR = struct + include Tar.HeaderReader (Lwt) (Reader) + + let read ic = read ~level:Posix ic + end + + module HW = struct + include Tar.HeaderWriter (Lwt) (Writer) + + let write oc = write ~level:Posix oc + end type file = {header : Tar.Header.t; data_ofs : Int64.t} @@ -1154,7 +1167,7 @@ end = struct let* fd = Lwt_unix.openfile file Unix.[O_WRONLY; O_CREAT] snapshot_rw_file_perm in - let data_pos = Int64.of_int Header.length in + let data_pos = Int64.of_int (Header.length * 3) in let* _ = Lwt_unix.LargeFile.lseek fd data_pos SEEK_SET in Lwt.return {current_pos = 0L; data_pos; fd} @@ -1168,44 +1181,54 @@ end = struct Lwt_unix.close t.fd (* Builds a tar header for the given sequence of bytes *) - let header_of_bytes ?level ~filename ~data_size (file : Lwt_unix.file_descr) : + let header_of_bytes ~filename ~data_size (file : Lwt_unix.file_descr) : Header.t Lwt.t = let open Lwt_syntax in - let level = - match level with None -> Tar.Header.V7 | Some level -> level - in - (* Use Posix by default instead of V7? *) let* stat = Lwt_unix.LargeFile.fstat file in - let* pwent = Lwt_unix.getpwuid stat.Lwt_unix.LargeFile.st_uid in - let* grent = Lwt_unix.getgrgid stat.Lwt_unix.LargeFile.st_gid in let file_mode = stat.Lwt_unix.LargeFile.st_perm in let user_id = stat.Lwt_unix.LargeFile.st_uid in let group_id = stat.Lwt_unix.LargeFile.st_gid in let mod_time = Int64.of_float stat.Lwt_unix.LargeFile.st_mtime in let link_indicator = Tar.Header.Link.Normal in let link_name = "" in - let uname = if level = V7 then "" else pwent.Lwt_unix.pw_name in - let gname = if level = V7 then "" else grent.Lwt_unix.gr_name in - let devmajor = - if level = Ustar then stat.Lwt_unix.LargeFile.st_dev else 0 - in - let devminor = - if level = Ustar then stat.Lwt_unix.LargeFile.st_rdev else 0 + let devmajor = 0 in + let devminor = 0 in + (* Enforce the extended header version (Posix aka pax). All tar + headers are then expected to be of size [Tar.Header.length * 3 + = 512B x 3]. It is only necessary to set a single field to + trigger this behavior in the [Tar] library. *) + let extended = + Some + { + Tar.Header.Extended.access_time = None; + charset = None; + comment = None; + group_id = None; + gname = None; + header_charset = None; + link_path = None; + mod_time = None; + path = None; + file_size = Some data_size; + user_id = None; + uname = None; + } in - Lwt.return - (Tar.Header.make - ~file_mode - ~user_id - ~group_id - ~mod_time - ~link_indicator - ~link_name - ~uname - ~gname - ~devmajor - ~devminor - filename - data_size) + let header = + Tar.Header.make + ~file_mode + ~user_id + ~group_id + ~mod_time + ~link_indicator + ~link_name + ~devmajor + ~devminor + filename + data_size + in + let header = {header with extended} in + Lwt.return header (* [finalize tar ~bytes_written ~filename] writes the header corresponding to the quantity of data given through @@ -1216,7 +1239,8 @@ end = struct let open Lwt_syntax in (* Build the header based of the bytes_written *) let* header = header_of_bytes ~filename ~data_size:bytes_written t.fd in - let header_length = Int64.of_int Header.length in + (* We are building extended headers which are 512B x 3. *) + let header_length = Int64.of_int (Header.length * 3) in (* Compute and right the adequate padding for finalizing a block data *) let c = Tar.Header.zero_padding header in let zero_padding = Cstruct.to_bytes c in @@ -1357,7 +1381,12 @@ end = struct let open_in ~file = let open Lwt_syntax in let* fd = Lwt_unix.openfile file Unix.[O_RDONLY] snapshot_ro_file_perm in - let data_pos = Int64.of_int Header.length in + (* We need to retrieve the first header's length. [Tar] will shift + the offset to the data location in the file: we can infer the + length from it. *) + let* _header = HR.read fd in + let* data_pos = Lwt_unix.LargeFile.lseek fd 0L SEEK_CUR in + let* _ = Lwt_unix.LargeFile.lseek fd 0L SEEK_SET in let files = None in Lwt.return {current_pos = 0L; data_pos; fd; files} @@ -1453,22 +1482,22 @@ end = struct may_update_files t acc ; Lwt.return_none | Ok hdr -> - (* Header length can be 1024 if extended *) + (* Header length are 512B x 3 when extended (which is + now the default). *) let* data_pos = Lwt_unix.LargeFile.lseek t.fd 0L SEEK_CUR in if hdr.file_name = filename then Lwt.return_some {header = hdr; data_ofs = data_pos} else - let header_length = Int64.sub data_pos pos in + let header_length = Int64.(sub data_pos pos) in let file_size = hdr.Tar.Header.file_size in let padding = Int64.of_int (Tar.Header.compute_zero_padding_length hdr) in - let next_header = - Int64.(add (add file_size padding) header_length) + let next_header_pos = + Int64.(add pos (add (add file_size padding) header_length)) in - let* _ = Lwt_unix.LargeFile.lseek t.fd next_header SEEK_SET in let h = {header = hdr; data_ofs = data_pos} in - loop (Int64.add pos next_header) (h :: acc) + loop next_header_pos (h :: acc) in loop 0L [] @@ -3055,6 +3084,8 @@ end module type IMPORTER = sig type t + val format : snapshot_format + val init : snapshot_path:string -> dst_store_dir:[`Store_dir] Naming.directory -> @@ -3113,6 +3144,8 @@ module Raw_importer : IMPORTER = struct dst_chain_dir : [`Chain_dir] Naming.directory; } + let format = Raw + let load_snapshot_header ~snapshot_path = let (module Loader) = (module Make_snapshot_loader (Raw_loader) : Snapshot_loader) @@ -3457,6 +3490,8 @@ module Tar_importer : IMPORTER = struct files : Onthefly.file list; } + let format = Tar + let load_snapshot_header ~snapshot_path = let (module Loader) = (module Make_snapshot_loader (Tar_loader) : Snapshot_loader) @@ -4249,6 +4284,11 @@ module Make_snapshot_importer (Importer : IMPORTER) : Snapshot_importer = struct got = snapshot_version; }) in + let*! () = + if snapshot_version <= 6 && Importer.format = Tar then + Event.(emit warn_tar_corruption snapshot_version) + else Lwt.return_unit + in let* () = let metadata_chain_name = Snapshot_metadata.get_chain_name snapshot_metadata diff --git a/src/lib_store/unix/snapshots_events.ml b/src/lib_store/unix/snapshots_events.ml index 6bc26c56b0aae3d5073e0a769cf15d15dbd3055a..a9ab3dfa43760132f7c85198e8f85de351fc44d3 100644 --- a/src/lib_store/unix/snapshots_events.ml +++ b/src/lib_store/unix/snapshots_events.ml @@ -144,4 +144,17 @@ module Event = struct will not be fully checked. It is not recommended to use this option \ with a snapshot downloaded from an untrusted source" () + + let warn_tar_corruption = + declare_1 + ~section + ~level:Warning + ~name:"warn_tar_corruption" + ~msg: + "Warning: the snapshot being imported is in version {version} and \ + using the tar format: some internal files might be corrupted. If the \ + snapshot import fails, please import using a snapshot that is a least \ + version 7." + ~pp1:Format.pp_print_int + ("version", Data_encoding.int31) end diff --git a/tezt/tests/storage_snapshots.ml b/tezt/tests/storage_snapshots.ml index 3345fe733217278c57b280808b1782b1535fb213..7b8a8863aec83d11bf2c2d33a4d716669135b581 100644 --- a/tezt/tests/storage_snapshots.ml +++ b/tezt/tests/storage_snapshots.ml @@ -455,7 +455,10 @@ let test_info_command = in let expected_hash = JSON.(head |-> "hash" |> as_string) in let expected_level = head_level in - let expected_version = 6 in + (* This is expected to be updated as soon as a new snapshot version + is released (referring to the Snapshot.Version.current_version + from `lib_store/unix/snapshots`)*) + let expected_version = 7 in Log.info "Checks the human formatted output" ; (* Get the info line, which is the second line. *) let* () =