From 3df634943c9d19f361e19393d13eff3b7b15789f Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Thu, 29 Feb 2024 18:47:56 +0100 Subject: [PATCH 1/4] Store: consistent read/write cemented offsets Co-authored-by: vbot --- src/lib_store/unix/cemented_block_store.ml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib_store/unix/cemented_block_store.ml b/src/lib_store/unix/cemented_block_store.ml index 0f377c2fca54..609c32cde5c4 100644 --- a/src/lib_store/unix/cemented_block_store.ml +++ b/src/lib_store/unix/cemented_block_store.ml @@ -356,7 +356,7 @@ let close cemented_store = let offset_length = 4 (* file offset *) -let offset_encoding = Data_encoding.int31 +let offset_encoding = Data_encoding.int32 let find_block_file cemented_store block_level = try @@ -570,7 +570,10 @@ let read_block fd block_number = Lwt_utils_unix.read_bytes ~pos:0 ~len:offset_length fd offset_buffer in let offset = - Data_encoding.(Binary.of_bytes_exn offset_encoding offset_buffer) + let ofs = + Data_encoding.(Binary.of_bytes_exn offset_encoding offset_buffer) + in + Int32.to_int ofs in let* _ofs = Lwt_unix.lseek fd offset Unix.SEEK_SET in (* We move the cursor to the element's position *) -- GitLab From 1290143c8ff5b05d56fc2d268e24c4f9d412e3fb Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Thu, 29 Feb 2024 18:56:41 +0100 Subject: [PATCH 2/4] Store: cemented offset read as uint32 Co-authored-by: vbot --- src/lib_store/unix/cemented_block_store.ml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lib_store/unix/cemented_block_store.ml b/src/lib_store/unix/cemented_block_store.ml index 609c32cde5c4..712dc466cf44 100644 --- a/src/lib_store/unix/cemented_block_store.ml +++ b/src/lib_store/unix/cemented_block_store.ml @@ -573,7 +573,11 @@ let read_block fd block_number = let ofs = Data_encoding.(Binary.of_bytes_exn offset_encoding offset_buffer) in - Int32.to_int ofs + (* We interpret the offset, written as an int32, as an unsigned + 
int32. This is allowed by the encoding scheme and allows one additional bit to encode the offset. It enables dealing with files up to 4Gib. *) + match Int32.unsigned_to_int ofs with Some v -> v | None -> assert false in let* _ofs = Lwt_unix.lseek fd offset Unix.SEEK_SET in (* We move the cursor to the element's position *) -- GitLab From 5d35979ce993e856c0891ebdde37d7fefbf85f61 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Fri, 1 Mar 2024 09:20:19 +0100 Subject: [PATCH 3/4] Store: add FIXME for 4Gib cemented file limitation --- src/lib_store/unix/cemented_block_store.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib_store/unix/cemented_block_store.ml b/src/lib_store/unix/cemented_block_store.ml index 712dc466cf44..16ebc10a4c93 100644 --- a/src/lib_store/unix/cemented_block_store.ml +++ b/src/lib_store/unix/cemented_block_store.ml @@ -354,6 +354,8 @@ let close cemented_store = terminated which means potential new reads won't be scheduled. *) Metadata_fd_cache.clear cemented_store.metadata_fd_cache +(* FIXME: https://gitlab.com/tezos/tezos/-/issues/7034 Cemented file + cannot exceed 4Gib. *) let offset_length = 4 (* file offset *) let offset_encoding = Data_encoding.int32 -- GitLab From 8fd90341da4e28d1fb698dd9a642f49ad84e7479 Mon Sep 17 00:00:00 2001 From: vbot Date: Mon, 4 Mar 2024 11:44:11 +0100 Subject: [PATCH 4/4] Store: replace offset encoding by ad hoc conversion --- src/lib_store/unix/cemented_block_store.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/lib_store/unix/cemented_block_store.ml b/src/lib_store/unix/cemented_block_store.ml index 16ebc10a4c93..d4ff48ef8c5d 100644 --- a/src/lib_store/unix/cemented_block_store.ml +++ b/src/lib_store/unix/cemented_block_store.ml @@ -358,8 +358,6 @@ let close cemented_store = cannot exceed 4Gib.
*) let offset_length = 4 (* file offset *) -let offset_encoding = Data_encoding.int32 - let find_block_file cemented_store block_level = try if Compare.Int32.(block_level < 0l) then None @@ -572,14 +570,18 @@ let read_block fd block_number = Lwt_utils_unix.read_bytes ~pos:0 ~len:offset_length fd offset_buffer in let offset = - let ofs = - Data_encoding.(Binary.of_bytes_exn offset_encoding offset_buffer) - in + let ofs = Bytes.get_int32_be offset_buffer 0 in (* We interpret the offset, written as an int32, as an unsigned int32. This is allowed by the encoding scheme and allows one additional bit to encode the offset. It enables dealing with files up to 4Gib. *) - match Int32.unsigned_to_int ofs with Some v -> v | None -> assert false + match Int32.unsigned_to_int ofs with + | Some v -> v + | None -> + (* It will be [None] on 32-bit machines which is not + supported. We default to [Int32.to_int] instead of [assert + false] *) + Int32.to_int ofs in let* _ofs = Lwt_unix.lseek fd offset Unix.SEEK_SET in (* We move the cursor to the element's position *) -- GitLab