diff --git a/CHANGES.rst b/CHANGES.rst index 05189c0eb39c389a55ade05115e783b82506779a..47880666a0327cb1019d08fd4e6c428525c41ce2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -138,6 +138,8 @@ Node - Fixed a bug while reconstructing the storage after a snapshot import that would result in wrong context hash mapping for some blocks. +- Fixed a bug that caused a context corruption when using an old context. + Client ------ diff --git a/src/lib_context/disk/context.ml b/src/lib_context/disk/context.ml index c3ae143352d97a8ccd3f640632b7501e9b003f85..8641189964035209317c8edbb2309b3ff286919c 100644 --- a/src/lib_context/disk/context.ml +++ b/src/lib_context/disk/context.ml @@ -183,6 +183,14 @@ module Events = struct ~pp2:Time.System.Span.pp_hum ("finalisation", Time.System.Span.encoding) + let split_context = + declare_0 + ~section + ~level:Debug + ~name:"split_context" + ~msg:"splitting context into a new chunk" + () + let gc_failure = declare_1 ~section @@ -394,7 +402,11 @@ module Make (Encoding : module type of Tezos_context_encoding.Context) = struct let is_gc_allowed index = Store.Gc.is_allowed index.repo - let split index = Store.split index.repo + let split index = + let open Lwt_syntax in + let* () = Events.(emit split_context ()) in + Store.split index.repo ; + Lwt.return_unit let export_snapshot index context_hash ~path = let open Lwt_syntax in diff --git a/src/lib_context/memory/context.ml b/src/lib_context/memory/context.ml index 703f7a1c85c9f75f338ea514f0dadc5ad369d7c1..078a17519333b5b598d732e8463b5c11cd79fd8c 100644 --- a/src/lib_context/memory/context.ml +++ b/src/lib_context/memory/context.ml @@ -197,7 +197,7 @@ module Make (Encoding : module type of Tezos_context_encoding.Context) = struct let is_gc_allowed _ = (* not implemented for in-memory context *) false - let split _ = (* not implemented for in-memory context *) () + let split _ = (* not implemented for in-memory context *) Lwt.return_unit let export_snapshot _ _ ~path:_ = (* not implemented for in-memory context *) Lwt.return_unit diff --git a/src/lib_context/sigs/context.ml b/src/lib_context/sigs/context.ml index d05cb41f2a756e51b1599f7c9e998b9319d42f69..0130f45656def59a556d26aab914550e06c5dbe0 100644 --- a/src/lib_context/sigs/context.ml +++ b/src/lib_context/sigs/context.ml @@ -780,7 +780,7 @@ module type TEZOS_CONTEXT = sig partially needed data. This is not an issue, but it should be avoided to prevent storing unnecessary data and thus, to minimize the disk footprint. *) - val split : index -> unit + val split : index -> unit Lwt.t (** [export_snapshot index context_hash ~path] exports the context corresponding to [context_hash], if found in [index], into the diff --git a/src/lib_protocol_environment/context_ops/context_ops.ml b/src/lib_protocol_environment/context_ops/context_ops.ml index 51d2218db0c41b1a56b8e5e37a7934cec8482f6d..3112bdaf1f012431b72a6d181b9f92f795c5c2f4 100644 --- a/src/lib_protocol_environment/context_ops/context_ops.ml +++ b/src/lib_protocol_environment/context_ops/context_ops.ml @@ -208,6 +208,11 @@ let wait_gc_completion context_index = | Disk_index index -> Context.wait_gc_completion index | Memory_index index -> Tezos_context_memory.Context.wait_gc_completion index +let is_gc_allowed context_index = + match context_index with + | Disk_index index -> Context.is_gc_allowed index + | Memory_index index -> Tezos_context_memory.Context.is_gc_allowed index + let split context_index = match context_index with | Disk_index index -> Context.split index diff --git a/src/lib_shell/block_validator_process.ml b/src/lib_shell/block_validator_process.ml index 2f08bd7c732ba30b8ed4a6a114f3cd0adf910eac..82aab9f4861bf40fe056d6de374f73c3d1192b60 100644 --- a/src/lib_shell/block_validator_process.ml +++ b/src/lib_shell/block_validator_process.ml @@ -385,7 +385,7 @@ module Internal_validator_process = struct let context_split _validator context_index = let open Lwt_result_syntax in - let () = Context_ops.split context_index in + let*! () = Context_ops.split context_index in return_unit let commit_genesis validator ~chain_id = diff --git a/src/lib_shell/chain_validator.ml b/src/lib_shell/chain_validator.ml index a19236da00a3f31f59d010ca66785b85112ff8d5..c5c6ebff68695b8129c3d518a776105e67713e2c 100644 --- a/src/lib_shell/chain_validator.ml +++ b/src/lib_shell/chain_validator.ml @@ -443,60 +443,71 @@ let register_garbage_collect_callback w = let gc_lockfile_path = Naming.(file_path (gc_lockfile (Store.Chain.chain_dir chain_store))) in - let gc_synchronous_call block_hash = - let* block = Store.Block.read_block chain_store block_hash in - let* resulting_context_hash = - Store.Block.resulting_context_hash chain_store block - in - let* () = - Block_validator.context_garbage_collection - nv.parameters.block_validator - index - resulting_context_hash - ~gc_lockfile_path - in - let kind = - Block_validator_process.kind nv.parameters.block_validator_process - in - (* The callback call will block until the GC is completed: either - using the Irmin function directly in single-process or using - the lockfile through the validator process used to notify the - GC completion. *) - match kind with - | Single_process -> - (* If the GC is running in the same instance, we can directly - wait for its completion. *) - let*! () = Context_ops.wait_gc_completion index in - return_unit - | External_process -> - (* Otherwise, we wait for the lockfile, which is locked by the - external process when the GC starts and released when the - GC ends. *) - let* gc_lockfile = - protect (fun () -> - let*! fd = - Lwt_unix.openfile - gc_lockfile_path - [Unix.O_CREAT; O_RDWR; O_CLOEXEC; O_SYNC] - 0o644 - in - return fd) + let is_gc_allowed = Context_ops.is_gc_allowed index in + let gc_callback = + if is_gc_allowed then + let gc_synchronous_call block_hash = + let* block = Store.Block.read_block chain_store block_hash in + let* resulting_context_hash = + Store.Block.resulting_context_hash chain_store block in - Lwt.finalize - (fun () -> - let*! () = Lwt_unix.lockf gc_lockfile Unix.F_RLOCK 0 in - let*! () = Lwt_unix.lockf gc_lockfile Unix.F_ULOCK 0 in - return_unit) - (fun () -> Lwt_unix.close gc_lockfile) + let* () = + Block_validator.context_garbage_collection + nv.parameters.block_validator + index + resulting_context_hash + ~gc_lockfile_path + in + let kind = + Block_validator_process.kind nv.parameters.block_validator_process + in + (* The callback call will block until the GC is completed: either + using the Irmin function directly in single-process or using + the lockfile through the validator process used to notify the + GC completion. *) + match kind with + | Single_process -> + (* If the GC is running in the same instance, we can directly + wait for its completion. *) + let*! () = Context_ops.wait_gc_completion index in + return_unit + | External_process -> + (* Otherwise, we wait for the lockfile, which is locked by the + external process when the GC starts and released when the + GC ends. *) + let* gc_lockfile = + protect (fun () -> + let*! fd = + Lwt_unix.openfile + gc_lockfile_path + [Unix.O_CREAT; O_RDWR; O_CLOEXEC; O_SYNC] + 0o644 + in + return fd) + in + Lwt.finalize + (fun () -> + let*! () = Lwt_unix.lockf gc_lockfile Unix.F_RLOCK 0 in + let*! () = Lwt_unix.lockf gc_lockfile Unix.F_ULOCK 0 in + return_unit) + (fun () -> Lwt_unix.close gc_lockfile) + in + Some gc_synchronous_call + else None in - Store.Chain.register_gc_callback nv.parameters.chain_store gc_synchronous_call + Store.Chain.register_gc_callback nv.parameters.chain_store gc_callback let register_split_callback w = let nv = Worker.state w in let chain_store = nv.parameters.chain_store in let index = Store.(context_index (Chain.global_store chain_store)) in - let split () = - Block_validator.context_split nv.parameters.block_validator index + let is_gc_allowed = Context_ops.is_gc_allowed index in + let split = + if is_gc_allowed then + Some + (fun () -> + Block_validator.context_split nv.parameters.block_validator index) + else None in Store.Chain.register_split_callback nv.parameters.chain_store split diff --git a/src/lib_store/shared/store_events.ml b/src/lib_store/shared/store_events.ml index f69dba37bfda06c3b4d5ef73b0a0483cbe62fb12..0aa9acc9af9746ff746b41c89274e1392013129b 100644 --- a/src/lib_store/shared/store_events.ml +++ b/src/lib_store/shared/store_events.ml @@ -220,6 +220,15 @@ let start_context_gc = ~pp1:pp_block_descriptor ("block", block_descriptor_encoding) +let start_context_split = + declare_1 + ~section + ~level:Info + ~name:"start_context_split" + ~msg:"splitting context into a new chunk at level {level}" + ~pp1:pp_int32 + ("level", Data_encoding.int32) + let context_gc_is_not_allowed = declare_0 ~section diff --git a/src/lib_store/store.mli b/src/lib_store/store.mli index 72166fc5ddb93a54cb89f1a47b3ff67a37040831..5dd0d26b469d8b7af89adf3c4b9ca0ea1788e2a0 100644 --- a/src/lib_store/store.mli +++ b/src/lib_store/store.mli @@ -937,13 +937,13 @@ module Chain : sig [callback] that may be triggered during a block store merge in order to garbage-collect old contexts. *) val register_gc_callback : - chain_store -> (Block_hash.t -> unit tzresult Lwt.t) -> unit + chain_store -> (Block_hash.t -> unit tzresult Lwt.t) option -> unit (** [register_split_callback chain_store callback] installs a [callback] that may be triggered during a [set_head] in order to split the context into a new chunk. *) val register_split_callback : - chain_store -> (unit -> unit tzresult Lwt.t) -> unit + chain_store -> (unit -> unit tzresult Lwt.t) option -> unit end (** [global_block_watcher global_store] instantiates a new block diff --git a/src/lib_store/unix/block_store.ml b/src/lib_store/unix/block_store.ml index ab366c48ebc3209ad9b837e4e7884e319f7b564b..bc93155a325d4dd0ed95f4522fe10c2c336897f6 100644 --- a/src/lib_store/unix/block_store.ml +++ b/src/lib_store/unix/block_store.ml @@ -1316,11 +1316,13 @@ let may_trigger_gc block_store history_mode ~previous_savepoint ~new_savepoint = let*! () = Store_events.(emit start_context_gc new_savepoint) in gc savepoint_hash -let split_context block_store = +let split_context block_store new_head_lafl = let open Lwt_result_syntax in match block_store.split_callback with | None -> return_unit - | Some split -> split () + | Some split -> + let*! () = Store_events.(emit start_context_split new_head_lafl) in + split () let merge_stores block_store ~(on_error : tztrace -> unit tzresult Lwt.t) ~finalizer ~history_mode ~new_head ~new_head_metadata @@ -1621,10 +1623,10 @@ let create ?block_cache_limit chain_dir ~genesis_block = return block_store let register_gc_callback block_store gc_callback = - block_store.gc_callback <- Some gc_callback + block_store.gc_callback <- gc_callback let register_split_callback block_store split_callback = - block_store.split_callback <- Some split_callback + block_store.split_callback <- split_callback let pp_merge_status fmt status = match status with diff --git a/src/lib_store/unix/block_store.mli b/src/lib_store/unix/block_store.mli index 9227a0c97845f096371720fb6adb90f6043c2e31..034fe00320e94cf1b7ccb2ff4a14bd20f5e7dcc6 100644 --- a/src/lib_store/unix/block_store.mli +++ b/src/lib_store/unix/block_store.mli @@ -350,17 +350,17 @@ val load : that may be triggered during a block store merge in order to garbage-collect old contexts. *) val register_gc_callback : - block_store -> (Block_hash.t -> unit tzresult Lwt.t) -> unit + block_store -> (Block_hash.t -> unit tzresult Lwt.t) option -> unit (** [register_split_callback chain_store callback] installs a [callback] that may be triggered during a [set_head] in order to split the context into a new chunk. *) val register_split_callback : - block_store -> (unit -> unit tzresult Lwt.t) -> unit + block_store -> (unit -> unit tzresult Lwt.t) option -> unit -(** [split_context block_store] calls the callback registered by - [register_split_callback] if any. *) -val split_context : t -> unit tzresult Lwt.t +(** [split_context block_store new_head_lafl] calls the callback + registered by [register_split_callback] if any. *) +val split_context : t -> Int32.t -> unit tzresult Lwt.t (** [close block_store] closes the [block_store] and every underlying opened stores. diff --git a/src/lib_store/unix/store.ml b/src/lib_store/unix/store.ml index 3c1e047315f8b55ec71a84ebf3b0cb81aa9ee74f..f4d79e2d76883493264747eff6f3b83b4ab6d868 100644 --- a/src/lib_store/unix/store.ml +++ b/src/lib_store/unix/store.ml @@ -1393,7 +1393,7 @@ module Chain = struct (Block.last_allowed_fork_level previous_head_metadata)) then let block_store = chain_store.block_store in - Block_store.split_context block_store + Block_store.split_context block_store new_head_lafl else return_unit let set_head chain_store new_head = diff --git a/src/lib_store/unix/store.mli b/src/lib_store/unix/store.mli index 2a06f4bcef87ce8e2f7055d2cf060714b6f137a8..9088771a10753fb7e60e4293260c9587b500e413 100644 --- a/src/lib_store/unix/store.mli +++ b/src/lib_store/unix/store.mli @@ -931,13 +931,13 @@ module Chain : sig [callback] that may be triggered during a block store merge in order to garbage-collect old contexts. *) val register_gc_callback : - chain_store -> (Block_hash.t -> unit tzresult Lwt.t) -> unit + chain_store -> (Block_hash.t -> unit tzresult Lwt.t) option -> unit (** [register_split_callback chain_store callback] installs a [callback] that may be triggered during a [set_head] in order to split the context into a new chunk. *) val register_split_callback : - chain_store -> (unit -> unit tzresult Lwt.t) -> unit + chain_store -> (unit -> unit tzresult Lwt.t) option -> unit end (** [global_block_watcher global_store] instantiates a new block diff --git a/src/lib_store/unix/test/test_utils.ml b/src/lib_store/unix/test/test_utils.ml index 3644d679ec1212ff093a7f63a33d8e45ac28e274..f1d877b1f3c628e54e64882a03e948d069d20206 100644 --- a/src/lib_store/unix/test/test_utils.ml +++ b/src/lib_store/unix/test/test_utils.ml @@ -195,7 +195,7 @@ let register_gc store = in return_unit in - Store.Chain.register_gc_callback chain_store gc + Store.Chain.register_gc_callback chain_store (Some gc) let wrap_store_init ?(patch_context = dummy_patch_context) ?(history_mode = History_mode.Archive) ?(allow_testchains = true) diff --git a/src/lib_validation/external_validator.ml b/src/lib_validation/external_validator.ml index 5be40598a57dd14ef8dd16ae4c9089bee8ab0f0f..317ff551a9c8b3cb0d2aea6f1d2f12187702fbcf 100644 --- a/src/lib_validation/external_validator.ml +++ b/src/lib_validation/external_validator.ml @@ -533,7 +533,7 @@ let run ~readonly input output = loop cache None | External_validation.Context_split -> let*! () = Events.(emit context_split_request) () in - let () = Context.split context_index in + let*! () = Context.split context_index in let*! () = External_validation.send output