From dca022b4ac7771c49ee6618f7b9fbeab718f1723 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Wed, 31 Jul 2024 10:14:53 +0200 Subject: [PATCH 1/3] Node: improve Node.create interface --- docs/doc_gen/node_helpers.ml | 2 +- src/bin_node/node_run_command.ml | 2 +- src/lib_shell/node.ml | 6 +++--- src/lib_shell/node.mli | 2 +- src/lib_shell/test/test_node.ml | 6 +++--- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/doc_gen/node_helpers.ml b/docs/doc_gen/node_helpers.ml index 0ec233856f1f..9cec439e68dd 100644 --- a/docs/doc_gen/node_helpers.ml +++ b/docs/doc_gen/node_helpers.ml @@ -81,7 +81,7 @@ let with_node f = Tezos_shell_services.Shell_limits.default_block_validator_limits Tezos_shell_services.Shell_limits.default_prevalidator_limits Tezos_shell_services.Shell_limits.default_chain_validator_limits - None + ?history_mode:None in f node in diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index 01b72bd0f295..e1d056e24b92 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -358,7 +358,7 @@ let init_node ?sandbox ?target ~identity ~singleprocess ~internal_events config.shell.block_validator_limits config.shell.prevalidator_limits config.shell.chain_validator_limits - config.shell.history_mode + ?history_mode:config.shell.history_mode let rpc_metrics = Prometheus.Summary.v_labels diff --git a/src/lib_shell/node.ml b/src/lib_shell/node.ml index b44840e61c5e..d49c2f2882c6 100644 --- a/src/lib_shell/node.ml +++ b/src/lib_shell/node.ml @@ -217,8 +217,8 @@ let check_context_consistency store = tzfail Non_recoverable_context let create ?(sandboxed = false) ?sandbox_parameters - ?(context_pruning = Storage_maintenance.Enabled) ~singleprocess ~version - ~commit_info + ?(context_pruning = Storage_maintenance.Enabled) ?history_mode + ~singleprocess ~version ~commit_info { genesis; chain_name; @@ -238,7 +238,7 @@ let create ?(sandboxed = false) ?sandbox_parameters enable_testchain; 
dal_config; } peer_validator_limits block_validator_limits prevalidator_limits - chain_validator_limits history_mode = + chain_validator_limits = let open Lwt_result_syntax in let start_prevalidator, start_testchain = match p2p_params with diff --git a/src/lib_shell/node.mli b/src/lib_shell/node.mli index 07310be53d69..bf21e8a56eb5 100644 --- a/src/lib_shell/node.mli +++ b/src/lib_shell/node.mli @@ -57,6 +57,7 @@ val create : ?sandboxed:bool -> ?sandbox_parameters:Data_encoding.json -> ?context_pruning:Storage_maintenance.context_pruning -> + ?history_mode:History_mode.t -> singleprocess:bool -> version:string -> commit_info:Octez_node_version.commit_info -> @@ -65,7 +66,6 @@ val create : Shell_limits.block_validator_limits -> Shell_limits.prevalidator_limits -> Shell_limits.chain_validator_limits -> - History_mode.t option -> (t, tztrace) result Lwt.t val shutdown : t -> unit Lwt.t diff --git a/src/lib_shell/test/test_node.ml b/src/lib_shell/test/test_node.ml index 19f93f969f88..ebee6cf08ce0 100644 --- a/src/lib_shell/test/test_node.ml +++ b/src/lib_shell/test/test_node.ml @@ -140,7 +140,7 @@ let node_sandbox_initialization_events sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - None + ?history_mode:None in (* Start tests *) let evs = Mock_sink.get_events ?filter () in @@ -187,7 +187,7 @@ let node_initialization_events _sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - None + ?history_mode:None in (* Start tests *) let evs = Mock_sink.get_events ?filter () in @@ -240,7 +240,7 @@ let node_store_known_protocol_events _sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - None + 
?history_mode:None in (* Start tests *) Mock_sink.( -- GitLab From c6d074adc696bfbf985ff3463595623a38d7100b Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Wed, 17 Jul 2024 08:15:03 +0200 Subject: [PATCH 2/3] Store: allow custom maintenance delay --- docs/doc_gen/node_helpers.ml | 1 + src/bin_node/node_run_command.ml | 1 + src/lib_node_config/shared_arg.ml | 23 +- src/lib_node_config/shared_arg.mli | 2 +- src/lib_shell/node.ml | 4 +- src/lib_shell/node.mli | 1 + src/lib_shell/test/test_node.ml | 9 +- src/lib_shell_services/storage_maintenance.ml | 17 +- .../storage_maintenance.mli | 6 +- src/lib_store/mocked/store.ml | 4 +- src/lib_store/shared/naming.ml | 7 + src/lib_store/shared/naming.mli | 3 + src/lib_store/shared/store_events.ml | 16 ++ src/lib_store/store.mli | 4 + src/lib_store/unix/store.ml | 210 +++++++++++++----- src/lib_store/unix/store.mli | 9 +- src/lib_store/unix/test/test_testchain.ml | 5 +- 17 files changed, 249 insertions(+), 73 deletions(-) diff --git a/docs/doc_gen/node_helpers.ml b/docs/doc_gen/node_helpers.ml index 9cec439e68dd..36948d79884a 100644 --- a/docs/doc_gen/node_helpers.ml +++ b/docs/doc_gen/node_helpers.ml @@ -82,6 +82,7 @@ let with_node f = Tezos_shell_services.Shell_limits.default_prevalidator_limits Tezos_shell_services.Shell_limits.default_chain_validator_limits ?history_mode:None + ?maintenance_delay:None in f node in diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index e1d056e24b92..0bc02dfceff3 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -359,6 +359,7 @@ let init_node ?sandbox ?target ~identity ~singleprocess ~internal_events config.shell.prevalidator_limits config.shell.chain_validator_limits ?history_mode:config.shell.history_mode + ?maintenance_delay:config.shell.storage_maintenance_delay let rpc_metrics = Prometheus.Summary.v_labels diff --git a/src/lib_node_config/shared_arg.ml b/src/lib_node_config/shared_arg.ml index 
052989eea602..b331308067ba 100644 --- a/src/lib_node_config/shared_arg.ml +++ b/src/lib_node_config/shared_arg.ml @@ -71,7 +71,7 @@ type t = { operation_metadata_size_limit : Shell_limits.operation_metadata_size_limit option; context_pruning : Storage_maintenance.context_pruning option; - storage_maintenance_delay : Storage_maintenance.delay; + storage_maintenance_delay : Storage_maintenance.delay option; } type error += @@ -774,14 +774,23 @@ module Term = struct let storage_maintenance_delay = let open Storage_maintenance in let doc = "Configures the storage maintenance delays" in - let get_storage_maintenance_delay_arg str = - match str with - | "disabled" -> `Ok Disabled - | _ -> `Error "storage-maintenance-delay only supports \"disabled\" mode" + let delay_converter = + let parse_storage_maintenance_delay_arg str = + match str with + | "disabled" -> `Ok Disabled + | _ -> ( + match Int32.of_string_opt str with + | Some delay -> `Ok (Custom delay) + | None -> + `Error + "storage-maintenance-delay only supports \"disabled\" or \ \"<n>\" mode") + in + (parse_storage_maintenance_delay_arg, pp_delay) in Arg.( value - & opt (get_storage_maintenance_delay_arg, pp_delay) Disabled + & opt (some delay_converter) None & info ~docs ~doc ["storage-maintenance-delay"]) (* Args. 
*) @@ -1106,7 +1115,7 @@ let patch_config ?(may_override_network = false) ?(emit = Event.emit) ?history_mode ?latency ?context_pruning - ~storage_maintenance_delay + ?storage_maintenance_delay cfg let read_and_patch_config_file ?may_override_network ?emit diff --git a/src/lib_node_config/shared_arg.mli b/src/lib_node_config/shared_arg.mli index 06ad1ea1c643..00cb748ba56d 100644 --- a/src/lib_node_config/shared_arg.mli +++ b/src/lib_node_config/shared_arg.mli @@ -87,7 +87,7 @@ type t = { Shell_limits.operation_metadata_size_limit option; (** maximum operation metadata size allowed to be stored on disk *) context_pruning : Storage_maintenance.context_pruning option; - storage_maintenance_delay : Storage_maintenance.delay; + storage_maintenance_delay : Storage_maintenance.delay option; } val process_command : unit tzresult Lwt.t -> unit Cmdliner.Term.ret diff --git a/src/lib_shell/node.ml b/src/lib_shell/node.ml index d49c2f2882c6..0ab9410e170e 100644 --- a/src/lib_shell/node.ml +++ b/src/lib_shell/node.ml @@ -218,7 +218,7 @@ let check_context_consistency store = let create ?(sandboxed = false) ?sandbox_parameters ?(context_pruning = Storage_maintenance.Enabled) ?history_mode - ~singleprocess ~version ~commit_info + ?maintenance_delay ~singleprocess ~version ~commit_info { genesis; chain_name; @@ -271,6 +271,7 @@ let create ?(sandboxed = false) ?sandbox_parameters ~allow_testchains:start_testchain ~readonly:false ~context_pruning + ?maintenance_delay genesis in let main_chain_store = Store.main_chain_store store in @@ -313,6 +314,7 @@ let create ?(sandboxed = false) ?sandbox_parameters ~allow_testchains:start_testchain ~readonly:false ~context_pruning + ?maintenance_delay genesis in return (validator_process, store) diff --git a/src/lib_shell/node.mli b/src/lib_shell/node.mli index bf21e8a56eb5..121063baf990 100644 --- a/src/lib_shell/node.mli +++ b/src/lib_shell/node.mli @@ -58,6 +58,7 @@ val create : ?sandbox_parameters:Data_encoding.json -> 
?context_pruning:Storage_maintenance.context_pruning -> ?history_mode:History_mode.t -> + ?maintenance_delay:Storage_maintenance.delay -> singleprocess:bool -> version:string -> commit_info:Octez_node_version.commit_info -> diff --git a/src/lib_shell/test/test_node.ml b/src/lib_shell/test/test_node.ml index ebee6cf08ce0..a65add2f7b59 100644 --- a/src/lib_shell/test/test_node.ml +++ b/src/lib_shell/test/test_node.ml @@ -140,7 +140,8 @@ let node_sandbox_initialization_events sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - ?history_mode:None + ?history_mode:None (* Tezos_shell_services.Storage_maintenance option *) + ?maintenance_delay:None in (* Start tests *) let evs = Mock_sink.get_events ?filter () in @@ -187,7 +188,8 @@ let node_initialization_events _sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - ?history_mode:None + ?history_mode:None (* Tezos_shell_services.Storage_maintenance option *) + ?maintenance_delay:None in (* Start tests *) let evs = Mock_sink.get_events ?filter () in @@ -240,7 +242,8 @@ let node_store_known_protocol_events _sandbox_parameters config _switch () = (* Tezos_shell.Node.chain_validator_limits *) Shell_limits.default_chain_validator_limits (* Tezos_shell_services.History_mode.t option *) - ?history_mode:None + ?history_mode:None (* Tezos_shell_services.Storage_maintenance option *) + ?maintenance_delay:None in (* Start tests *) Mock_sink.( diff --git a/src/lib_shell_services/storage_maintenance.ml b/src/lib_shell_services/storage_maintenance.ml index c7c0d1b683c3..009cf21d73e2 100644 --- a/src/lib_shell_services/storage_maintenance.ml +++ b/src/lib_shell_services/storage_maintenance.ml @@ -39,7 +39,7 @@ let pp_context_pruning fmt = function | Disabled -> Format.fprintf fmt 
"disabled" | Enabled -> Format.fprintf fmt "enabled" -type delay = Disabled +type delay = Disabled | Custom of Int32.t let delay_encoding = let open Data_encoding in @@ -57,8 +57,19 @@ let delay_encoding = delay, as soon as a new cycle starts." (Tag 0) (constant "disabled") - (function Disabled -> Some ()) + (function Disabled -> Some () | _ -> None) (fun () -> Disabled); + case + ~title:"custom" + ~description: + "When \"custom \" is set, storage maintenance is triggered \ + \"N\" blocks after the start of a new cycle." + (Tag 1) + (obj1 (req "custom" int32)) + (function Custom delay -> Some delay | _ -> None) + (fun delay -> Custom delay); ]) -let pp_delay fmt = function Disabled -> Format.fprintf fmt "disabled" +let pp_delay fmt = function + | Disabled -> Format.fprintf fmt "disabled" + | Custom delay -> Format.fprintf fmt "custom %ld" delay diff --git a/src/lib_shell_services/storage_maintenance.mli b/src/lib_shell_services/storage_maintenance.mli index 1a96fb55dfae..d9856201a051 100644 --- a/src/lib_shell_services/storage_maintenance.mli +++ b/src/lib_shell_services/storage_maintenance.mli @@ -24,8 +24,10 @@ val pp_context_pruning : Format.formatter -> context_pruning -> unit should be delayed or not. Setting it to [Disabled] will trigger the storage maintenance as soon as possible, that is, at the very beginning of a new cycle - dawn. *) -type delay = Disabled + dawn. + [Custom n] will trigger the storage maintenance n blocks + subsequently to a new cycle dawn. 
*) +type delay = Disabled | Custom of Int32.t val delay_encoding : delay Data_encoding.t diff --git a/src/lib_store/mocked/store.ml b/src/lib_store/mocked/store.ml index 46a5a040392a..25637ea0e24e 100644 --- a/src/lib_store/mocked/store.ml +++ b/src/lib_store/mocked/store.ml @@ -1824,8 +1824,8 @@ let store_dirs = ref [] let context_dirs = ref [] let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false) - ?block_cache_limit ?context_pruning:_ ~store_dir ~context_dir - ~allow_testchains genesis = + ?block_cache_limit ?context_pruning:_ ?maintenance_delay:_ ~store_dir + ~context_dir ~allow_testchains genesis = let open Lwt_result_syntax in if List.mem ~equal:String.equal context_dir !context_dirs then Format.kasprintf diff --git a/src/lib_store/shared/naming.ml b/src/lib_store/shared/naming.ml index d94ce64af0aa..1619672296e6 100644 --- a/src/lib_store/shared/naming.ml +++ b/src/lib_store/shared/naming.ml @@ -165,6 +165,13 @@ let legacy_block_store_status_file dir = Block_store_status.Legacy.encoding Block_store_status.Legacy.equal +let scheduled_maintenance dir = + make_encoded_file + dir + ~filename:"scheduled_maintenance" + Data_encoding.(option int32) + (Option.equal Int32.equal) + let cemented_blocks_dir dir = mk_dir dir "cemented" let cemented_blocks_level_index_dir dir = mk_dir dir "level_index" diff --git a/src/lib_store/shared/naming.mli b/src/lib_store/shared/naming.mli index 4a7a2f64a560..b3af94856ead 100644 --- a/src/lib_store/shared/naming.mli +++ b/src/lib_store/shared/naming.mli @@ -140,6 +140,9 @@ val legacy_block_store_status_file : [`Chain_dir] directory -> ([`Status], Block_store_status.Legacy.t) encoded_file +val scheduled_maintenance : + [`Chain_dir] directory -> int32 option Stored_data.file + val cemented_blocks_dir : [< `Chain_dir | `Snapshot_dir | `Snapshot_tmp_dir | `Tar_archive] directory -> [`Cemented_blocks_dir] directory diff --git a/src/lib_store/shared/store_events.ml b/src/lib_store/shared/store_events.ml index 
d0dbd8343ec1..c84ff9926ca9 100644 --- a/src/lib_store/shared/store_events.ml +++ b/src/lib_store/shared/store_events.ml @@ -277,6 +277,22 @@ let end_merging_stores = ~pp1:Time.System.Span.pp_hum ("time", Time.System.Span.encoding) +let delay_store_merging = + declare_1 + ~section + ~level:Info + ~name:"start_delayed_maintenance" + ~msg:"delaying storage maintenance (target {level})" + ("level", Data_encoding.int32) + +let delayed_store_merging_countdown = + declare_1 + ~section + ~level:Debug + ~name:"delayed_store_merging_countdown" + ~msg:"merging storage after {count} blocks scheduled delay" + ("count", Data_encoding.int32) + let start_context_gc = declare_1 ~section diff --git a/src/lib_store/store.mli b/src/lib_store/store.mli index 04557fcedb8f..3450bb1bff04 100644 --- a/src/lib_store/store.mli +++ b/src/lib_store/store.mli @@ -210,6 +210,9 @@ type chain_store pruning is expected to be run (if set to Enabled) or not (if set to Disabled) during a storage maintenance. + @param maintenance_delay allows to introduce a delay prior to the + trigger of the storage maintenance + @param readonly a flag that, if set to true, prevent writing throughout the store {b and} context. 
Default: false @@ -223,6 +226,7 @@ val init : ?readonly:bool -> ?block_cache_limit:int -> ?context_pruning:Storage_maintenance.context_pruning -> + ?maintenance_delay:Storage_maintenance.delay -> store_dir:string -> context_dir:string -> allow_testchains:bool -> diff --git a/src/lib_store/unix/store.ml b/src/lib_store/unix/store.ml index 559c944e1dc4..f592e6ac98b7 100644 --- a/src/lib_store/unix/store.ml +++ b/src/lib_store/unix/store.ml @@ -76,6 +76,11 @@ type store = { global_block_watcher : (chain_store * block) Lwt_watcher.input; } +and storage_maintenance = { + maintenance_delay : Storage_maintenance.delay; + scheduled_maintenance : Int32.t option Stored_data.t; +} + and chain_store = { global_store : store; chain_id : Chain_id.t; @@ -92,11 +97,12 @@ and chain_store = { Protocol_hash.Table.t; lockfile : Lwt_unix.file_descr; context_pruning : Storage_maintenance.context_pruning; + storage_maintenance : storage_maintenance; } and chain_state = { (* Following fields are not safe to update concurrently and must be - manipulated carefuly: *) + manipulated carefully: *) current_head_data : block_descriptor Stored_data.t; mutable last_finalized_block_level : Int32.t option; cementing_highwatermark_data : int32 option Stored_data.t; @@ -1581,7 +1587,10 @@ module Chain = struct are often short, this will lead to the optimal behaviour. As the split is necessary in the scope of the context pruning - only, it may be discarded depending on [context_pruning]. *) + only, it may be discarded depending on + [context_pruning]. However, it is mandatory that the split is not + delayed by the [maintenance_delay] argument as the split must + occur at the cycle start. 
*) let may_split_context ~context_pruning chain_store new_head_lpbl previous_head = let open Lwt_result_syntax in @@ -1643,7 +1652,8 @@ module Chain = struct trace Bad_head_invariant (let* pred_block = Block.read_block chain_store predecessor in - (* check that prededecessor's block metadata are available *) + (* check that predecessor's block metadata is + available *) let* _pred_head_metadata = Block.get_block_metadata chain_store pred_block in @@ -1701,6 +1711,9 @@ module Chain = struct ~checkpoint ~target in + (* [should_merge] is a placeholder acknowledging that a + storage maintenance can be triggered, thanks to several + fulfilled parameters. *) let should_merge = (* Make sure that the previous merge is completed before starting a new merge. If the lock on the chain_state is @@ -1718,48 +1731,113 @@ module Chain = struct in let* new_cementing_highwatermark = if should_merge then - let*! b = try_lock_for_write chain_store.lockfile in - match b with - | false -> - (* Delay the merge until the lock is available *) - return cementing_highwatermark - | true -> - (* Lock on lockfile is now taken *) - let finalizer new_highest_cemented_level = + (* [trigger_merge] is a placeholder that depends on + [should_merge] and that controls the delayed + maintenance. Thus, even if we [should_merge], + [trigger_merge] may interfere with the actual merge to + delay it. *) + let* trigger_merge = + match chain_store.storage_maintenance.maintenance_delay with + | Disabled -> + (* The storage maintenance delay is off -- merging right now. *) let* () = - merge_finalizer chain_store new_highest_cemented_level + (* Reset scheduled maintenance flag. It could be + necessary if the node was stopped during a + delay and restarted with the delay as + disabled. *) + Stored_data.write + chain_store.storage_maintenance.scheduled_maintenance + None in - let*! () = may_unlock chain_store.lockfile in - return_unit - in - let on_error errs = - (* Release the lockfile *) - let*! 
() = may_unlock chain_store.lockfile in - Lwt.return (Error errs) - in - (* Notes: - - The lock will be released when the merge - terminates. i.e. in [finalizer] or in - [on_error]. - - The heavy-work of this function is asynchronously - done so this call is expected to return quickly. *) - let* () = - Block_store.merge_stores - chain_store.block_store - ~on_error - ~finalizer - ~history_mode:(history_mode chain_store) - ~new_head - ~new_head_metadata - ~cementing_highwatermark: - (WithExceptions.Option.get - ~loc:__LOC__ - cementing_highwatermark) - ~context_pruning:chain_store.context_pruning - in - (* The new memory highwatermark is new_head_lpbl, the disk - value will be updated after the merge completion. *) - return_some new_head_lpbl + return_true + | Custom delay -> ( + let*! scheduled_maintenance = + Stored_data.get + chain_store.storage_maintenance.scheduled_maintenance + in + match scheduled_maintenance with + | Some target -> + (* A delayed merge is pending. *) + let level_to_merge_reached = + target <= Block.level new_head + in + let* () = + if level_to_merge_reached then + Stored_data.write + chain_store.storage_maintenance + .scheduled_maintenance + None + else + let*! () = + Store_events.( + emit + delayed_store_merging_countdown + Int32.(sub target (Block.level new_head))) + in + return_unit + in + return level_to_merge_reached + | None -> + (* A merge is ready to be executed, setting the + target for the delayed execution. *) + let new_target = Int32.add (Block.level new_head) delay in + let* () = + Stored_data.write + chain_store.storage_maintenance.scheduled_maintenance + (Some new_target) + in + let*! () = + Store_events.(emit delay_store_merging new_target) + in + return_false) + in + (* We effectively trigger the merge only if the delayed + maintenance is disabled or if the targeted delay is + reached. *) + if trigger_merge then + let*! 
b = try_lock_for_write chain_store.lockfile in + match b with + | false -> + (* Delay the merge until the lock is available *) + return cementing_highwatermark + | true -> + (* Lock on lockfile is now taken *) + let finalizer new_highest_cemented_level = + let* () = + merge_finalizer chain_store new_highest_cemented_level + in + let*! () = may_unlock chain_store.lockfile in + return_unit + in + let on_error errs = + (* Release the lockfile *) + let*! () = may_unlock chain_store.lockfile in + Lwt.return (Error errs) + in + (* Notes: + - The lock will be released when the merge + terminates. i.e. in [finalizer] or in + [on_error]. + - The heavy-work of this function is asynchronously + done so this call is expected to return quickly. *) + let* () = + Block_store.merge_stores + chain_store.block_store + ~on_error + ~finalizer + ~history_mode:(history_mode chain_store) + ~new_head + ~new_head_metadata + ~cementing_highwatermark: + (WithExceptions.Option.get + ~loc:__LOC__ + cementing_highwatermark) + ~context_pruning:chain_store.context_pruning + in + (* The new memory highwatermark is new_head_lpbl, the disk + value will be updated after the merge completion. *) + return_some new_head_lpbl + else return cementing_highwatermark else return cementing_highwatermark in let*! 
new_checkpoint = @@ -2104,8 +2182,8 @@ module Chain = struct } let create_chain_store ?block_cache_limit global_store chain_dir ?target - ~chain_id ?(expiration = None) ~context_pruning ?genesis_block ~genesis - ~genesis_context history_mode = + ~chain_id ?(expiration = None) ~context_pruning ~maintenance_delay + ?genesis_block ~genesis ~genesis_context history_mode = let open Lwt_result_syntax in (* Chain directory *) let genesis_block = @@ -2138,6 +2216,11 @@ module Chain = struct let validated_block_watcher = Lwt_watcher.create_input () in let block_rpc_directories = Protocol_hash.Table.create 7 in let* lockfile = create_lockfile chain_dir in + let* scheduled_maintenance = + Stored_data.init + (Naming.scheduled_maintenance chain_dir) + ~initial_data:None + in let chain_store : chain_store = { global_store; @@ -2152,12 +2235,13 @@ module Chain = struct block_rpc_directories; lockfile; context_pruning; + storage_maintenance = {maintenance_delay; scheduled_maintenance}; } in return chain_store let load_chain_store ?block_cache_limit global_store chain_dir ~chain_id - ~readonly ~context_pruning = + ~readonly ~context_pruning ~maintenance_delay = let open Lwt_result_syntax in let* chain_config_data = Stored_data.load (Naming.chain_config_file chain_dir) @@ -2176,6 +2260,11 @@ module Chain = struct let validated_block_watcher = Lwt_watcher.create_input () in let block_rpc_directories = Protocol_hash.Table.create 7 in let* lockfile = create_lockfile chain_dir in + let* scheduled_maintenance = + Stored_data.init + (Naming.scheduled_maintenance chain_dir) + ~initial_data:None + in let chain_store = { global_store; @@ -2191,6 +2280,7 @@ module Chain = struct block_rpc_directories; lockfile; context_pruning; + storage_maintenance = {maintenance_delay; scheduled_maintenance}; } in (* Also initalize the live blocks *) @@ -2243,7 +2333,8 @@ module Chain = struct let testchain_store {testchain_store; _} = testchain_store - let locked_load_testchain chain_store chain_state 
~chain_id = + let locked_load_testchain chain_store chain_state ~chain_id ~maintenance_delay + = let open Lwt_result_syntax in let {forked_chains_data; active_testchain; _} = chain_state in match active_testchain with @@ -2265,6 +2356,7 @@ module Chain = struct ~chain_id ~readonly:false ~context_pruning:Enabled + ~maintenance_delay in let testchain = {forked_block; testchain_store} in return_some testchain) @@ -2304,6 +2396,7 @@ module Chain = struct chain_store chain_state ~chain_id:testchain_id + ~maintenance_delay:Storage_maintenance.Disabled in match o with | None -> tzfail (Cannot_load_testchain testchain_dir_path) @@ -2332,6 +2425,7 @@ module Chain = struct ~chain_id:testchain_id ~expiration:(Some expiration) ~context_pruning:Enabled + ~maintenance_delay:Storage_maintenance.Disabled ~genesis_block ~genesis ~genesis_context @@ -2362,9 +2456,13 @@ module Chain = struct testchain )) (* Look for chain_store's testchains - does not look recursively *) - let load_testchain chain_store ~chain_id = + let load_testchain chain_store ~chain_id ~maintenance_delay = Shared.locked_use chain_store.chain_state (fun chain_state -> - locked_load_testchain chain_store chain_state ~chain_id) + locked_load_testchain + chain_store + chain_state + ~chain_id + ~maintenance_delay) (* TODO (later) Also garbage-collect testchains store/context. *) let shutdown_testchain chain_store = @@ -2616,7 +2714,7 @@ end let create_store ?block_cache_limit ~context_index ~chain_id ~genesis ~genesis_context ?(history_mode = History_mode.default) ~allow_testchains - ~context_pruning store_dir = + ~context_pruning ~maintenance_delay store_dir = let open Lwt_result_syntax in let store_dir_path = Naming.dir_path store_dir in let*! 
() = Lwt_utils_unix.create_dir store_dir_path in @@ -2643,6 +2741,7 @@ let create_store ?block_cache_limit ~context_index ~chain_id ~genesis ~chain_id ~expiration:None ~context_pruning + ~maintenance_delay ~genesis ~genesis_context history_mode @@ -2651,7 +2750,8 @@ let create_store ?block_cache_limit ~context_index ~chain_id ~genesis return global_store let load_store ?history_mode ?block_cache_limit store_dir ~context_index - ~genesis ~chain_id ~allow_testchains ~readonly ~context_pruning () = + ~genesis ~chain_id ~allow_testchains ~readonly ~context_pruning + ~maintenance_delay () = let open Lwt_result_syntax in let chain_dir = Naming.chain_dir store_dir chain_id in let* () = @@ -2705,6 +2805,7 @@ let load_store ?history_mode ?block_cache_limit store_dir ~context_index ~chain_id ~readonly ~context_pruning + ~maintenance_delay in let stored_genesis = Chain.genesis main_chain_store in let* () = @@ -2744,7 +2845,8 @@ let check_history_mode_consistency chain_dir history_mode = let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false) ?block_cache_limit ?(context_pruning = Storage_maintenance.Enabled) - ~store_dir ~context_dir ~allow_testchains genesis = + ?(maintenance_delay = Storage_maintenance.Disabled) ~store_dir ~context_dir + ~allow_testchains genesis = let open Lwt_result_syntax in let*! () = Store_events.(emit init_store) (readonly, context_pruning) in let patch_context = @@ -2796,6 +2898,7 @@ let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false) ~allow_testchains ~readonly ~context_pruning + ~maintenance_delay () else (* Fresh store *) @@ -2810,6 +2913,7 @@ let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false) ~genesis_context ?history_mode ~allow_testchains + ~maintenance_delay in let main_chain_store = main_chain_store store in (* Emit a warning if context GC is not allowed. 
*) @@ -2862,6 +2966,7 @@ let may_switch_history_mode ~store_dir ~context_dir genesis ~new_history_mode = ~allow_testchains:true ~readonly:false ~context_pruning:Enabled + ~maintenance_delay:Storage_maintenance.Disabled () in let chain_store = main_chain_store store in @@ -3263,6 +3368,7 @@ module Unsafe = struct ~allow_testchains:false ~readonly:true ~context_pruning:Disabled + ~maintenance_delay:Storage_maintenance.Disabled () in let chain_store = main_chain_store store in diff --git a/src/lib_store/unix/store.mli b/src/lib_store/unix/store.mli index 10e9d04c7a04..3f8c89347064 100644 --- a/src/lib_store/unix/store.mli +++ b/src/lib_store/unix/store.mli @@ -209,6 +209,9 @@ type chain_store pruning is expected to be run (if set to Enabled) or not (if set to Disabled) during a storage maintenance. + @param maintenance_delay allows to introduce a delay prior to the + trigger of the storage maintenance + @param readonly a flag that, if set to true, prevent writing throughout the store {b and} context. Default: false @@ -222,6 +225,7 @@ val init : ?readonly:bool -> ?block_cache_limit:int -> ?context_pruning:Storage_maintenance.context_pruning -> + ?maintenance_delay:Storage_maintenance.delay -> store_dir:string -> context_dir:string -> allow_testchains:bool -> @@ -1041,7 +1045,10 @@ module Unsafe : sig val get_block_store : chain_store -> Block_store.block_store val load_testchain : - chain_store -> chain_id:Chain_id.t -> Chain.testchain option tzresult Lwt.t + chain_store -> + chain_id:Chain_id.t -> + maintenance_delay:Storage_maintenance.delay -> + Chain.testchain option tzresult Lwt.t (** [set_head chain_store block] sets the block as the current head of [chain_store] without checks. 
*) diff --git a/src/lib_store/unix/test/test_testchain.ml b/src/lib_store/unix/test/test_testchain.ml index aeaab34f3721..3bb58efb5d16 100644 --- a/src/lib_store/unix/test/test_testchain.ml +++ b/src/lib_store/unix/test/test_testchain.ml @@ -132,7 +132,10 @@ let test_shutdown store = | Some _ -> Assert.fail_msg "test chain still initialized" | None -> ( let* o = - Store.Unsafe.load_testchain chain_store ~chain_id:testchain_id + Store.Unsafe.load_testchain + chain_store + ~chain_id:testchain_id + ~maintenance_delay:Storage_maintenance.Disabled in match o with | None -> Assert.fail_msg "failed to load the existing test chain" -- GitLab From b86f95e97fd5cd4a37180d4b9046ddf94d96d42f Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Mon, 29 Jul 2024 17:18:47 +0200 Subject: [PATCH 3/3] Tezt: test custom storage maintenance delay --- .../storage_maintenance.mli | 4 +- tezt/tests/storage_maintenance.ml | 146 ++++++++++++++++-- 2 files changed, 134 insertions(+), 16 deletions(-) diff --git a/src/lib_shell_services/storage_maintenance.mli b/src/lib_shell_services/storage_maintenance.mli index d9856201a051..8700ad0c7113 100644 --- a/src/lib_shell_services/storage_maintenance.mli +++ b/src/lib_shell_services/storage_maintenance.mli @@ -25,8 +25,8 @@ val pp_context_pruning : Format.formatter -> context_pruning -> unit Setting it to [Disabled] will trigger the storage maintenance as soon as possible, that is, at the very beginning of a new cycle dawn. - [Custom n] will trigger the storage maintenance n blocks - subsequently to a new cycle dawn. *) + [Custom n] will trigger the storage maintenance n blocks after a + new cycle dawn. 
*) type delay = Disabled | Custom of Int32.t val delay_encoding : delay Data_encoding.t diff --git a/tezt/tests/storage_maintenance.ml b/tezt/tests/storage_maintenance.ml index baf3888a49ef..345a21b5e662 100644 --- a/tezt/tests/storage_maintenance.ml +++ b/tezt/tests/storage_maintenance.ml @@ -101,26 +101,144 @@ let test_disabled_maintenance_delay = let* () = wait_merge in unit -(* This is a temporary test to ensure that only the disabled mode is - implemented. *) -let test_only_disabled_is_implemented = +(* This test aims to check the behavior of the custom delayed storage + maintenance. To do so, it will start 2 nodes, 1 with the delay set + to 2 and 1 without any delay and check the following behavior: + + regular_node delayed_node + (A) LEVEL 1 : -------------------------------------- + /\ | | + blocks_per_cycle | | + \/ | | + (B) LEVEL 9 : (storage maint.) | + /\ | | + custom_delay | | + \/ | | + LEVEL 11 : | (delayed storage maint.) + /\ | | + (blocks_per_cycle | | + - | | + custom_delay) | | + \/ | | + (C) LEVEL 17 : (storage maint.) (restart) + /\ | | + custom_delay | | + \/ | | + LEVEL 19 : | (delayed storage maint.) + /\ | | + (blocks_per_cycle | | + - | | + custom_delay) | | + LEVEL 25 : (storage maint.) (restart + disable delay) + LEVEL 26 : | (storage maint.) +*) +let test_custom_maintenance_delay = Protocol.register_test ~__FILE__ - ~title:(Format.asprintf "storage maintenance allows disabled only") - ~tags:[team; "storage"; "maintenance"; "delay"; "enable"] - ~uses_client:false - ~uses_admin_client:false - @@ fun _protocol -> - let node = Node.create Node.[Synchronisation_threshold 0] in + ~title:(Format.asprintf "storage maintenance custom delay") + ~tags:[team; "storage"; "maintenance"; "delay"; "custom"] + @@ fun protocol -> + let custom_delay = 2 in + let* delayed_node = + Node.init + ~name:"delayed_node" + Node. 
+ [ + Synchronisation_threshold 0; + Storage_maintenance_delay (string_of_int custom_delay); + ] + in + let* regular_node = + Node.init ~name:"regular_node" Node.[Synchronisation_threshold 0] + in + let* client = Client.init ~endpoint:(Node delayed_node) () in + let* () = Client.Admin.connect_address ~peer:regular_node client in + let* () = + Client.activate_protocol_and_wait ~protocol ~node:delayed_node client + in + let* (_ : int) = Node.wait_for_level regular_node 1 in + let blocks_per_cycle = 8 in + (* Step A: We bake enough blocks to trigger the first storage + maintenance. We expect the regular node to merge immediately and + the delayed node to merge after the given delay. *) + let merge_level_A = 1 in + let wait_delayed_storage_maintenance_A = + wait_for_complete_storage_maintenance delayed_node merge_level_A + in + let wait_regular_storage_maintenance_A = + wait_for_complete_storage_maintenance regular_node merge_level_A + in + let* () = + let nb = blocks_per_cycle in + Log.info "Baking %d blocks to trigger the regular storage maintenance" nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + Log.info "Waiting for the regular storage maintenance" ; + let* () = wait_regular_storage_maintenance_A in + let* () = + let nb = custom_delay in + Log.info "Baking %d blocks to trigger the delayed storage maintenance" nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + Log.info "Waiting for the delayed storage maintenance" ; + let* () = wait_delayed_storage_maintenance_A in + (* Step B: We bake enough blocks to trigger the delayed maintenance + but restart the node to check that the delay is maintained. 
*) + let merge_level_B = merge_level_A + blocks_per_cycle in + let wait_regular_storage_maintenance_B = + wait_for_complete_storage_maintenance regular_node merge_level_B + in + let* () = + let nb = blocks_per_cycle - custom_delay in + Log.info "Baking %d blocks to trigger the regular storage maintenance" nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + let* () = wait_regular_storage_maintenance_B in + Log.info "Restarting delayed storage maintenance node" ; + let* () = Node.terminate delayed_node in + let* () = Node.run delayed_node [] in + let* () = Node.wait_for_ready delayed_node in + let* () = + let nb = 1 in + Log.info + "Baking %d blocks should not trigger delayed storage maintenance" + nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + let wait_delayed_storage_maintenance_B = + wait_for_complete_storage_maintenance delayed_node merge_level_B + in + let* () = + let nb = 1 in + Log.info "Baking %d blocks should trigger delayed storage maintenance" nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + let* () = wait_delayed_storage_maintenance_B in + let merge_level_C = merge_level_B + blocks_per_cycle in + let* () = + let nb = blocks_per_cycle - custom_delay in + Log.info "Baking %d blocks to trigger the delayed storage maintenance" nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb + in + Log.info "Restarting node and disable storage maintenance delay" ; + let* () = Node.terminate delayed_node in + let* () = Node.run delayed_node [Storage_maintenance_delay "disabled"] in + let* () = Node.wait_for_ready delayed_node in + let wait_delayed_storage_maintenance_C = + wait_for_complete_storage_maintenance delayed_node merge_level_C + in let* () = - Node.spawn_config_update node [Storage_maintenance_delay "enabled"] - |> Process.check_error - ~msg:(rex "only supports \"disabled\" mode") - ~exit_code:124 + let nb = 1 in + Log.info + "Baking %d blocks should reset delayed storage maintenance and trigger \ + the merge" + 
nb ; + bake_blocks delayed_node client ~blocks_to_bake:nb in + let* () = wait_delayed_storage_maintenance_C in unit let register ~protocols = test_context_pruning_call protocols ; test_disabled_maintenance_delay protocols ; - test_only_disabled_is_implemented protocols + test_custom_maintenance_delay protocols -- GitLab