diff --git a/docs/doc_gen/node_helpers.ml b/docs/doc_gen/node_helpers.ml
index 0ec233856f1f7c8ff7fcc8121b6cbb00c1ab769f..36948d79884aed12554eb218c1b37a27708cfaa4 100644
--- a/docs/doc_gen/node_helpers.ml
+++ b/docs/doc_gen/node_helpers.ml
@@ -81,7 +81,8 @@ let with_node f =
       Tezos_shell_services.Shell_limits.default_block_validator_limits
       Tezos_shell_services.Shell_limits.default_prevalidator_limits
       Tezos_shell_services.Shell_limits.default_chain_validator_limits
-      None
+      ?history_mode:None
+      ?maintenance_delay:None
     in
     f node
   in
diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml
index e7c4ff8bbb4a7eef5ea309aed0ab4066a9d999cc..b990ca8cc7865b67c55d177357a2562ded69c2e4 100644
--- a/src/bin_node/node_run_command.ml
+++ b/src/bin_node/node_run_command.ml
@@ -378,7 +378,8 @@ let init_node ?sandbox ?target ~identity ~singleprocess ~internal_events
     config.shell.block_validator_limits
     config.shell.prevalidator_limits
     config.shell.chain_validator_limits
-    config.shell.history_mode
+    ?history_mode:config.shell.history_mode
+    ?maintenance_delay:config.shell.storage_maintenance_delay

 let rpc_metrics =
   Prometheus.Summary.v_labels
diff --git a/src/lib_node_config/shared_arg.ml b/src/lib_node_config/shared_arg.ml
index 4a6f5130e136344b7298d52be44fb2ba688cbf1a..8d081f83266eddb603bab1357d9d6f65f6b77df8 100644
--- a/src/lib_node_config/shared_arg.ml
+++ b/src/lib_node_config/shared_arg.ml
@@ -73,7 +73,7 @@ type t = {
     Shell_limits.operation_metadata_size_limit option;
   enable_http_cache_headers : bool option;
   context_pruning : Storage_maintenance.context_pruning option;
-  storage_maintenance_delay : Storage_maintenance.delay;
+  storage_maintenance_delay : Storage_maintenance.delay option;
 }

 type error +=
@@ -781,14 +781,23 @@ module Term = struct
   let storage_maintenance_delay =
     let open Storage_maintenance in
     let doc = "Configures the storage maintenance delays" in
-    let get_storage_maintenance_delay_arg str =
-      match str with
-      | "disabled" -> `Ok Disabled
-      | _ -> `Error "storage-maintenance-delay only supports \"disabled\" mode"
+    let delay_converter =
+      let parse_storage_maintenance_delay_arg str =
+        match str with
+        | "disabled" -> `Ok Disabled
+        | _ -> (
+            match Int32.of_string_opt str with
+            | Some delay -> `Ok (Custom delay)
+            | None ->
+                `Error
+                  "storage-maintenance-delay only supports \"disabled\" or \
+                   \"<N>\" mode")
+      in
+      (parse_storage_maintenance_delay_arg, pp_delay)
     in
     Arg.(
       value
-      & opt (get_storage_maintenance_delay_arg, pp_delay) Disabled
+      & opt (some delay_converter) None
      & info ~docs ~doc ["storage-maintenance-delay"])

   (* Args. *)
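The new converter accepts either the literal "disabled" or a raw number of blocks. A minimal standalone sketch of that parsing behavior (plain OCaml with illustrative names, outside of Cmdliner and of this MR):

type delay = Disabled | Custom of Int32.t

let parse str =
  match str with
  | "disabled" -> Ok Disabled
  | _ -> (
      match Int32.of_string_opt str with
      | Some delay -> Ok (Custom delay)
      | None -> Error "expected \"disabled\" or a number of blocks")

let () =
  assert (parse "disabled" = Ok Disabled) ;
  assert (parse "5" = Ok (Custom 5l)) ;
  assert (Result.is_error (parse "enabled"))

Using Int32.of_string_opt keeps the accepted range aligned with the int32 block levels manipulated by the store.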
diff --git a/src/lib_node_config/shared_arg.mli b/src/lib_node_config/shared_arg.mli
index d58c8a141fcb5c0855cbc26dd8d0d712eebb8987..bf933ce7060a7a0134f0e4a426f035c37458cd37 100644
--- a/src/lib_node_config/shared_arg.mli
+++ b/src/lib_node_config/shared_arg.mli
@@ -91,7 +91,7 @@ type t = {
       (** Adds Cache-control header directives to RPC responses for queries
           that are relative to the head block. *)
   context_pruning : Storage_maintenance.context_pruning option;
-  storage_maintenance_delay : Storage_maintenance.delay;
+  storage_maintenance_delay : Storage_maintenance.delay option;
 }

 val process_command : unit tzresult Lwt.t -> unit Cmdliner.Term.ret
diff --git a/src/lib_shell/node.ml b/src/lib_shell/node.ml
index a322e353a8da088f31c9e8a7be03358fda27917c..21ae4e0f558348da2f3fb1b5918c7adf3c0877d0 100644
--- a/src/lib_shell/node.ml
+++ b/src/lib_shell/node.ml
@@ -200,8 +200,8 @@ let check_context_consistency store =
       tzfail Non_recoverable_context

 let create ?(sandboxed = false) ?sandbox_parameters
-    ?(context_pruning = Storage_maintenance.Enabled) ~singleprocess ~version
-    ~commit_info
+    ?(context_pruning = Storage_maintenance.Enabled) ?history_mode
+    ?maintenance_delay ~singleprocess ~version ~commit_info
     {
       genesis;
       chain_name;
@@ -221,7 +221,7 @@ let create ?(sandboxed = false) ?sandbox_parameters
       enable_testchain;
       dal_config;
     } peer_validator_limits block_validator_limits prevalidator_limits
-    chain_validator_limits history_mode =
+    chain_validator_limits =
   let open Lwt_result_syntax in
   let start_prevalidator, start_testchain =
     match p2p_params with
@@ -254,6 +254,7 @@ let create ?(sandboxed = false) ?sandbox_parameters
           ~allow_testchains:start_testchain
           ~readonly:false
           ~context_pruning
+          ?maintenance_delay
           genesis
       in
       let main_chain_store = Store.main_chain_store store in
@@ -295,6 +296,7 @@ let create ?(sandboxed = false) ?sandbox_parameters
           ~allow_testchains:start_testchain
           ~readonly:false
           ~context_pruning
+          ?maintenance_delay
           genesis
       in
       return (validator_process, store)
diff --git a/src/lib_shell/node.mli b/src/lib_shell/node.mli
index ead6c56481698fece98b13d85945dbf7bd4d922c..9ff6e8e77c8d264424a73d64099c6c85007175d8 100644
--- a/src/lib_shell/node.mli
+++ b/src/lib_shell/node.mli
@@ -40,6 +40,8 @@ val create :
   ?sandboxed:bool ->
   ?sandbox_parameters:Data_encoding.json ->
   ?context_pruning:Storage_maintenance.context_pruning ->
+  ?history_mode:History_mode.t ->
+  ?maintenance_delay:Storage_maintenance.delay ->
   singleprocess:bool ->
   version:string ->
   commit_info:Octez_node_version.commit_info ->
@@ -48,7 +50,6 @@ val create :
   Shell_limits.block_validator_limits ->
   Shell_limits.prevalidator_limits ->
   Shell_limits.chain_validator_limits ->
-  History_mode.t option ->
   (t, tztrace) result Lwt.t

 val shutdown : t -> unit Lwt.t
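Note how the trailing positional [History_mode.t option] argument of [Node.create] becomes the optional [?history_mode], next to the new [?maintenance_delay]. A hypothetical, simplified sketch of the call-site effect (the [create] below is illustrative, not the shell's):

type history_mode = Archive | Full | Rolling

let create ?history_mode ?(maintenance_delay = 0l) ~singleprocess () =
  (Option.value history_mode ~default:Full, maintenance_delay, singleprocess)

let () =
  (* Callers used to pass [None] positionally; the argument is now
     omitted, or threaded through explicitly as an option. *)
  let _ = create ~singleprocess:false () in
  let _ = create ?history_mode:None ~singleprocess:false () in
  let _ =
    create ~history_mode:Rolling ~maintenance_delay:2l ~singleprocess:true ()
  in
  ()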
diff --git a/src/lib_shell/test/test_node.ml b/src/lib_shell/test/test_node.ml
index 19f93f969f88c31f560dda6d3b212099aa0f22df..a65add2f7b5979e45c9406c66bb5fb877ec136c9 100644
--- a/src/lib_shell/test/test_node.ml
+++ b/src/lib_shell/test/test_node.ml
@@ -140,7 +140,8 @@ let node_sandbox_initialization_events sandbox_parameters config _switch () =
       (* Tezos_shell.Node.chain_validator_limits *)
       Shell_limits.default_chain_validator_limits
       (* Tezos_shell_services.History_mode.t option *)
-      None
+      ?history_mode:None (* Tezos_shell_services.Storage_maintenance.delay option *)
+      ?maintenance_delay:None
   in
   (* Start tests *)
   let evs = Mock_sink.get_events ?filter () in
@@ -187,7 +188,8 @@ let node_initialization_events _sandbox_parameters config _switch () =
       (* Tezos_shell.Node.chain_validator_limits *)
       Shell_limits.default_chain_validator_limits
       (* Tezos_shell_services.History_mode.t option *)
-      None
+      ?history_mode:None (* Tezos_shell_services.Storage_maintenance.delay option *)
+      ?maintenance_delay:None
   in
   (* Start tests *)
   let evs = Mock_sink.get_events ?filter () in
@@ -240,7 +242,8 @@ let node_store_known_protocol_events _sandbox_parameters config _switch () =
       (* Tezos_shell.Node.chain_validator_limits *)
       Shell_limits.default_chain_validator_limits
       (* Tezos_shell_services.History_mode.t option *)
-      None
+      ?history_mode:None (* Tezos_shell_services.Storage_maintenance.delay option *)
+      ?maintenance_delay:None
   in
   (* Start tests *)
   Mock_sink.(
diff --git a/src/lib_shell_services/storage_maintenance.ml b/src/lib_shell_services/storage_maintenance.ml
index c7c0d1b683c34d79334f2f5367bd51f311795eb0..009cf21d73e2909e40d1d15281d21515fda9e4ac 100644
--- a/src/lib_shell_services/storage_maintenance.ml
+++ b/src/lib_shell_services/storage_maintenance.ml
@@ -39,7 +39,7 @@ let pp_context_pruning fmt = function
   | Disabled -> Format.fprintf fmt "disabled"
   | Enabled -> Format.fprintf fmt "enabled"

-type delay = Disabled
+type delay = Disabled | Custom of Int32.t

 let delay_encoding =
   let open Data_encoding in
@@ -57,8 +57,19 @@ let delay_encoding =
            delay, as soon as a new cycle starts."
         (Tag 0)
         (constant "disabled")
-        (function Disabled -> Some ())
+        (function Disabled -> Some () | _ -> None)
         (fun () -> Disabled);
+      case
+        ~title:"custom"
+        ~description:
+          "When \"custom <N>\" is set, storage maintenance is triggered \
+           \"N\" blocks after the start of a new cycle."
+        (Tag 1)
+        (obj1 (req "custom" int32))
+        (function Custom delay -> Some delay | _ -> None)
+        (fun delay -> Custom delay);
     ])

-let pp_delay fmt = function Disabled -> Format.fprintf fmt "disabled"
+let pp_delay fmt = function
+  | Disabled -> Format.fprintf fmt "disabled"
+  | Custom delay -> Format.fprintf fmt "custom %ld" delay
diff --git a/src/lib_shell_services/storage_maintenance.mli b/src/lib_shell_services/storage_maintenance.mli
index 1a96fb55dfaef888e7f4a47b045d4e6c1c1ae254..8700ad0c7113cce8214577d53803fbd7ee156425 100644
--- a/src/lib_shell_services/storage_maintenance.mli
+++ b/src/lib_shell_services/storage_maintenance.mli
@@ -24,8 +24,10 @@ val pp_context_pruning : Format.formatter -> context_pruning -> unit
     should be delayed or not. Setting it to [Disabled] will trigger
     the storage maintenance as soon as possible, that is, at the very
     beginning of a new cycle
-    dawn. *)
-type delay = Disabled
+    dawn.
+    [Custom n] will trigger the storage maintenance n blocks after a
+    new cycle dawn. *)
+type delay = Disabled | Custom of Int32.t

 val delay_encoding : delay Data_encoding.t
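A self-contained sketch (assuming the data-encoding library) of the union shape used by [delay_encoding]: tag 0 is the constant "disabled", tag 1 wraps the delay under a "custom" field. The roundtrip below states my expectation of the encoding's behavior; it is not a test from this MR:

type delay = Disabled | Custom of Int32.t

let delay_encoding =
  let open Data_encoding in
  union
    [
      case
        ~title:"disabled"
        (Tag 0)
        (constant "disabled")
        (function Disabled -> Some () | _ -> None)
        (fun () -> Disabled);
      case
        ~title:"custom"
        (Tag 1)
        (obj1 (req "custom" int32))
        (function Custom delay -> Some delay | _ -> None)
        (fun delay -> Custom delay);
    ]

let () =
  let roundtrip d =
    Data_encoding.Json.(destruct delay_encoding (construct delay_encoding d))
  in
  assert (roundtrip Disabled = Disabled) ;
  assert (roundtrip (Custom 2l) = Custom 2l)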
diff --git a/src/lib_store/mocked/store.ml b/src/lib_store/mocked/store.ml
index de8ed91096b6aab46500a26bfafd891479bcd26f..33fecb934c5632760c85ac6b262238954f0d5105 100644
--- a/src/lib_store/mocked/store.ml
+++ b/src/lib_store/mocked/store.ml
@@ -1818,8 +1818,8 @@ let store_dirs = ref []
 let context_dirs = ref []

 let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false)
-    ?block_cache_limit ?context_pruning:_ ~store_dir ~context_dir
-    ~allow_testchains genesis =
+    ?block_cache_limit ?context_pruning:_ ?maintenance_delay:_ ~store_dir
+    ~context_dir ~allow_testchains genesis =
   let open Lwt_result_syntax in
   if List.mem ~equal:String.equal context_dir !context_dirs then
     Format.kasprintf
@@ -1827,7 +1827,6 @@ let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false)
       "init: already initialized context in %s"
       context_dir ;
   context_dirs := context_dir :: !context_dirs ;
-  let store_dir = Naming.store_dir ~dir_path:store_dir in
   let chain_id = Chain_id.of_block_hash genesis.Genesis.block in
   let*! context_index, commit_genesis =
diff --git a/src/lib_store/shared/naming.ml b/src/lib_store/shared/naming.ml
index ab5a34fb3fe50b9fc96b0d90b300fe8b5b9a3156..62283f399962fab8a7fc852f787c67288b452230 100644
--- a/src/lib_store/shared/naming.ml
+++ b/src/lib_store/shared/naming.ml
@@ -162,6 +162,13 @@ let legacy_block_store_status_file dir =
     Block_store_status.Legacy.encoding
     Block_store_status.Legacy.equal

+let scheduled_maintenance dir =
+  make_encoded_file
+    dir
+    ~filename:"scheduled_maintenance"
+    Data_encoding.(option int32)
+    (Option.equal Int32.equal)
+
 let cemented_blocks_dir dir = mk_dir dir "cemented"

 let cemented_blocks_level_index_dir dir = mk_dir dir "level_index"
diff --git a/src/lib_store/shared/naming.mli b/src/lib_store/shared/naming.mli
index 7d44b08ffceb05fe0d9122fcc71292ffb626b9f3..3350b2fd6c58c3c51923016e920282e3204f534f 100644
--- a/src/lib_store/shared/naming.mli
+++ b/src/lib_store/shared/naming.mli
@@ -139,6 +139,9 @@ val legacy_block_store_status_file :
   [`Chain_dir] directory ->
   ([`Status], Block_store_status.Legacy.t) encoded_file

+val scheduled_maintenance :
+  [`Chain_dir] directory -> int32 option Stored_data.file
+
 val cemented_blocks_dir :
   [< `Chain_dir | `Snapshot_dir | `Snapshot_tmp_dir | `Tar_archive] directory ->
   [`Cemented_blocks_dir] directory
diff --git a/src/lib_store/shared/store_events.ml b/src/lib_store/shared/store_events.ml
index b97d606628a5226369e473370dad7019ea3d405e..801d52006c8305241742fee39da446a93e125131 100644
--- a/src/lib_store/shared/store_events.ml
+++ b/src/lib_store/shared/store_events.ml
@@ -279,6 +279,22 @@ let end_merging_stores =
     ~pp1:Time.System.Span.pp_hum
     ("time", Time.System.Span.encoding)

+let delay_store_merging =
+  declare_1
+    ~section
+    ~level:Info
+    ~name:"start_delayed_maintenance"
+    ~msg:"delaying storage maintenance (target {level})"
+    ("level", Data_encoding.int32)
+
+let delayed_store_merging_countdown =
+  declare_1
+    ~section
+    ~level:Debug
+    ~name:"delayed_store_merging_countdown"
+    ~msg:"merging storage in {count} blocks (scheduled delay)"
+    ("count", Data_encoding.int32)
+
 let start_context_gc =
   declare_1
     ~section
@@ -481,6 +497,7 @@ let store_was_fixed =

 let recover_merge =
   declare_0
+    ~alternative_color:Internal_event.Cyan
     ~section
     ~level:Notice
     ~name:"recovering_merge"
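The new "scheduled_maintenance" file persists the target level of a pending delayed merge as an [int32 option], compared with [Option.equal Int32.equal]. A small sketch (again assuming data-encoding) of the two states it can hold:

let () =
  let enc = Data_encoding.(option int32) in
  let roundtrip v = Data_encoding.Json.(destruct enc (construct enc v)) in
  (* [None]: no delayed maintenance is pending. *)
  assert (Option.equal Int32.equal (roundtrip None) None) ;
  (* [Some target]: a merge is scheduled for level [target]. *)
  assert (Option.equal Int32.equal (roundtrip (Some 11l)) (Some 11l))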
diff --git a/src/lib_store/store.mli b/src/lib_store/store.mli
index 2fa3eb5e6940f4a430be5efc203c83749c1d770e..5406aaa1e1577ab431788c0c030565122b1d1f74 100644
--- a/src/lib_store/store.mli
+++ b/src/lib_store/store.mli
@@ -210,6 +210,9 @@ type chain_store
     pruning is expected to be run (if set to Enabled) or not (if set
     to Disabled) during a storage maintenance.

+    @param maintenance_delay allows introducing a delay before the
+    storage maintenance is triggered
+
     @param readonly a flag that, if set to true, prevent writing
     throughout the store {b and} context.
     Default: false
@@ -224,6 +227,7 @@ val init :
   ?readonly:bool ->
   ?block_cache_limit:int ->
   ?context_pruning:Storage_maintenance.context_pruning ->
+  ?maintenance_delay:Storage_maintenance.delay ->
   store_dir:string ->
   context_dir:string ->
   allow_testchains:bool ->
diff --git a/src/lib_store/unix/store.ml b/src/lib_store/unix/store.ml
index 41c324241025dc5de75aeaa8083e978202d46fa0..75b4711fd61fa4b756f96946080e86707dc7af47 100644
--- a/src/lib_store/unix/store.ml
+++ b/src/lib_store/unix/store.ml
@@ -76,6 +76,11 @@ type store = {
   global_block_watcher : (chain_store * block) Lwt_watcher.input;
 }

+and storage_maintenance = {
+  maintenance_delay : Storage_maintenance.delay;
+  scheduled_maintenance : Int32.t option Stored_data.t;
+}
+
 and chain_store = {
   global_store : store;
   chain_id : Chain_id.t;
@@ -92,11 +97,12 @@ and chain_store = {
   lockfile : Lwt_unix.file_descr;
   stored_data_lockfile : Lwt_unix.file_descr;
   context_pruning : Storage_maintenance.context_pruning;
+  storage_maintenance : storage_maintenance;
 }

 and chain_state = {
   (* Following fields are not safe to update concurrently and must be
-     manipulated carefuly: *)
+     manipulated carefully: *)
   current_head_data : block_descriptor Stored_data.t;
   mutable last_finalized_block_level : Int32.t option;
   cementing_highwatermark_data : int32 option Stored_data.t;
@@ -1651,7 +1657,10 @@
      are often short, this will lead to the optimal behaviour.

      As the split is necessary in the scope of the context pruning
-     only, it may be discarded depending on [context_pruning]. *)
+     only, it may be discarded depending on
+     [context_pruning]. However, the split must not be delayed by the
+     [maintenance_delay] argument, as it has to occur at the cycle
+     start. *)
   let may_split_context ~context_pruning chain_store new_head_lpbl
       previous_head =
     let open Lwt_result_syntax in
@@ -1713,7 +1722,8 @@
         trace
           Bad_head_invariant
           (let* pred_block = Block.read_block chain_store predecessor in
-           (* check that prededecessor's block metadata are available *)
+           (* check that predecessor's block metadata is
+              available *)
            let* _pred_head_metadata =
              Block.get_block_metadata chain_store pred_block
            in
@@ -1771,6 +1781,9 @@
             ~checkpoint
             ~target
         in
+        (* [should_merge] states whether a storage maintenance can be
+           triggered, that is, whether all the conditions it requires
+           are fulfilled. *)
        let should_merge =
          (* Make sure that the previous merge is completed before
             starting a new merge. If the lock on the chain_state is
@@ -1788,48 +1801,113 @@
           in
         let* new_cementing_highwatermark =
           if should_merge then
-            let*! b = try_lock_for_write chain_store.lockfile in
-            match b with
-            | false ->
-                (* Delay the merge until the lock is available *)
-                return cementing_highwatermark
-            | true ->
-                (* Lock on lockfile is now taken *)
-                let finalizer new_highest_cemented_level =
-                  let* () =
-                    merge_finalizer chain_store new_highest_cemented_level
-                  in
-                  let*! () = may_unlock chain_store.lockfile in
-                  return_unit
-                in
-                let on_error errs =
-                  (* Release the lockfile *)
-                  let*! () = may_unlock chain_store.lockfile in
-                  Lwt.return (Error errs)
-                in
-                (* Notes:
-                   - The lock will be released when the merge
-                     terminates. i.e. in [finalizer] or in
-                     [on_error].
-                   - The heavy-work of this function is asynchronously
-                     done so this call is expected to return quickly. *)
-                let* () =
-                  Block_store.merge_stores
-                    chain_store.block_store
-                    ~on_error
-                    ~finalizer
-                    ~history_mode:(history_mode chain_store)
-                    ~new_head
-                    ~new_head_metadata
-                    ~cementing_highwatermark:
-                      (WithExceptions.Option.get
-                         ~loc:__LOC__
-                         cementing_highwatermark)
-                    ~context_pruning:chain_store.context_pruning
-                in
-                (* The new memory highwatermark is new_head_lpbl, the disk
-                   value will be updated after the merge completion. *)
-                return_some new_head_lpbl
+            (* [trigger_merge] is a refinement of [should_merge] that
+               takes the storage maintenance delay into account. Thus,
+               even if [should_merge] holds, [trigger_merge] may
+               postpone the actual merge until the scheduled target
+               level is reached. *)
+            let* trigger_merge =
+              match chain_store.storage_maintenance.maintenance_delay with
+              | Disabled ->
+                  (* The storage maintenance delay is off -- merging right now. *)
+                  let* () =
+                    (* Reset the scheduled maintenance flag. This is
+                       necessary if the node was stopped during a
+                       delay and then restarted with the delay
+                       disabled. *)
+                    Stored_data.write
+                      chain_store.storage_maintenance.scheduled_maintenance
+                      None
+                  in
+                  return_true
+              | Custom delay -> (
+                  let*! scheduled_maintenance =
+                    Stored_data.get
+                      chain_store.storage_maintenance.scheduled_maintenance
+                  in
+                  match scheduled_maintenance with
+                  | Some target ->
+                      (* A delayed merge is pending. *)
+                      let level_to_merge_reached =
+                        target <= Block.level new_head
+                      in
+                      let* () =
+                        if level_to_merge_reached then
+                          Stored_data.write
+                            chain_store.storage_maintenance
+                              .scheduled_maintenance
+                            None
+                        else
+                          let*! () =
+                            Store_events.(
+                              emit
+                                delayed_store_merging_countdown
+                                Int32.(sub target (Block.level new_head)))
+                          in
+                          return_unit
+                      in
+                      return level_to_merge_reached
+                  | None ->
+                      (* A merge is ready to be executed, setting the
+                         target for the delayed execution. *)
+                      let new_target = Int32.add (Block.level new_head) delay in
+                      let* () =
+                        Stored_data.write
+                          chain_store.storage_maintenance.scheduled_maintenance
+                          (Some new_target)
+                      in
+                      let*! () =
+                        Store_events.(emit delay_store_merging new_target)
+                      in
+                      return_false)
+            in
+            (* We effectively trigger the merge only if the delayed
+               maintenance is disabled or if the targeted delay is
+               reached. *)
+            if trigger_merge then
+              let*! b = try_lock_for_write chain_store.lockfile in
+              match b with
+              | false ->
+                  (* Delay the merge until the lock is available *)
+                  return cementing_highwatermark
+              | true ->
+                  (* Lock on lockfile is now taken *)
+                  let finalizer new_highest_cemented_level =
+                    let* () =
+                      merge_finalizer chain_store new_highest_cemented_level
+                    in
+                    let*! () = may_unlock chain_store.lockfile in
+                    return_unit
+                  in
+                  let on_error errs =
+                    (* Release the lockfile *)
+                    let*! () = may_unlock chain_store.lockfile in
+                    Lwt.return (Error errs)
+                  in
+                  (* Notes:
+                     - The lock will be released when the merge
+                       terminates. i.e. in [finalizer] or in
+                       [on_error].
+                     - The heavy-work of this function is asynchronously
+                       done so this call is expected to return quickly. *)
+                  let* () =
+                    Block_store.merge_stores
+                      chain_store.block_store
+                      ~on_error
+                      ~finalizer
+                      ~history_mode:(history_mode chain_store)
+                      ~new_head
+                      ~new_head_metadata
+                      ~cementing_highwatermark:
+                        (WithExceptions.Option.get
+                           ~loc:__LOC__
+                           cementing_highwatermark)
+                      ~context_pruning:chain_store.context_pruning
+                  in
+                  (* The new memory highwatermark is new_head_lpbl, the disk
+                     value will be updated after the merge completion. *)
+                  return_some new_head_lpbl
+            else return cementing_highwatermark
           else return cementing_highwatermark
         in
        let*! new_checkpoint =
@@ -2180,8 +2258,8 @@
     }

   let create_chain_store ?block_cache_limit global_store chain_dir ?target
-      ~chain_id ?(expiration = None) ~context_pruning ?genesis_block ~genesis
-      ~genesis_context history_mode =
+      ~chain_id ?(expiration = None) ~context_pruning ~maintenance_delay
+      ?genesis_block ~genesis ~genesis_context history_mode =
     let open Lwt_result_syntax in
     (* Chain directory *)
     let genesis_block =
@@ -2217,6 +2295,11 @@
     let* stored_data_lockfile =
       create_lockfile Naming.stored_data_lockfile chain_dir
     in
+    let* scheduled_maintenance =
+      Stored_data.init
+        (Naming.scheduled_maintenance chain_dir)
+        ~initial_data:None
+    in
     let chain_store : chain_store =
       {
         global_store;
@@ -2231,12 +2314,13 @@
         lockfile;
         stored_data_lockfile;
         context_pruning;
+        storage_maintenance = {maintenance_delay; scheduled_maintenance};
       }
     in
     return chain_store

   let load_chain_store ?block_cache_limit global_store chain_dir ~chain_id
-      ~readonly ~context_pruning =
+      ~readonly ~context_pruning ~maintenance_delay =
     let open Lwt_result_syntax in
     let* chain_config_data =
       Stored_data.load (Naming.chain_config_file chain_dir)
@@ -2264,6 +2348,11 @@
     let validated_block_watcher = Lwt_watcher.create_input () in
     let block_rpc_directories = Protocol_hash.Table.create 7 in
     let* lockfile = create_lockfile Naming.lockfile chain_dir in
+    let* scheduled_maintenance =
+      Stored_data.init
+        (Naming.scheduled_maintenance chain_dir)
+        ~initial_data:None
+    in
     let chain_store =
       {
         global_store;
@@ -2279,6 +2368,7 @@
         lockfile;
         stored_data_lockfile;
         context_pruning;
+        storage_maintenance = {maintenance_delay; scheduled_maintenance};
       }
     in
     (* Also initalize the live blocks *)
@@ -2335,7 +2425,8 @@

   let testchain_store {testchain_store; _} = testchain_store

-  let locked_load_testchain chain_store chain_state ~chain_id =
+  let locked_load_testchain chain_store chain_state ~chain_id ~maintenance_delay
+      =
     let open Lwt_result_syntax in
     let {forked_chains_data; active_testchain; _} = chain_state in
     match active_testchain with
@@ -2357,6 +2448,7 @@
             ~chain_id
             ~readonly:false
             ~context_pruning:Enabled
+            ~maintenance_delay
           in
           let testchain = {forked_block; testchain_store} in
           return_some testchain)
@@ -2399,6 +2491,7 @@
             chain_store
             chain_state
             ~chain_id:testchain_id
+            ~maintenance_delay:Storage_maintenance.Disabled
           in
           match o with
           | None -> tzfail (Cannot_load_testchain testchain_dir_path)
@@ -2427,6 +2520,7 @@
               ~chain_id:testchain_id
               ~expiration:(Some expiration)
               ~context_pruning:Enabled
+              ~maintenance_delay:Storage_maintenance.Disabled
               ~genesis_block
               ~genesis
               ~genesis_context
@@ -2463,9 +2557,13 @@
             testchain ))

   (* Look for chain_store's testchains - does not look recursively *)
-  let load_testchain chain_store ~chain_id =
+  let load_testchain chain_store ~chain_id ~maintenance_delay =
     Shared.locked_use chain_store.chain_state (fun chain_state ->
-        locked_load_testchain chain_store chain_state ~chain_id)
+        locked_load_testchain
+          chain_store
+          chain_state
+          ~chain_id
+          ~maintenance_delay)

   (* TODO (later) Also garbage-collect testchains store/context. *)
   let shutdown_testchain chain_store =
@@ -2688,7 +2786,7 @@ end

 let create_store ?block_cache_limit ~context_index ~chain_id ~genesis
     ~genesis_context ?(history_mode = History_mode.default) ~allow_testchains
-    ~context_pruning store_dir =
+    ~context_pruning ~maintenance_delay store_dir =
   let open Lwt_result_syntax in
   let store_dir_path = Naming.dir_path store_dir in
   let*! () = Lwt_utils_unix.create_dir store_dir_path in
@@ -2715,6 +2813,7 @@ let create_store ?block_cache_limit ~context_index ~chain_id ~genesis
       ~chain_id
       ~expiration:None
       ~context_pruning
+      ~maintenance_delay
       ~genesis
       ~genesis_context
       history_mode
@@ -2723,7 +2822,8 @@ let create_store ?block_cache_limit ~context_index ~chain_id ~genesis
   return global_store

 let load_store ?history_mode ?block_cache_limit store_dir ~context_index
-    ~genesis ~chain_id ~allow_testchains ~readonly ~context_pruning () =
+    ~genesis ~chain_id ~allow_testchains ~readonly ~context_pruning
+    ~maintenance_delay () =
   let open Lwt_result_syntax in
   let chain_dir = Naming.chain_dir store_dir chain_id in
   let* () =
@@ -2777,6 +2877,7 @@ let load_store ?history_mode ?block_cache_limit store_dir ~context_index
       ~chain_id
       ~readonly
       ~context_pruning
+      ~maintenance_delay
   in
   let stored_genesis = Chain.genesis main_chain_store in
   let* () =
@@ -2816,7 +2917,8 @@ let check_history_mode_consistency chain_dir history_mode =

 let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false)
     ?block_cache_limit ?(context_pruning = Storage_maintenance.Enabled)
-    ~store_dir ~context_dir ~allow_testchains genesis =
+    ?(maintenance_delay = Storage_maintenance.Disabled) ~store_dir ~context_dir
+    ~allow_testchains genesis =
   let open Lwt_result_syntax in
   let*! () = Store_events.(emit init_store) (readonly, context_pruning) in
   let store_dir = Naming.store_dir ~dir_path:store_dir in
@@ -2859,6 +2961,7 @@ let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false)
         ~allow_testchains
         ~readonly
         ~context_pruning
+        ~maintenance_delay
         ()
     else
       (* Fresh store *)
@@ -2873,6 +2976,7 @@ let init ?patch_context ?commit_genesis ?history_mode ?(readonly = false)
           ~genesis_context
           ?history_mode
           ~allow_testchains
+          ~maintenance_delay
       in
       let main_chain_store = main_chain_store store in
       (* Emit a warning if context GC is not allowed. *)
@@ -3012,6 +3116,7 @@ let may_switch_history_mode ~store_dir ~context_dir genesis ~new_history_mode =
       ~allow_testchains:true
       ~readonly:false
       ~context_pruning:Enabled
+      ~maintenance_delay:Storage_maintenance.Disabled
       ()
   in
   let chain_store = main_chain_store store in
@@ -3394,6 +3499,7 @@ module Unsafe = struct
         ~allow_testchains:false
         ~readonly:true
         ~context_pruning:Disabled
+        ~maintenance_delay:Storage_maintenance.Disabled
         ()
     in
     let chain_store = main_chain_store store in
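The delayed-maintenance logic added to the merge path above can be read as a small state machine over the persisted target level. A pure-OCaml sketch (names and function mine, mirroring the branches of [trigger_merge], not the store's actual API):

type delay = Disabled | Custom of Int32.t

(* Returns (trigger_merge_now, new_persisted_target). *)
let trigger_merge ~maintenance_delay ~scheduled ~head_level =
  match (maintenance_delay, scheduled) with
  | Disabled, _ ->
      (* No delay: merge now; also clear any stale target left by a
         run that was stopped mid-delay. *)
      (true, None)
  | Custom delay, None ->
      (* A merge just became possible: schedule it [delay] blocks ahead. *)
      (false, Some (Int32.add head_level delay))
  | Custom _, Some target ->
      if target <= head_level then (true, None) (* the delay has elapsed *)
      else (false, Some target) (* keep counting down *)

let () =
  (* With a delay of 2 and a merge becoming possible at level 9: *)
  let d = Custom 2l in
  assert (trigger_merge ~maintenance_delay:d ~scheduled:None ~head_level:9l = (false, Some 11l)) ;
  assert (trigger_merge ~maintenance_delay:d ~scheduled:(Some 11l) ~head_level:10l = (false, Some 11l)) ;
  assert (trigger_merge ~maintenance_delay:d ~scheduled:(Some 11l) ~head_level:11l = (true, None)) ;
  assert (trigger_merge ~maintenance_delay:Disabled ~scheduled:(Some 11l) ~head_level:12l = (true, None))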
diff --git a/src/lib_store/unix/store.mli b/src/lib_store/unix/store.mli
index 317a4fb8932d614dc30c7dd16192792fc5f862f8..b129d4748e8a999005502f00ebd4c83efff7671c 100644
--- a/src/lib_store/unix/store.mli
+++ b/src/lib_store/unix/store.mli
@@ -209,6 +209,9 @@ type chain_store
     pruning is expected to be run (if set to Enabled) or not (if set
     to Disabled) during a storage maintenance.

+    @param maintenance_delay allows introducing a delay before the
+    storage maintenance is triggered
+
     @param readonly a flag that, if set to true, prevent writing
     throughout the store {b and} context.
     Default: false
@@ -222,6 +225,7 @@ val init :
   ?readonly:bool ->
   ?block_cache_limit:int ->
   ?context_pruning:Storage_maintenance.context_pruning ->
+  ?maintenance_delay:Storage_maintenance.delay ->
   store_dir:string ->
   context_dir:string ->
   allow_testchains:bool ->
@@ -1045,7 +1049,10 @@ module Unsafe : sig
   val get_block_store : chain_store -> Block_store.block_store

   val load_testchain :
-    chain_store -> chain_id:Chain_id.t -> Chain.testchain option tzresult Lwt.t
+    chain_store ->
+    chain_id:Chain_id.t ->
+    maintenance_delay:Storage_maintenance.delay ->
+    Chain.testchain option tzresult Lwt.t

   (** [set_head chain_store block] sets the block as the current head of
       [chain_store] without checks. *)
diff --git a/src/lib_store/unix/test/test_testchain.ml b/src/lib_store/unix/test/test_testchain.ml
index aeaab34f3721136646e960950222686efb514640..3bb58efb5d16b9eb146e22749cb88f3269a6006c 100644
--- a/src/lib_store/unix/test/test_testchain.ml
+++ b/src/lib_store/unix/test/test_testchain.ml
@@ -132,7 +132,10 @@ let test_shutdown store =
   | Some _ -> Assert.fail_msg "test chain still initialized"
   | None -> (
       let* o =
-        Store.Unsafe.load_testchain chain_store ~chain_id:testchain_id
+        Store.Unsafe.load_testchain
+          chain_store
+          ~chain_id:testchain_id
+          ~maintenance_delay:Storage_maintenance.Disabled
       in
       match o with
       | None -> Assert.fail_msg "failed to load the existing test chain"
diff --git a/tezt/tests/storage_maintenance.ml b/tezt/tests/storage_maintenance.ml
index ae9fab0694d0f6810d934952158904d5dc4b105d..3a59c77a0fa37df99f3339b1bfa4658677c9b2db 100644
--- a/tezt/tests/storage_maintenance.ml
+++ b/tezt/tests/storage_maintenance.ml
@@ -102,26 +102,144 @@
   let* () = wait_merge in
   unit

-(* This is a temporary test to ensure that only the disabled mode is
-   implemented. *)
-let test_only_disabled_is_implemented =
+(* This test checks the behavior of the custom delayed storage
+   maintenance. To do so, it starts two nodes, one with the delay set
+   to 2 and one without any delay, and checks the following behavior:
+                   regular_node           delayed_node
+   (A) LEVEL 1  : --------------------------------------
+        /\                |                     |
+   blocks_per_cycle       |                     |
+        \/                |                     |
+   (B) LEVEL 9  :  (storage maint.)            |
+        /\                |                     |
+   custom_delay           |                     |
+        \/                |                     |
+       LEVEL 11 :         |          (delayed storage maint.)
+        /\                |                     |
+   (blocks_per_cycle      |                     |
+        -                 |                     |
+    custom_delay)         |                     |
+        \/                |                     |
+   (C) LEVEL 17 :  (storage maint.)         (restart)
+        /\                |                     |
+   custom_delay           |                     |
+        \/                |                     |
+       LEVEL 19 :         |          (delayed storage maint.)
+        /\                |                     |
+   (blocks_per_cycle      |                     |
+        -                 |                     |
+    custom_delay)         |                     |
+       LEVEL 25 :  (storage maint.)   (restart + disable delay)
+       LEVEL 26 :         |              (storage maint.)
+*)
+let test_custom_maintenance_delay =
   Protocol.register_test
     ~__FILE__
-    ~title:(Format.asprintf "storage maintenance allows disabled only")
-    ~tags:[team; "storage"; "maintenance"; "delay"; "enable"]
-    ~uses_client:false
-    ~uses_admin_client:false
-  @@ fun _protocol ->
-  let node = Node.create Node.[Synchronisation_threshold 0] in
+    ~title:(Format.asprintf "storage maintenance custom delay")
+    ~tags:[team; "storage"; "maintenance"; "delay"; "custom"]
+  @@ fun protocol ->
+  let custom_delay = 2 in
+  let* delayed_node =
+    Node.init
+      ~name:"delayed_node"
+      Node.
+        [
+          Synchronisation_threshold 0;
+          Storage_maintenance_delay (string_of_int custom_delay);
+        ]
+  in
+  let* regular_node =
+    Node.init ~name:"regular_node" Node.[Synchronisation_threshold 0]
+  in
+  let* client = Client.init ~endpoint:(Node delayed_node) () in
+  let* () = Client.Admin.connect_address ~peer:regular_node client in
+  let* () =
+    Client.activate_protocol_and_wait ~protocol ~node:delayed_node client
+  in
+  let* (_ : int) = Node.wait_for_level regular_node 1 in
+  let blocks_per_cycle = 8 in
+  (* Step A: We bake enough blocks to trigger the first storage
+     maintenance. We expect the regular node to merge immediately and
+     the delayed node to merge after the given delay. *)
+  let merge_level_A = 1 in
+  let wait_delayed_storage_maintenance_A =
+    wait_for_complete_storage_maintenance delayed_node merge_level_A
+  in
+  let wait_regular_storage_maintenance_A =
+    wait_for_complete_storage_maintenance regular_node merge_level_A
+  in
+  let* () =
+    let nb = blocks_per_cycle in
+    Log.info "Baking %d blocks to trigger the regular storage maintenance" nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  Log.info "Waiting for the regular storage maintenance" ;
+  let* () = wait_regular_storage_maintenance_A in
+  let* () =
+    let nb = custom_delay in
+    Log.info "Baking %d blocks to trigger the delayed storage maintenance" nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  Log.info "Waiting for the delayed storage maintenance" ;
+  let* () = wait_delayed_storage_maintenance_A in
+  (* Step B: We bake enough blocks to trigger the delayed maintenance
+     but restart the node to check that the delay is maintained. *)
+  let merge_level_B = merge_level_A + blocks_per_cycle in
+  let wait_regular_storage_maintenance_B =
+    wait_for_complete_storage_maintenance regular_node merge_level_B
+  in
+  let* () =
+    let nb = blocks_per_cycle - custom_delay in
+    Log.info "Baking %d blocks to trigger the regular storage maintenance" nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  let* () = wait_regular_storage_maintenance_B in
+  Log.info "Restarting delayed storage maintenance node" ;
+  let* () = Node.terminate delayed_node in
+  let* () = Node.run delayed_node [] in
+  let* () = Node.wait_for_ready delayed_node in
+  let* () =
+    let nb = 1 in
+    Log.info
+      "Baking %d blocks should not trigger delayed storage maintenance"
+      nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  let wait_delayed_storage_maintenance_B =
+    wait_for_complete_storage_maintenance delayed_node merge_level_B
+  in
+  let* () =
+    let nb = 1 in
+    Log.info "Baking %d blocks should trigger delayed storage maintenance" nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  let* () = wait_delayed_storage_maintenance_B in
+  let merge_level_C = merge_level_B + blocks_per_cycle in
+  let* () =
+    let nb = blocks_per_cycle - custom_delay in
+    Log.info "Baking %d blocks to trigger the delayed storage maintenance" nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
+  in
+  Log.info "Restarting node and disabling the storage maintenance delay" ;
+  let* () = Node.terminate delayed_node in
+  let* () = Node.run delayed_node [Storage_maintenance_delay "disabled"] in
+  let* () = Node.wait_for_ready delayed_node in
+  let wait_delayed_storage_maintenance_C =
+    wait_for_complete_storage_maintenance delayed_node merge_level_C
+  in
   let* () =
+    let nb = 1 in
+    Log.info
+      "Baking %d blocks should reset delayed storage maintenance and trigger \
+       the merge"
+      nb ;
+    bake_blocks delayed_node client ~blocks_to_bake:nb
   in
+  let* () = wait_delayed_storage_maintenance_C in
   unit

 let register ~protocols =
   test_context_pruning_call protocols ;
   test_disabled_maintenance_delay protocols ;
-  test_only_disabled_is_implemented protocols
+  test_custom_maintenance_delay protocols
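For reference, the levels at which the scenario expects each node to run its maintenance, derived from blocks_per_cycle = 8 and custom_delay = 2 (my arithmetic, matching the diagram in the test's comment):

let () =
  let blocks_per_cycle = 8 and custom_delay = 2 in
  (* The regular node merges at each cycle dawn. *)
  let regular = List.map (fun c -> 1 + (c * blocks_per_cycle)) [1; 2; 3] in
  assert (regular = [9; 17; 25]) ;
  (* The delayed node merges [custom_delay] blocks after each dawn,
     except after the final restart, where the delay was disabled and
     the pending maintenance fires on the next baked block. *)
  let delayed = [9 + custom_delay; 17 + custom_delay; 25 + 1] in
  assert (delayed = [11; 19; 26])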