From fa9f353dd5303e8461400c258ca0e2882c75dfc5 Mon Sep 17 00:00:00 2001 From: Valentin Chaboche Date: Fri, 11 Apr 2025 14:33:41 +0200 Subject: [PATCH 1/5] Baker: move retry to lib_stdlib_unix --- src/lib_stdlib_unix/utils.ml | 40 +++++++++++++++++ src/lib_stdlib_unix/utils.mli | 19 ++++++++ .../lib_delegate/baking_scheduling.ml | 45 ++++++------------- .../lib_delegate/baking_scheduling.ml | 45 ++++++------------- .../lib_delegate/baking_scheduling.ml | 45 ++++++------------- 5 files changed, 101 insertions(+), 93 deletions(-) diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 70a594eeb2c5..7bf05ec6b41c 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -114,3 +114,43 @@ let copy_dir ?(perm = 0o755) ?progress src dst = () let copy_file = copy_file ~count_progress:(fun _ -> ()) + +let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = + let open Lwt.Syntax in + let* result = f x in + match result with + | Ok _ as r -> Lwt.return r + | Error (err :: _) as errs when tries > 0 && is_error err -> ( + let* () = + emit (Format.sprintf "%sRetrying in %.2f seconds..." msg delay) + in + let* result = + Lwt.pick + [ + (let* () = Lwt_unix.sleep delay in + Lwt.return `Continue); + (let* _ = Lwt_exit.clean_up_starts in + Lwt.return `Killed); + ] + in + match result with + | `Killed -> Lwt.return errs + | `Continue -> + let next_delay = delay *. factor in + let delay = + Option.fold + ~none:next_delay + ~some:(fun max_delay -> Float.min next_delay max_delay) + max_delay + in + retry + ?max_delay + ~delay + ~factor + ~msg + ~tries:(tries - 1) + ~is_error + ~emit + f + x) + | Error _ as err -> Lwt.return err diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index ebb14f741140..738c02bf50d3 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -58,3 +58,22 @@ val copy_file : src:string -> dst:string -> unit outputs with the given message and color. *) val copy_dir : ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit + +(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] + retries applying [f x] [tries] until it succeeds or returns an error + when [is_error] is false, at most [tries] number of times. After + each try it waits for a number of seconds, but not more than [max_delay], if + given. The wait time between tries is given by the initial [delay], + multiplied by [factor] at each subsequent try. At each failure, [msg] + together with the current delay is printed using [emit]. *) +val retry : + ?max_delay:float -> + delay:float -> + factor:float -> + tries:int -> + is_error:('err -> bool) -> + emit:(string -> unit Lwt.t) -> + ?msg:string -> + ('a -> ('b, 'err list) result Lwt.t) -> + 'a -> + ('b, 'err list) result Lwt.t diff --git a/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml b/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml index dd8567612240..d9c1d2ba7fda 100644 --- a/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml +++ b/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml @@ -881,38 +881,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index a0b6f8d38f13..fef652c47995 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -871,38 +871,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index 59f796283d09..6d9c017d8139 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -869,38 +869,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. -- GitLab From 9a7ca715e1a0c502fce74dc048268f0522e79883 Mon Sep 17 00:00:00 2001 From: Valentin Chaboche Date: Fri, 11 Apr 2025 14:38:28 +0200 Subject: [PATCH 2/5] Agnostic-baker: support [--keep-alive] --- CHANGES.rst | 2 + manifest/product_octez.ml | 1 + src/bin_agnostic_baker/main_agnostic_baker.ml | 6 ++- .../agnostic_baker_events.ml | 11 ++++ src/lib_agnostic_baker/daemon.ml | 50 +++++++++++++++++-- src/lib_agnostic_baker/daemon.mli | 4 +- src/lib_agnostic_baker/dune | 6 ++- src/lib_agnostic_baker/run_args.ml | 11 ++++ src/lib_agnostic_baker/run_args.mli | 4 ++ 9 files changed, 85 insertions(+), 10 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2523d32a16cf..06fbd0123848 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -87,6 +87,8 @@ Baker Agnostic Baker -------------- +- Fix the support of ``--keep-alive`` for the agnostic baker. (MR :gl:`!17685`) + - The agnostic baker binary becomes ``octez-agnostic-baker``. (MR :gl:`!17491`) - The agnostic baker now has the same CLI as the classical baker, getting rid of the diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index f64249a799bd..e3415979878e 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -5676,6 +5676,7 @@ let octez_agnostic_baker_lib = octez_node_config; octez_client_commands |> open_; octez_profiler |> open_; + octez_stdlib_unix |> open_; ] (* PROTOCOL PACKAGES *) diff --git a/src/bin_agnostic_baker/main_agnostic_baker.ml b/src/bin_agnostic_baker/main_agnostic_baker.ml index d3928cddb25b..b0ca6c967110 100644 --- a/src/bin_agnostic_baker/main_agnostic_baker.ml +++ b/src/bin_agnostic_baker/main_agnostic_baker.ml @@ -33,7 +33,11 @@ let lwt_run ~args () = Client_main_run.init_logging (module Agnostic_baker_config) ~base_dir () in () [@profiler.overwrite may_start_profiler base_dir] ; - let daemon = Daemon.create ~node_endpoint:(Run_args.get_endpoint args) in + let daemon = + Daemon.create + ~node_endpoint:(Run_args.get_endpoint args) + ~keep_alive:(Run_args.keep_alive args) + in let* (_ : unit) = Daemon.run daemon in let*! () = Lwt_utils.never_ending () in return_unit diff --git a/src/lib_agnostic_baker/agnostic_baker_events.ml b/src/lib_agnostic_baker/agnostic_baker_events.ml index 4b55a56912a8..a24c0f396b51 100644 --- a/src/lib_agnostic_baker/agnostic_baker_events.ml +++ b/src/lib_agnostic_baker/agnostic_baker_events.ml @@ -102,3 +102,14 @@ let period_status = ("block", Block_hash.encoding) ("period", string) ("remaining", int31) + +(* Error *) +let cannot_connect = + declare_1 + ~section + ~alternative_color + ~level:Error + ~name:"agnostic_baker_cannot_connect" + ~msg:"Cannot connect to node. {message}" + ~pp1:Format.pp_print_string + ("message", Data_encoding.string) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index b1ce04fec974..1b92ef55fbf1 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -27,12 +27,33 @@ type state = { node_endpoint : string; mutable current_baker : baker option; mutable old_baker : baker_to_kill option; + keep_alive : bool; } type t = state (* ---- Baker Process Management ---- *) +let rec retry_on_disconnection ~emit node_addr f = + let open Lwt_result_syntax in + let*! result = f () in + match result with + | Ok () -> return_unit + | Error (Lost_node_connection :: _ | Cannot_connect_to_node _ :: _) -> + let* _level = + Utils.retry + ~emit + ~max_delay:10. + ~delay:1. + ~factor:1.5 + ~tries:max_int + ~is_error:(function Cannot_connect_to_node _ -> true | _ -> false) + (fun node_addr -> Rpc_services.get_level ~node_addr) + node_addr + in + retry_on_disconnection ~emit node_addr f + | Error trace -> fail trace + (** [run_thread ~protocol_hash ~baker_commands ~cancel_promise] returns the main running thread for the baker given its protocol [~protocol_hash], corresponding commands [~baker_commands] and cancellation [~cancel_promise]. *) @@ -277,8 +298,8 @@ let may_start_initial_baker state = in may_start ~head_stream:None () -let create ~node_endpoint = - {node_endpoint; current_baker = None; old_baker = None} +let create ~node_endpoint ~keep_alive = + {node_endpoint; current_baker = None; old_baker = None; keep_alive} let run state = let open Lwt_result_syntax in @@ -289,8 +310,27 @@ let run state = let*! () = Events.(emit stopping_daemon) () in Lwt.return_unit) in - let* () = may_start_initial_baker state in - let* head_stream = monitor_heads ~node_addr in + let* () = + if state.keep_alive then + retry_on_disconnection + ~emit:(Events.emit Agnostic_baker_events.cannot_connect) + node_addr + (fun () -> may_start_initial_baker state) + else may_start_initial_baker state + in + let monitor_voting_periods () = + let* head_stream = monitor_heads ~node_addr in + monitor_voting_periods ~state head_stream + in (* Monitoring voting periods through heads monitoring to avoid missing UAUs. *) - Lwt.pick [monitor_voting_periods ~state head_stream; baker_thread ~state] + Lwt.pick + [ + (* We do not care if --keep-alive is provided, if the baker thread doesn't + have the argument it'll abort the process anyway. *) + retry_on_disconnection + ~emit:(fun _ -> Lwt.return_unit) + node_addr + monitor_voting_periods; + baker_thread ~state; + ] diff --git a/src/lib_agnostic_baker/daemon.mli b/src/lib_agnostic_baker/daemon.mli index bbc1bfabd215..987847b692bd 100644 --- a/src/lib_agnostic_baker/daemon.mli +++ b/src/lib_agnostic_baker/daemon.mli @@ -23,8 +23,8 @@ type t -(** [create ~node_endpoint] returns a non initialized daemon. *) -val create : node_endpoint:string -> t +(** [create ~node_endpoint ~keep_alive] returns a non initialized daemon. *) +val create : node_endpoint:string -> keep_alive:bool -> t (** [run daemon] Runs the daemon responsible for the spawn/stop of the baker daemons. *) diff --git a/src/lib_agnostic_baker/dune b/src/lib_agnostic_baker/dune index 3ce85997056c..5047352ab2cc 100644 --- a/src/lib_agnostic_baker/dune +++ b/src/lib_agnostic_baker/dune @@ -14,7 +14,8 @@ octez-shell-libs.client-base-unix octez-node-config octez-shell-libs.client-commands - octez-libs.octez-profiler) + octez-libs.octez-profiler + octez-libs.stdlib-unix) (preprocess (pps octez-libs.ppx_profiler)) (preprocessor_deps (env_var TEZOS_PPX_PROFILER)) (flags @@ -25,4 +26,5 @@ -open Tezos_base_unix -open Tezos_client_base_unix -open Tezos_client_commands - -open Tezos_profiler)) + -open Tezos_profiler + -open Tezos_stdlib_unix)) diff --git a/src/lib_agnostic_baker/run_args.ml b/src/lib_agnostic_baker/run_args.ml index e00333e56a62..e5d3d969b669 100644 --- a/src/lib_agnostic_baker/run_args.ml +++ b/src/lib_agnostic_baker/run_args.ml @@ -46,3 +46,14 @@ let get_endpoint args = @@ get_arg_value ~arg:endpoint_arg ~short_arg:endpoint_short_arg args let get_base_dir = get_arg_value ~arg:base_dir_arg ~short_arg:base_dir_short_arg + +let keep_alive_arg = "--keep-alive" + +let keep_alive_short_arg = "-K" + +let keep_alive args = + List.find + (fun arg -> + String.equal keep_alive_short_arg arg || String.equal keep_alive_arg arg) + args + |> Option.is_some diff --git a/src/lib_agnostic_baker/run_args.mli b/src/lib_agnostic_baker/run_args.mli index 088f22b1320b..c3f59bee7b8c 100644 --- a/src/lib_agnostic_baker/run_args.mli +++ b/src/lib_agnostic_baker/run_args.mli @@ -25,3 +25,7 @@ val get_endpoint : string list -> string (** [get_base_dir args] returns the value associated to the [--base-dir] argument amongst [args]. *) val get_base_dir : string list -> string option + +(** [keep_alive args] returns [true] iff [--keep-alive] (or [-K]) is amongst + [args]. *) +val keep_alive : string list -> bool -- GitLab From 775116139056d4f26eb4b5d3c46e163ee782d816 Mon Sep 17 00:00:00 2001 From: Valentin Chaboche Date: Fri, 11 Apr 2025 14:39:03 +0200 Subject: [PATCH 3/5] Agnostic-baker: remove trailing whitespace --- src/lib_agnostic_baker/daemon.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 1b92ef55fbf1..fe004ef0c80f 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -99,7 +99,7 @@ let spawn_baker protocol_hash = let*! () = Events.(emit baker_running) protocol_hash in return {protocol_hash; process = {thread; canceller}} -(** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash +(** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash ~level_to_kill_old_baker] moves the current baker into the old baker slot (to be killed later) and spawns a new baker for [~next_protocol_hash] *) let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash @@ -178,7 +178,7 @@ let monitor_heads ~node_addr = (** [monitor_voting_periods ~state head_stream] continuously monitors [heads_stream] to detect protocol changes. It will: - Shut down an old baker it its time has come; - - Spawn and "hot-swap" to a new baker if the next protocol hash is different. + - Spawn and "hot-swap" to a new baker if the next protocol hash is different. The voting period information is used for logging purposes. *) let monitor_voting_periods ~state head_stream = let open Lwt_result_syntax in -- GitLab From 2857c4895c2ac5a1284b2f0715f0802c9f811f8c Mon Sep 17 00:00:00 2001 From: Valentin Chaboche Date: Fri, 11 Apr 2025 16:28:11 +0200 Subject: [PATCH 4/5] Agnostic-baker/Tezt: support [--keep-alive] --- tezt/lib_tezos/agnostic_baker.ml | 18 ++++++++++++++---- tezt/lib_tezos/agnostic_baker.mli | 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/tezt/lib_tezos/agnostic_baker.ml b/tezt/lib_tezos/agnostic_baker.ml index 5beb98bc110f..99b4e8e64e47 100644 --- a/tezt/lib_tezos/agnostic_baker.ml +++ b/tezt/lib_tezos/agnostic_baker.ml @@ -44,6 +44,7 @@ module Parameters = struct state_recorder : bool; node_version_check_bypass : bool; node_version_allowed : string option; + keep_alive : bool; } type session_state = {mutable ready : bool} @@ -88,7 +89,7 @@ let create_from_uris ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?(remote_mode = false) ?operations_pool ?dal_node_rpc_endpoint ?dal_node_timeout_percentage ?(state_recorder = false) ?(node_version_check_bypass = false) ?node_version_allowed ~base_dir - ~node_data_dir ~node_rpc_endpoint () = + ~node_data_dir ~node_rpc_endpoint ?(keep_alive = false) () = let agnostic_baker = create ~path @@ -113,6 +114,7 @@ let create_from_uris ?runner ?(path = Uses.path Constant.octez_agnostic_baker) state_recorder; node_version_check_bypass; node_version_allowed; + keep_alive; } in agnostic_baker @@ -124,7 +126,8 @@ let create ?runner ?path ?name ?color ?event_pipe ?(delegates = []) ?votefile ?(liquidity_baking_toggle_vote = Some Pass) ?force_apply_from_round ?(remote_mode = false) ?operations_pool ?dal_node ?dal_node_timeout_percentage ?(state_recorder = false) - ?(node_version_check_bypass = false) ?node_version_allowed node client = + ?(node_version_check_bypass = false) ?node_version_allowed ?keep_alive node + client = let dal_node_rpc_endpoint = Option.map Dal_node.as_rpc_endpoint dal_node in let agnostic_baker = create_from_uris @@ -147,6 +150,7 @@ let create ?runner ?path ?name ?color ?event_pipe ?(delegates = []) ?votefile ~base_dir:(Client.base_dir client) ~node_data_dir:(Node.data_dir node) ~node_rpc_endpoint:(Node.as_rpc_endpoint node) + ?keep_alive () in on_event agnostic_baker (handle_event agnostic_baker) ; @@ -224,6 +228,11 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = Fun.id agnostic_baker.persistent_state.node_version_allowed in + let keep_alive = + Cli_arg.optional_switch + "keep-alive" + agnostic_baker.persistent_state.keep_alive + in let run_args = if agnostic_baker.persistent_state.remote_mode then ["remotely"] else ["with"; "local"; "node"; node_data_dir] @@ -233,7 +242,7 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = @ run_args @ delegates @ liquidity_baking_toggle_vote @ votefile @ force_apply_from_round @ operations_pool @ dal_node_endpoint @ without_dal @ dal_node_timeout_percentage @ state_recorder @ node_version_check_bypass - @ node_version_allowed + @ node_version_allowed @ keep_alive in let on_terminate _ = @@ -273,7 +282,7 @@ let init ?env ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?name ?color ?event_level ?event_pipe ?event_sections_levels ?(delegates = []) ?votefile ?liquidity_baking_toggle_vote ?force_apply_from_round ?remote_mode ?operations_pool ?dal_node ?dal_node_timeout_percentage ?state_recorder - ?node_version_check_bypass ?node_version_allowed node client = + ?node_version_check_bypass ?node_version_allowed ?keep_alive node client = let* () = Node.wait_for_ready node in let agnostic_baker = create @@ -293,6 +302,7 @@ let init ?env ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?name ?node_version_check_bypass ?node_version_allowed ~delegates + ?keep_alive node client in diff --git a/tezt/lib_tezos/agnostic_baker.mli b/tezt/lib_tezos/agnostic_baker.mli index 64d9ff474a55..1af2db52c131 100644 --- a/tezt/lib_tezos/agnostic_baker.mli +++ b/tezt/lib_tezos/agnostic_baker.mli @@ -144,6 +144,7 @@ val create : ?state_recorder:bool -> ?node_version_check_bypass:bool -> ?node_version_allowed:string -> + ?keep_alive:bool -> Node.t -> Client.t -> t @@ -186,6 +187,7 @@ val create_from_uris : base_dir:string -> node_data_dir:string -> node_rpc_endpoint:Endpoint.t -> + ?keep_alive:bool -> unit -> t @@ -245,6 +247,7 @@ val init : ?state_recorder:bool -> ?node_version_check_bypass:bool -> ?node_version_allowed:string -> + ?keep_alive:bool -> Node.t -> Client.t -> t Lwt.t -- GitLab From 9ed83b28bfbaa8d3098f9dd2b23f029daa38a03e Mon Sep 17 00:00:00 2001 From: Valentin Chaboche Date: Fri, 11 Apr 2025 17:20:32 +0200 Subject: [PATCH 5/5] Agnostic-baker/Tezt: test [--keep-alive] --- tezt/lib_tezos/agnostic_baker.ml | 40 +++++++++++++++--------- tezt/lib_tezos/agnostic_baker.mli | 4 +++ tezt/tests/agnostic_baker_test.ml | 51 ++++++++++++++++++++++++++++++- tezt/tests/main.ml | 5 +-- 4 files changed, 82 insertions(+), 18 deletions(-) diff --git a/tezt/lib_tezos/agnostic_baker.ml b/tezt/lib_tezos/agnostic_baker.ml index 99b4e8e64e47..57a2f58d0819 100644 --- a/tezt/lib_tezos/agnostic_baker.ml +++ b/tezt/lib_tezos/agnostic_baker.ml @@ -156,13 +156,8 @@ let create ?runner ?path ?name ?color ?event_pipe ?(delegates = []) ?votefile on_event agnostic_baker (handle_event agnostic_baker) ; agnostic_baker -let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = - (match agnostic_baker.status with - | Not_running -> () - | Running _ -> - Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; +let run_args agnostic_baker = let delegates = agnostic_baker.persistent_state.delegates in - let runner = agnostic_baker.persistent_state.runner in let node_data_dir = agnostic_baker.persistent_state.node_data_dir in let base_dir = agnostic_baker.persistent_state.base_dir in let node_addr = @@ -237,14 +232,18 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = if agnostic_baker.persistent_state.remote_mode then ["remotely"] else ["with"; "local"; "node"; node_data_dir] in - let arguments = - ["--endpoint"; node_addr; "--base-dir"; base_dir; "run"] - @ run_args @ delegates @ liquidity_baking_toggle_vote @ votefile - @ force_apply_from_round @ operations_pool @ dal_node_endpoint @ without_dal - @ dal_node_timeout_percentage @ state_recorder @ node_version_check_bypass - @ node_version_allowed @ keep_alive - in + ["--endpoint"; node_addr; "--base-dir"; base_dir; "run"] + @ run_args @ delegates @ liquidity_baking_toggle_vote @ votefile + @ force_apply_from_round @ operations_pool @ dal_node_endpoint @ without_dal + @ dal_node_timeout_percentage @ state_recorder @ node_version_check_bypass + @ node_version_allowed @ keep_alive +let run ?env ?event_level ?event_sections_levels ?(extra_arguments = []) + (agnostic_baker : t) = + (match agnostic_baker.status with + | Not_running -> () + | Running _ -> + Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; let on_terminate _ = (* Cancel all [Ready] event listeners. *) trigger_ready agnostic_baker None ; @@ -256,9 +255,20 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = ?event_sections_levels agnostic_baker {ready = false} - arguments + (run_args agnostic_baker @ extra_arguments) ~on_terminate - ?runner + ?runner:agnostic_baker.persistent_state.runner + +let spawn_run ?env (agnostic_baker : t) = + (match agnostic_baker.status with + | Not_running -> () + | Running _ -> + Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; + Process.spawn + ?env + ?runner:agnostic_baker.persistent_state.runner + agnostic_baker.path + (run_args agnostic_baker) let check_event ?where agnostic_baker name promise = let* result = promise in diff --git a/tezt/lib_tezos/agnostic_baker.mli b/tezt/lib_tezos/agnostic_baker.mli index 1af2db52c131..742fb5bbefad 100644 --- a/tezt/lib_tezos/agnostic_baker.mli +++ b/tezt/lib_tezos/agnostic_baker.mli @@ -52,9 +52,13 @@ val run : ?env:string String_map.t -> ?event_level:Daemon.Level.default_level -> ?event_sections_levels:(string * Daemon.Level.level) list -> + ?extra_arguments:string list -> t -> unit Lwt.t +(** Spawn [octez-agnostic-baker run] similarly to {!run} but returns the process. *) +val spawn_run : ?env:string String_map.t -> t -> Process.t + (** Liquidity baking vote values. *) type liquidity_baking_vote = Off | On | Pass diff --git a/tezt/tests/agnostic_baker_test.ml b/tezt/tests/agnostic_baker_test.ml index 42a8fe18ac62..8edfae35904e 100644 --- a/tezt/tests/agnostic_baker_test.ml +++ b/tezt/tests/agnostic_baker_test.ml @@ -204,7 +204,56 @@ let test_man () = in unit -let register ~migrate_from ~migrate_to = +let test_keep_alive = + Protocol.register_test + ~__FILE__ + ~title:"Agnostic baker --keep-alive" + ~tags:[team; "sandbox"; "agnostic"; "baker"; "keep_alive"] + ~uses:(fun _ -> [Constant.octez_agnostic_baker]) + @@ fun protocol -> + let* node, client = Client.init_with_protocol ~protocol `Client () in + let baker = + Agnostic_baker.create ~node_version_check_bypass:true node client + in + let* () = Node.terminate node in + (* Start the baker with no node running and no [--keep-alive], it crashes. *) + let process = Agnostic_baker.spawn_run baker in + let* () = Process.check_error ~msg:(rex "Cannot connect to node") process in + (* Start the baker with no node running and [--keep-alive], it'll wait. *) + let wait_for_cannot_connect = + Agnostic_baker.wait_for + baker + "agnostic_baker_cannot_connect.v0" + (fun _json -> Some ()) + in + let* () = Agnostic_baker.run ~extra_arguments:["--keep-alive"] baker + and* () = wait_for_cannot_connect in + (* Start the node. *) + let* () = Node.run node [] and* () = Node.wait_for_ready node in + (* Bake a block, the baker is connected so it'll see it. *) + let wait_baker_proposal = + (* This is an event emitted by the baker lib. *) + Agnostic_baker.wait_for baker "new_valid_proposal.v0" (fun _json -> Some ()) + in + let wait_period_status = + (* This is an event emitted by the agnostic baker. *) + Agnostic_baker.wait_for baker "period_status.v0" (fun _json -> Some ()) + in + let* () = Client.bake_for_and_wait client + and* () = wait_baker_proposal + and* () = wait_period_status in + (* Kill the node now that they are connected, the baker will stay alive. *) + let* () = Node.terminate node and* () = wait_for_cannot_connect in + (* Redo the procedure, restart the node and wait for the block events. *) + let* () = Node.run node [] and* () = Node.wait_for_ready node in + let* () = Client.bake_for_and_wait client + and* () = wait_baker_proposal + and* () = wait_period_status in + unit + +let register ~protocols = test_keep_alive protocols + +let register_migration ~migrate_from ~migrate_to = (* We want to migrate only from Active protocols *) if Agnostic_baker.protocol_status migrate_from = Active then ( migrate ~migrate_from ~migrate_to ~use_remote_signer:false ; diff --git a/tezt/tests/main.ml b/tezt/tests/main.ml index 043bd51028b2..27524ed4fdb3 100644 --- a/tezt/tests/main.ml +++ b/tezt/tests/main.ml @@ -68,7 +68,7 @@ let register_protocol_independent_tests () = (* Tests related to protocol migration. *) let register_protocol_migration_tests () = let migrate_from = Option.get @@ Protocol.previous_protocol migrate_to in - Agnostic_baker_test.register ~migrate_from ~migrate_to ; + Agnostic_baker_test.register_migration ~migrate_from ~migrate_to ; Mockup.register_constant_migration ~migrate_from ~migrate_to ; Protocol_migration.register ~migrate_from ~migrate_to ; Weeklynet.register () ; @@ -100,7 +100,7 @@ let register_old_protocol_migration_tests () = | _, Alpha -> () (* Already in register_protocol_migration_tests *) | None, _ -> () | Some migrate_from, migrate_to -> - Agnostic_baker_test.register ~migrate_from ~migrate_to ; + Agnostic_baker_test.register_migration ~migrate_from ~migrate_to ; Sc_rollup_migration.register ~migrate_from ~migrate_to ; Dal.register_migration ~migrate_from ~migrate_to) Protocol.all @@ -116,6 +116,7 @@ let register_old_protocol_migration_tests () = let register_protocol_tests_that_use_supports_correctly () = let protocols = Protocol.all in Adaptive_issuance.register ~protocols ; + Agnostic_baker_test.register ~protocols ; Bad_annot.register ~protocols ; Bad_indentation.register ~protocols ; Baker_test.register ~protocols ; -- GitLab