diff --git a/CHANGES.rst b/CHANGES.rst index 2523d32a16cfe109ec226ee1c5a43abe9001c1c4..06fbd0123848efb6ef7770a153a7a76bb52d612f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -87,6 +87,8 @@ Baker Agnostic Baker -------------- +- Fix the support of ``--keep-alive`` for the agnostic baker. (MR :gl:`!17685`) + - The agnostic baker binary becomes ``octez-agnostic-baker``. (MR :gl:`!17491`) - The agnostic baker now has the same CLI as the classical baker, getting rid of the diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index f64249a799bd1aa926d9a1b09d8f2475c9171226..e3415979878e062c2cd03f5dd4e2dcff91f99832 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -5676,6 +5676,7 @@ let octez_agnostic_baker_lib = octez_node_config; octez_client_commands |> open_; octez_profiler |> open_; + octez_stdlib_unix |> open_; ] (* PROTOCOL PACKAGES *) diff --git a/src/bin_agnostic_baker/main_agnostic_baker.ml b/src/bin_agnostic_baker/main_agnostic_baker.ml index d3928cddb25b70466baf35cd549948b0abfc208e..b0ca6c967110a399f6c15812292e9989100bb44f 100644 --- a/src/bin_agnostic_baker/main_agnostic_baker.ml +++ b/src/bin_agnostic_baker/main_agnostic_baker.ml @@ -33,7 +33,11 @@ let lwt_run ~args () = Client_main_run.init_logging (module Agnostic_baker_config) ~base_dir () in () [@profiler.overwrite may_start_profiler base_dir] ; - let daemon = Daemon.create ~node_endpoint:(Run_args.get_endpoint args) in + let daemon = + Daemon.create + ~node_endpoint:(Run_args.get_endpoint args) + ~keep_alive:(Run_args.keep_alive args) + in let* (_ : unit) = Daemon.run daemon in let*! 
() = Lwt_utils.never_ending () in return_unit diff --git a/src/lib_agnostic_baker/agnostic_baker_events.ml b/src/lib_agnostic_baker/agnostic_baker_events.ml index 4b55a56912a89e403b2348dc0606a096a92bd630..a24c0f396b5109efc138a13a513def716903639e 100644 --- a/src/lib_agnostic_baker/agnostic_baker_events.ml +++ b/src/lib_agnostic_baker/agnostic_baker_events.ml @@ -102,3 +102,14 @@ let period_status = ("block", Block_hash.encoding) ("period", string) ("remaining", int31) + +(* Error *) +let cannot_connect = + declare_1 + ~section + ~alternative_color + ~level:Error + ~name:"agnostic_baker_cannot_connect" + ~msg:"Cannot connect to node. {message}" + ~pp1:Format.pp_print_string + ("message", Data_encoding.string) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index b1ce04fec97483cf1a43e4f4258a341769c40948..fe004ef0c80f6158ada29726cc7b125a33b14453 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -27,12 +27,33 @@ type state = { node_endpoint : string; mutable current_baker : baker option; mutable old_baker : baker_to_kill option; + keep_alive : bool; } type t = state (* ---- Baker Process Management ---- *) +let rec retry_on_disconnection ~emit node_addr f = + let open Lwt_result_syntax in + let*! result = f () in + match result with + | Ok () -> return_unit + | Error (Lost_node_connection :: _ | Cannot_connect_to_node _ :: _) -> + let* _level = + Utils.retry + ~emit + ~max_delay:10. + ~delay:1. + ~factor:1.5 + ~tries:max_int + ~is_error:(function Cannot_connect_to_node _ -> true | _ -> false) + (fun node_addr -> Rpc_services.get_level ~node_addr) + node_addr + in + retry_on_disconnection ~emit node_addr f + | Error trace -> fail trace + (** [run_thread ~protocol_hash ~baker_commands ~cancel_promise] returns the main running thread for the baker given its protocol [~protocol_hash], corresponding commands [~baker_commands] and cancellation [~cancel_promise]. 
*) @@ -78,7 +99,7 @@ let spawn_baker protocol_hash = let*! () = Events.(emit baker_running) protocol_hash in return {protocol_hash; process = {thread; canceller}} -(** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash +(** [hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash ~level_to_kill_old_baker] moves the current baker into the old baker slot (to be killed later) and spawns a new baker for [~next_protocol_hash] *) let hot_swap_baker ~state ~current_protocol_hash ~next_protocol_hash @@ -157,7 +178,7 @@ let monitor_heads ~node_addr = (** [monitor_voting_periods ~state head_stream] continuously monitors [heads_stream] to detect protocol changes. It will: - Shut down an old baker it its time has come; - - Spawn and "hot-swap" to a new baker if the next protocol hash is different. + - Spawn and "hot-swap" to a new baker if the next protocol hash is different. The voting period information is used for logging purposes. *) let monitor_voting_periods ~state head_stream = let open Lwt_result_syntax in @@ -277,8 +298,8 @@ let may_start_initial_baker state = in may_start ~head_stream:None () -let create ~node_endpoint = - {node_endpoint; current_baker = None; old_baker = None} +let create ~node_endpoint ~keep_alive = + {node_endpoint; current_baker = None; old_baker = None; keep_alive} let run state = let open Lwt_result_syntax in @@ -289,8 +310,27 @@ let run state = let*! 
() = Events.(emit stopping_daemon) () in Lwt.return_unit) in - let* () = may_start_initial_baker state in - let* head_stream = monitor_heads ~node_addr in + let* () = + if state.keep_alive then + retry_on_disconnection + ~emit:(Events.emit Agnostic_baker_events.cannot_connect) + node_addr + (fun () -> may_start_initial_baker state) + else may_start_initial_baker state + in + let monitor_voting_periods () = + let* head_stream = monitor_heads ~node_addr in + monitor_voting_periods ~state head_stream + in (* Monitoring voting periods through heads monitoring to avoid missing UAUs. *) - Lwt.pick [monitor_voting_periods ~state head_stream; baker_thread ~state] + Lwt.pick + [ + (* We do not care if --keep-alive is provided, if the baker thread doesn't + have the argument it'll abort the process anyway. *) + retry_on_disconnection + ~emit:(fun _ -> Lwt.return_unit) + node_addr + monitor_voting_periods; + baker_thread ~state; + ] diff --git a/src/lib_agnostic_baker/daemon.mli b/src/lib_agnostic_baker/daemon.mli index bbc1bfabd2156b082de52ab1e4a410c9e2727419..987847b692bdbe897c1b78500ed32602d2484de5 100644 --- a/src/lib_agnostic_baker/daemon.mli +++ b/src/lib_agnostic_baker/daemon.mli @@ -23,8 +23,8 @@ type t -(** [create ~node_endpoint] returns a non initialized daemon. *) -val create : node_endpoint:string -> t +(** [create ~node_endpoint ~keep_alive] returns a non initialized daemon. *) +val create : node_endpoint:string -> keep_alive:bool -> t (** [run daemon] Runs the daemon responsible for the spawn/stop of the baker daemons. 
*) diff --git a/src/lib_agnostic_baker/dune b/src/lib_agnostic_baker/dune index 3ce85997056c4d52878bd1e3a8ca2d4b14ed7263..5047352ab2cc66952c07cff9f2def4a431b58cb3 100644 --- a/src/lib_agnostic_baker/dune +++ b/src/lib_agnostic_baker/dune @@ -14,7 +14,8 @@ octez-shell-libs.client-base-unix octez-node-config octez-shell-libs.client-commands - octez-libs.octez-profiler) + octez-libs.octez-profiler + octez-libs.stdlib-unix) (preprocess (pps octez-libs.ppx_profiler)) (preprocessor_deps (env_var TEZOS_PPX_PROFILER)) (flags @@ -25,4 +26,5 @@ -open Tezos_base_unix -open Tezos_client_base_unix -open Tezos_client_commands - -open Tezos_profiler)) + -open Tezos_profiler + -open Tezos_stdlib_unix)) diff --git a/src/lib_agnostic_baker/run_args.ml b/src/lib_agnostic_baker/run_args.ml index e00333e56a6298fbfc847a1185d327119abf2d80..e5d3d969b669a17c2ec055c9adf11943272550e4 100644 --- a/src/lib_agnostic_baker/run_args.ml +++ b/src/lib_agnostic_baker/run_args.ml @@ -46,3 +46,14 @@ let get_endpoint args = @@ get_arg_value ~arg:endpoint_arg ~short_arg:endpoint_short_arg args let get_base_dir = get_arg_value ~arg:base_dir_arg ~short_arg:base_dir_short_arg + +let keep_alive_arg = "--keep-alive" + +let keep_alive_short_arg = "-K" + +let keep_alive args = + List.find + (fun arg -> + String.equal keep_alive_short_arg arg || String.equal keep_alive_arg arg) + args + |> Option.is_some diff --git a/src/lib_agnostic_baker/run_args.mli b/src/lib_agnostic_baker/run_args.mli index 088f22b1320be4f058834228b396359aa142af23..c3f59bee7b8c8d022753af84d9c900287789fee9 100644 --- a/src/lib_agnostic_baker/run_args.mli +++ b/src/lib_agnostic_baker/run_args.mli @@ -25,3 +25,7 @@ val get_endpoint : string list -> string (** [get_base_dir args] returns the value associated to the [--base-dir] argument amongst [args]. *) val get_base_dir : string list -> string option + +(** [keep_alive args] returns [true] iff [--keep-alive] (or [-K]) is amongst + [args]. 
*) +val keep_alive : string list -> bool diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 70a594eeb2c50e721249b6f7aa4bb71f7295b808..7bf05ec6b41c98d5356bf30f1754b4b0bdb06312 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -114,3 +114,43 @@ let copy_dir ?(perm = 0o755) ?progress src dst = () let copy_file = copy_file ~count_progress:(fun _ -> ()) + +let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = + let open Lwt.Syntax in + let* result = f x in + match result with + | Ok _ as r -> Lwt.return r + | Error (err :: _) as errs when tries > 0 && is_error err -> ( + let* () = + emit (Format.sprintf "%sRetrying in %.2f seconds..." msg delay) + in + let* result = + Lwt.pick + [ + (let* () = Lwt_unix.sleep delay in + Lwt.return `Continue); + (let* _ = Lwt_exit.clean_up_starts in + Lwt.return `Killed); + ] + in + match result with + | `Killed -> Lwt.return errs + | `Continue -> + let next_delay = delay *. factor in + let delay = + Option.fold + ~none:next_delay + ~some:(fun max_delay -> Float.min next_delay max_delay) + max_delay + in + retry + ?max_delay + ~delay + ~factor + ~msg + ~tries:(tries - 1) + ~is_error + ~emit + f + x) + | Error _ as err -> Lwt.return err diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index ebb14f7411400431b21e3a8cdfbde7ef8a915bd2..738c02bf50d3f614947d6bcabfb8450dc473771e 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -58,3 +58,22 @@ val copy_file : src:string -> dst:string -> unit outputs with the given message and color. *) val copy_dir : ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit + +(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] + retries applying [f x] until it succeeds or returns an error + for which [is_error] is false, at most [tries] number of times. 
After + each try it waits for a number of seconds, but not more than [max_delay], if + given. The wait time between tries is given by the initial [delay], + multiplied by [factor] at each subsequent try. At each failure, [msg] + together with the current delay is printed using [emit]. *) +val retry : + ?max_delay:float -> + delay:float -> + factor:float -> + tries:int -> + is_error:('err -> bool) -> + emit:(string -> unit Lwt.t) -> + ?msg:string -> + ('a -> ('b, 'err list) result Lwt.t) -> + 'a -> + ('b, 'err list) result Lwt.t diff --git a/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml b/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml index dd8567612240a2125f486e643ea5419aece4b3c2..d9c1d2ba7fda80c23766988ecec44ec0ceb95c6d 100644 --- a/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml +++ b/src/proto_021_PsQuebec/lib_delegate/baking_scheduling.ml @@ -881,38 +881,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. 
factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index a0b6f8d38f1383ac15d33d9096034474048c5091..fef652c479955c096690eacb6662a4614ccc0352 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -871,38 +871,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. 
factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index 59f796283d091a29f51e53a5cb7d00dd24937049..6d9c017d813936b4f5cf106396203efc7a48c9fa 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -869,38 +869,21 @@ let perform_sanity_check cctxt ~chain_id = in return_unit -let rec retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor +let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor ~tries ?(msg = "Connection failed. ") f x = - let open Lwt_result_syntax in - let*! result = f x in - match result with - | Ok _ as r -> Lwt.return r - | Error - (RPC_client_errors.Request_failed {error = Connection_failed _; _} :: _) - as err - when tries > 0 -> ( - let*! () = cctxt#message "%sRetrying in %.2f seconds..." msg delay in - let*! result = - Lwt.pick - [ - (let*! () = Lwt_unix.sleep delay in - Lwt.return `Continue); - (let*! _ = Lwt_exit.clean_up_starts in - Lwt.return `Killed); - ] - in - match result with - | `Killed -> Lwt.return err - | `Continue -> - let next_delay = delay *. 
factor in - let delay = - Option.fold - ~none:next_delay - ~some:(fun max_delay -> Float.min next_delay max_delay) - max_delay - in - retry cctxt ?max_delay ~delay ~factor ~msg ~tries:(tries - 1) f x) - | Error _ as err -> Lwt.return err + Utils.retry + ~emit:(cctxt#message "%s") + ?max_delay + ~delay + ~factor + ~tries + ~msg + ~is_error:(function + | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> + true + | _ -> false) + f + x (* This function attempts to resolve the primary delegate associated with the given [key]. diff --git a/tezt/lib_tezos/agnostic_baker.ml b/tezt/lib_tezos/agnostic_baker.ml index 5beb98bc110f46d3f954c9ec42fc322e1ea32299..57a2f58d0819630dbd70174c97b9e9ed9c92a0e1 100644 --- a/tezt/lib_tezos/agnostic_baker.ml +++ b/tezt/lib_tezos/agnostic_baker.ml @@ -44,6 +44,7 @@ module Parameters = struct state_recorder : bool; node_version_check_bypass : bool; node_version_allowed : string option; + keep_alive : bool; } type session_state = {mutable ready : bool} @@ -88,7 +89,7 @@ let create_from_uris ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?(remote_mode = false) ?operations_pool ?dal_node_rpc_endpoint ?dal_node_timeout_percentage ?(state_recorder = false) ?(node_version_check_bypass = false) ?node_version_allowed ~base_dir - ~node_data_dir ~node_rpc_endpoint () = + ~node_data_dir ~node_rpc_endpoint ?(keep_alive = false) () = let agnostic_baker = create ~path @@ -113,6 +114,7 @@ let create_from_uris ?runner ?(path = Uses.path Constant.octez_agnostic_baker) state_recorder; node_version_check_bypass; node_version_allowed; + keep_alive; } in agnostic_baker @@ -124,7 +126,8 @@ let create ?runner ?path ?name ?color ?event_pipe ?(delegates = []) ?votefile ?(liquidity_baking_toggle_vote = Some Pass) ?force_apply_from_round ?(remote_mode = false) ?operations_pool ?dal_node ?dal_node_timeout_percentage ?(state_recorder = false) - ?(node_version_check_bypass = false) ?node_version_allowed node client = + 
?(node_version_check_bypass = false) ?node_version_allowed ?keep_alive node + client = let dal_node_rpc_endpoint = Option.map Dal_node.as_rpc_endpoint dal_node in let agnostic_baker = create_from_uris @@ -147,18 +150,14 @@ let create ?runner ?path ?name ?color ?event_pipe ?(delegates = []) ?votefile ~base_dir:(Client.base_dir client) ~node_data_dir:(Node.data_dir node) ~node_rpc_endpoint:(Node.as_rpc_endpoint node) + ?keep_alive () in on_event agnostic_baker (handle_event agnostic_baker) ; agnostic_baker -let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = - (match agnostic_baker.status with - | Not_running -> () - | Running _ -> - Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; +let run_args agnostic_baker = let delegates = agnostic_baker.persistent_state.delegates in - let runner = agnostic_baker.persistent_state.runner in let node_data_dir = agnostic_baker.persistent_state.node_data_dir in let base_dir = agnostic_baker.persistent_state.base_dir in let node_addr = @@ -224,18 +223,27 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = Fun.id agnostic_baker.persistent_state.node_version_allowed in + let keep_alive = + Cli_arg.optional_switch + "keep-alive" + agnostic_baker.persistent_state.keep_alive + in let run_args = if agnostic_baker.persistent_state.remote_mode then ["remotely"] else ["with"; "local"; "node"; node_data_dir] in - let arguments = - ["--endpoint"; node_addr; "--base-dir"; base_dir; "run"] - @ run_args @ delegates @ liquidity_baking_toggle_vote @ votefile - @ force_apply_from_round @ operations_pool @ dal_node_endpoint @ without_dal - @ dal_node_timeout_percentage @ state_recorder @ node_version_check_bypass - @ node_version_allowed - in + ["--endpoint"; node_addr; "--base-dir"; base_dir; "run"] + @ run_args @ delegates @ liquidity_baking_toggle_vote @ votefile + @ force_apply_from_round @ operations_pool @ dal_node_endpoint @ without_dal + @ dal_node_timeout_percentage @ 
state_recorder @ node_version_check_bypass + @ node_version_allowed @ keep_alive +let run ?env ?event_level ?event_sections_levels ?(extra_arguments = []) + (agnostic_baker : t) = + (match agnostic_baker.status with + | Not_running -> () + | Running _ -> + Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; let on_terminate _ = (* Cancel all [Ready] event listeners. *) trigger_ready agnostic_baker None ; @@ -247,9 +255,20 @@ let run ?env ?event_level ?event_sections_levels (agnostic_baker : t) = ?event_sections_levels agnostic_baker {ready = false} - arguments + (run_args agnostic_baker @ extra_arguments) ~on_terminate - ?runner + ?runner:agnostic_baker.persistent_state.runner + +let spawn_run ?env (agnostic_baker : t) = + (match agnostic_baker.status with + | Not_running -> () + | Running _ -> + Test.fail "agnostic_baker %s is already running" agnostic_baker.name) ; + Process.spawn + ?env + ?runner:agnostic_baker.persistent_state.runner + agnostic_baker.path + (run_args agnostic_baker) let check_event ?where agnostic_baker name promise = let* result = promise in @@ -273,7 +292,7 @@ let init ?env ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?name ?color ?event_level ?event_pipe ?event_sections_levels ?(delegates = []) ?votefile ?liquidity_baking_toggle_vote ?force_apply_from_round ?remote_mode ?operations_pool ?dal_node ?dal_node_timeout_percentage ?state_recorder - ?node_version_check_bypass ?node_version_allowed node client = + ?node_version_check_bypass ?node_version_allowed ?keep_alive node client = let* () = Node.wait_for_ready node in let agnostic_baker = create @@ -293,6 +312,7 @@ let init ?env ?runner ?(path = Uses.path Constant.octez_agnostic_baker) ?name ?node_version_check_bypass ?node_version_allowed ~delegates + ?keep_alive node client in diff --git a/tezt/lib_tezos/agnostic_baker.mli b/tezt/lib_tezos/agnostic_baker.mli index 64d9ff474a5579db2c5ddf223a370479df376f9c..742fb5bbefad4d99c4c7e6e19d0dc553ddb870ec 100644 --- 
a/tezt/lib_tezos/agnostic_baker.mli +++ b/tezt/lib_tezos/agnostic_baker.mli @@ -52,9 +52,13 @@ val run : ?env:string String_map.t -> ?event_level:Daemon.Level.default_level -> ?event_sections_levels:(string * Daemon.Level.level) list -> + ?extra_arguments:string list -> t -> unit Lwt.t +(** Spawn [octez-agnostic-baker run] similarly to {!run} but returns the process. *) +val spawn_run : ?env:string String_map.t -> t -> Process.t + (** Liquidity baking vote values. *) type liquidity_baking_vote = Off | On | Pass @@ -144,6 +148,7 @@ val create : ?state_recorder:bool -> ?node_version_check_bypass:bool -> ?node_version_allowed:string -> + ?keep_alive:bool -> Node.t -> Client.t -> t @@ -186,6 +191,7 @@ val create_from_uris : base_dir:string -> node_data_dir:string -> node_rpc_endpoint:Endpoint.t -> + ?keep_alive:bool -> unit -> t @@ -245,6 +251,7 @@ val init : ?state_recorder:bool -> ?node_version_check_bypass:bool -> ?node_version_allowed:string -> + ?keep_alive:bool -> Node.t -> Client.t -> t Lwt.t diff --git a/tezt/tests/agnostic_baker_test.ml b/tezt/tests/agnostic_baker_test.ml index 42a8fe18ac621aa6104d096c1825e7c6dab104f7..8edfae35904ea578464dba6ba441306cea297522 100644 --- a/tezt/tests/agnostic_baker_test.ml +++ b/tezt/tests/agnostic_baker_test.ml @@ -204,7 +204,56 @@ let test_man () = in unit -let register ~migrate_from ~migrate_to = +let test_keep_alive = + Protocol.register_test + ~__FILE__ + ~title:"Agnostic baker --keep-alive" + ~tags:[team; "sandbox"; "agnostic"; "baker"; "keep_alive"] + ~uses:(fun _ -> [Constant.octez_agnostic_baker]) + @@ fun protocol -> + let* node, client = Client.init_with_protocol ~protocol `Client () in + let baker = + Agnostic_baker.create ~node_version_check_bypass:true node client + in + let* () = Node.terminate node in + (* Start the baker with no node running and no [--keep-alive], it crashes. 
*) + let process = Agnostic_baker.spawn_run baker in + let* () = Process.check_error ~msg:(rex "Cannot connect to node") process in + (* Start the baker with no node running and [--keep-alive], it'll wait. *) + let wait_for_cannot_connect = + Agnostic_baker.wait_for + baker + "agnostic_baker_cannot_connect.v0" + (fun _json -> Some ()) + in + let* () = Agnostic_baker.run ~extra_arguments:["--keep-alive"] baker + and* () = wait_for_cannot_connect in + (* Start the node. *) + let* () = Node.run node [] and* () = Node.wait_for_ready node in + (* Bake a block, the baker is connected so it'll see it. *) + let wait_baker_proposal = + (* This is an event emitted by the baker lib. *) + Agnostic_baker.wait_for baker "new_valid_proposal.v0" (fun _json -> Some ()) + in + let wait_period_status = + (* This is an event emitted by the agnostic baker. *) + Agnostic_baker.wait_for baker "period_status.v0" (fun _json -> Some ()) + in + let* () = Client.bake_for_and_wait client + and* () = wait_baker_proposal + and* () = wait_period_status in + (* Kill the node now that they are connected, the baker will stay alive. *) + let* () = Node.terminate node and* () = wait_for_cannot_connect in + (* Redo the procedure, restart the node and wait for the block events. 
*) + let* () = Node.run node [] and* () = Node.wait_for_ready node in + let* () = Client.bake_for_and_wait client + and* () = wait_baker_proposal + and* () = wait_period_status in + unit + +let register ~protocols = test_keep_alive protocols + +let register_migration ~migrate_from ~migrate_to = (* We want to migrate only from Active protocols *) if Agnostic_baker.protocol_status migrate_from = Active then ( migrate ~migrate_from ~migrate_to ~use_remote_signer:false ; diff --git a/tezt/tests/main.ml b/tezt/tests/main.ml index 043bd51028b23a99eed2a0fb74824ce93ac0fbd6..27524ed4fdb3241f015844a45f63fafa4e6cdfd7 100644 --- a/tezt/tests/main.ml +++ b/tezt/tests/main.ml @@ -68,7 +68,7 @@ let register_protocol_independent_tests () = (* Tests related to protocol migration. *) let register_protocol_migration_tests () = let migrate_from = Option.get @@ Protocol.previous_protocol migrate_to in - Agnostic_baker_test.register ~migrate_from ~migrate_to ; + Agnostic_baker_test.register_migration ~migrate_from ~migrate_to ; Mockup.register_constant_migration ~migrate_from ~migrate_to ; Protocol_migration.register ~migrate_from ~migrate_to ; Weeklynet.register () ; @@ -100,7 +100,7 @@ let register_old_protocol_migration_tests () = | _, Alpha -> () (* Already in register_protocol_migration_tests *) | None, _ -> () | Some migrate_from, migrate_to -> - Agnostic_baker_test.register ~migrate_from ~migrate_to ; + Agnostic_baker_test.register_migration ~migrate_from ~migrate_to ; Sc_rollup_migration.register ~migrate_from ~migrate_to ; Dal.register_migration ~migrate_from ~migrate_to) Protocol.all @@ -116,6 +116,7 @@ let register_old_protocol_migration_tests () = let register_protocol_tests_that_use_supports_correctly () = let protocols = Protocol.all in Adaptive_issuance.register ~protocols ; + Agnostic_baker_test.register ~protocols ; Bad_annot.register ~protocols ; Bad_indentation.register ~protocols ; Baker_test.register ~protocols ;