diff --git a/devtools/testnet_experiment_tools/tool_023_PtSeouLo.ml b/devtools/testnet_experiment_tools/tool_023_PtSeouLo.ml index 6249f3c50bb7d278d88348ebca20c121bcb48c12..f6a61d08a58ed680a24576554642bce6db55c2f8 100644 --- a/devtools/testnet_experiment_tools/tool_023_PtSeouLo.ml +++ b/devtools/testnet_experiment_tools/tool_023_PtSeouLo.ml @@ -255,8 +255,18 @@ let create_state cctxt ?synchronize ?monitor_node_mempool ~config let* constants = Alpha_services.Constants.all cctxt (`Hash chain_id, `Head 0) in + let*? round_durations = + Round.Durations.create + ~first_round_duration:constants.parametric.minimal_block_delay + ~delay_increment_per_round:constants.parametric.delay_increment_per_round + |> Environment.wrap_tzresult + in let*! operation_worker = - Operation_worker.run ?monitor_node_operations ~constants cctxt + Operation_worker.run + ?monitor_node_operations + ~round_durations + ~constants + cctxt in Baking_scheduling.create_initial_state cctxt @@ -264,7 +274,9 @@ let create_state cctxt ?synchronize ?monitor_node_mempool ~config ~chain config operation_worker + ~constants ~current_proposal + round_durations delegates let compute_current_round_duration round_durations diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 66587361037343d72ada49dd7350ab9dc23d1673..118bacb3ca5fd6f765c9f94a20cfda230db0102f 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -136,7 +136,6 @@ module Make_daemon (Agent : AGENT) : ~max_delay:10. ~delay:1. 
~factor:1.5 - ~tries:max_int ~is_error:(function Cannot_connect_to_node _ -> true | _ -> false) (fun node_addr -> Rpc_services.get_level ~node_addr) node_addr diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 7bf05ec6b41c98d5356bf30f1754b4b0bdb06312..ff00029801fda9ade66f415ff4d05f2fb0b05c52 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -115,14 +115,23 @@ let copy_dir ?(perm = 0o755) ?progress src dst = let copy_file = copy_file ~count_progress:(fun _ -> ()) -let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = +let rec retry ?max_delay ~delay ~factor ?tries ~is_error ~emit + ?(msg = fun _ -> "") f x = let open Lwt.Syntax in let* result = f x in + let should_retry = match tries with None -> true | Some i -> i > 0 in match result with | Ok _ as r -> Lwt.return r - | Error (err :: _) as errs when tries > 0 && is_error err -> ( + | Error (err :: _ as errs) when should_retry && is_error err -> ( let* () = - emit (Format.sprintf "%sRetrying in %.2f seconds..." msg delay) + emit + (Format.sprintf + "%sRetrying in %.2f seconds%s..." + (msg errs) + delay + (match tries with + | None -> "" + | Some i -> Format.sprintf ", %d attempts left" i)) in let* result = Lwt.pick @@ -134,7 +143,7 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ] in match result with - | `Killed -> Lwt.return errs + | `Killed -> Lwt.return_error errs | `Continue -> let next_delay = delay *. factor in let delay = @@ -148,9 +157,11 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ~delay ~factor ~msg - ~tries:(tries - 1) + ?tries:(Option.map pred tries) ~is_error ~emit f x) - | Error _ as err -> Lwt.return err + | Error errs as err -> + let* () = emit (Format.sprintf "%sNo attempts left." 
(msg errs)) in + Lwt.return err diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 738c02bf50d3f614947d6bcabfb8450dc473771e..89e42133edca2e84d8f05557f8b4d2c3c9009d0f 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -59,21 +59,21 @@ val copy_file : src:string -> dst:string -> unit val copy_dir : ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit -(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] - retries applying [f x] [tries] until it succeeds or returns an error - when [is_error] is false, at most [tries] number of times. After - each try it waits for a number of seconds, but not more than [max_delay], if - given. The wait time between tries is given by the initial [delay], - multiplied by [factor] at each subsequent try. At each failure, [msg] - together with the current delay is printed using [emit]. *) +(** [retry ?max_delay ~delay ~factor ?tries ~is_error ~emit ?msg f x] applies + [f x] and retries it while it returns an error for which [is_error] is + true, at most [tries] times, or indefinitely if [tries] is not given. The + wait before a retry starts at [delay] seconds, is multiplied by [factor] + at each subsequent try, and is not more than [max_delay], if given. At + each failure, [msg] applied to the errors is printed using [emit], with + the current delay and, if [tries] is given, the number of attempts left. 
*) val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> is_error:('err -> bool) -> emit:(string -> unit Lwt.t) -> - ?msg:string -> + ?msg:('err list -> string) -> ('a -> ('b, 'err list) result Lwt.t) -> 'a -> ('b, 'err list) result Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index 503a14f3555ef7c5698ea2e487b1a0c9d2b1fbfb..332ff6af5b5de3b0f14786cbe99a99c3c8181dfa 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -872,13 +872,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -961,8 +961,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. 
") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli index b6f52af181ca3bdf1f0652d6420743cc283c2da9..cab28a15d13e1c5caacf6703e960cc94fedcca28 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml index 944a5862626fcaeda3122395edf50871696c0e7d..d589e9dc4d2bc6745040474b9344255c3bf82098 100644 --- a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml +++ b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml index 872233eb9aad780675593d1bde82307070bb996e..3e26796b5d95f0dc4527b439486fb073bfac4a64 100644 --- a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml +++ b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml @@ -715,7 +715,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." 
+ ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_lib.ml b/src/proto_023_PtSeouLo/lib_delegate/baking_lib.ml index b5e992bfa3a4485c869e46838de5f47cad1a6f5d..9793e00e21adcd635284e1bd182fb9a44feb06e9 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_lib.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_lib.ml @@ -57,8 +57,18 @@ let create_state cctxt ?dal_node_rpc_ctxt ?synchronize ?monitor_node_mempool let* constants = Alpha_services.Constants.all cctxt (`Hash chain_id, `Head 0) in + let*? round_durations = + Round.Durations.create + ~first_round_duration:constants.parametric.minimal_block_delay + ~delay_increment_per_round:constants.parametric.delay_increment_per_round + |> Environment.wrap_tzresult + in let*! operation_worker = - Operation_worker.run ?monitor_node_operations ~constants cctxt + Operation_worker.run + ?monitor_node_operations + ~constants + ~round_durations + cctxt in Baking_scheduling.create_initial_state cctxt @@ -67,6 +77,7 @@ let create_state cctxt ?dal_node_rpc_ctxt ?synchronize ?monitor_node_mempool ~chain config operation_worker + round_durations ~current_proposal ~constants delegates diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml index aae0b43742ecbf5bc2ffcb1ac5b538d923443b71..6a66761d424daca4f5f1fa7b036c466cdb254677 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml @@ -608,8 +608,8 @@ let create_round_durations constants = (Round.Durations.create ~first_round_duration ~delay_increment_per_round) let create_initial_state cctxt ?dal_node_rpc_ctxt ?(synchronize = true) ~chain - config operation_worker ~(current_proposal : Baking_state.proposal) - ?constants delegates = + config operation_worker round_durations + ~(current_proposal : Baking_state.proposal) ?constants 
delegates = let open Lwt_result_syntax in (* FIXME: https://gitlab.com/tezos/tezos/-/issues/7391 consider saved attestable value *) @@ -620,7 +620,6 @@ let create_initial_state cctxt ?dal_node_rpc_ctxt ?(synchronize = true) ~chain | Some c -> return c | None -> Alpha_services.Constants.all cctxt (`Hash chain_id, `Head 0) in - let*? round_durations = create_round_durations constants in let* validation_mode = Baking_state.( match config.Baking_configuration.validation with @@ -724,7 +723,6 @@ let create_initial_state cctxt ?dal_node_rpc_ctxt ?(synchronize = true) ~chain in let* round_state = if synchronize then - let*? round_durations = create_round_durations constants in let*? current_round = Baking_actions.compute_round current_proposal round_durations in @@ -873,13 +871,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -968,8 +966,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt @@ -997,7 +995,10 @@ let run cctxt ?dal_node_rpc_ctxt ?canceler ?(stop_on_event = fun _ -> false) | Some current_head -> return current_head | None -> failwith "head stream unexpectedly ended" in - let*! operation_worker = Operation_worker.run ~constants cctxt in + let*? round_durations = create_round_durations constants in + let*! 
operation_worker = + Operation_worker.run ~constants ~round_durations cctxt + in Option.iter (fun canceler -> Lwt_canceler.on_cancel canceler (fun () -> @@ -1011,6 +1012,7 @@ let run cctxt ?dal_node_rpc_ctxt ?canceler ?(stop_on_event = fun _ -> false) ~chain config operation_worker + round_durations ~current_proposal ~constants delegates diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli index 924476911a884ebf9b12aa32b27d08772f015840..d468d0c36d544fb8eba6a3884261ca862ce7ff72 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t @@ -145,6 +145,7 @@ val create_initial_state : chain:Chain_services.chain -> Baking_configuration.t -> Operation_worker.t -> + Round.round_durations -> current_proposal:proposal -> ?constants:Constants.t -> Baking_state_types.Key.t list -> diff --git a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml index 1e30f06735e2db5a70593d07c8e3449a54a1c053..714e0c6b3ce5406d27bf4a5d989a79b7a8095d5f 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. 
~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml index 139c96af3c3386784943a41394c5352bad58f397..5d5079aae645d3d7e173e57c435e54171151d382 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml @@ -36,14 +36,35 @@ module Events = struct let pp_int = Format.pp_print_int - let loop_failed = - declare_1 + let node_unreachable_crash = + declare_0 ~section - ~name:"loop_failed" + ~name:"node_unreachable_crash" ~level:Error - ~msg:"loop failed with {trace}" - ~pp1:Error_monad.pp_print_trace - ("trace", Error_monad.trace_encoding) + ~msg: + "Node unreachable via the monitor_operations RPC. Unable to monitor \ + quorum. Shutting down baker..." + () + + let monitor_operations_retry = + declare_1 + ~section + ~name:"monitor_operations_retry" + ~level:Warning + ~msg:"{msg}" + ~pp1:Format.pp_print_string + ("msg", Data_encoding.string) + + let monitor_operations_stream_timeout = + declare_1 + ~section + ~name:"monitor_operations_stream_timeout" + ~level:Warning + ~msg: + "No data received from monitor_operations RPC for {timeout} seconds. \ + Assuming the stream is stalled and refreshing it." + ~pp1:Format.pp_print_float + ("timeout", Data_encoding.float) let ended = declare_1 @@ -717,7 +738,7 @@ let flush_operation_pool state (head_level, head_round) = let operation_pool = {Operation_pool.empty with consensus = attestations} in state.operation_pool <- operation_pool -let run ?(monitor_node_operations = true) ~constants +let run ?(monitor_node_operations = true) ~constants ~round_durations (cctxt : #Protocol_client_context.full) = let open Lwt_syntax in let state = @@ -731,13 +752,17 @@ let run ?(monitor_node_operations = true) ~constants (* If the call to [monitor_operations] RPC fails, retry 5 times during 25 seconds before crashing the worker . 
*) Utils.retry - ~emit:(cctxt#message "%s") + ~emit:Events.(emit monitor_operations_retry) ~max_delay:10. ~delay:1. ~factor:2. - ~tries:5 + ~tries:10 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun errs -> + Format.asprintf + "Failed to reach the node via the monitor_operations RPC@,%a" + pp_print_trace + errs) (fun () -> (monitor_operations cctxt @@ -745,8 +770,14 @@ let run ?(monitor_node_operations = true) ~constants () in match result with - | Error err -> Events.(emit loop_failed err) - | Ok (head, operation_stream, op_stream_stopper) -> + | Error _ -> + (* The baker failed to reach the node via the monitor_operations + RPC after multiple retries. Because it can no longer monitor the + consensus, it is unable to attest or bake. Rather than remain in this + degraded state or retry indefinitely, we shut it down explicitly. *) + let* () = Events.(emit node_unreachable_crash ()) in + Lwt_exit.exit_and_raise (*ECONNREFUSED*) 111 + | Ok (((_, round) as head), operation_stream, op_stream_stopper) -> () [@profiler.stop] ; () [@profiler.record @@ -769,10 +800,34 @@ let run ?(monitor_node_operations = true) ~constants state head [@profiler.record_f {verbosity = Notice} "update operations pool"] ; + let stream_timeout = + Round.round_duration round_durations (Round.succ round) + |> Period.to_seconds |> Int64.to_float + in let rec loop () = - let* ops = Lwt_stream.get operation_stream in - match ops with - | None -> + let* result = + Lwt.pick + [ + (let* ops = Lwt_stream.get operation_stream in + return (`Stream ops)); + (let* () = Lwt_unix.sleep stream_timeout in + return `Timeout); + ] + in + match result with + | `Timeout -> + (* The monitor_operations RPC has neither produced new data nor + closed the stream for some time. This can occur naturally, but + may also indicate a stalled stream. Restarting it is + inexpensive and can prevent the baker from hanging + indefinitely. 
*) + let* () = + Events.(emit monitor_operations_stream_timeout stream_timeout) + in + op_stream_stopper () ; + let* () = reset_monitoring state in + worker_loop () + | `Stream None -> (* When the stream closes, it means a new head has been set, we reset the monitoring and flush current operations *) let* () = Events.(emit end_of_stream ()) in @@ -786,7 +841,7 @@ let run ?(monitor_node_operations = true) ~constants in () [@profiler.stop] ; worker_loop () - | Some ops -> + | `Stream (Some ops) -> (state.operation_pool <- Operation_pool.add_operations state.operation_pool ops) [@profiler.aggregate_f {verbosity = Info} "add operations"] ; @@ -802,16 +857,11 @@ let run ?(monitor_node_operations = true) ~constants (loop () [@profiler.record_s {verbosity = Notice} "operations processing"]) in - Lwt.dont_wait - (fun () -> - Lwt.finalize - (fun () -> - if state.monitor_node_operations then worker_loop () else return_unit) - (fun () -> - let* _ = shutdown_worker state in - return_unit)) - (fun exn -> - Events.(emit__dont_wait__use_with_care ended (Printexc.to_string exn))) ; + if state.monitor_node_operations then + Lwt.dont_wait + (fun () -> worker_loop ()) + (fun exn -> + Events.(emit__dont_wait__use_with_care ended (Printexc.to_string exn))) ; return state let retrieve_pending_operations cctxt state = diff --git a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.mli b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.mli index 944bfbd54fb98cba72f247e0cc8f089603c6fb23..aa29853afa64fdd307c3b1b9db0e7340a1bcdda8 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.mli +++ b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.mli @@ -55,16 +55,18 @@ type event = (** {1 Constructors}*) -(** [run ?monitor_node_operations cctxt] spawns an operation worker. +(** [run ?monitor_node_operations ~constants ~round_durations cctxt] spawns an + operation worker. @param monitor_node_operations monitor operations on the node (defaults: [true]). 
Set [monitor_node_operations] to [false] to only consider externally provided (non-node) operations. *) val run : ?monitor_node_operations:bool -> - constants:Constants.t -> + constants:Protocol.Alpha_context.Constants.t -> + round_durations:Protocol.Alpha_context.Round.round_durations -> #Protocol_client_context.full -> - t Lwt.t + t Environment.Lwt.t (** {1 Utilities} *) diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index b007199c193018ea0589f1a4c6cde047c1c9acfd..4a32d353647c3d96d5bdac2bb913dddd16f8f10b 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -883,13 +883,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -978,8 +978,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. 
") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.mli b/src/proto_alpha/lib_delegate/baking_scheduling.mli index 151f47d87ee95f7319d2b3b7aa64a97d911e4805..5707a7fea4a0c92cec61ea7a57e89b58a6d82fe2 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.mli +++ b/src/proto_alpha/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_alpha/lib_delegate/client_daemon.ml b/src/proto_alpha/lib_delegate/client_daemon.ml index 1e30f06735e2db5a70593d07c8e3449a54a1c053..714e0c6b3ce5406d27bf4a5d989a79b7a8095d5f 100644 --- a/src/proto_alpha/lib_delegate/client_daemon.ml +++ b/src/proto_alpha/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_alpha/lib_delegate/operation_worker.ml b/src/proto_alpha/lib_delegate/operation_worker.ml index 139c96af3c3386784943a41394c5352bad58f397..ed02e0c4fe27831c4af576b9581241761ba0e9d7 100644 --- a/src/proto_alpha/lib_delegate/operation_worker.ml +++ b/src/proto_alpha/lib_delegate/operation_worker.ml @@ -737,7 +737,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt