diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 790699f9dae866f6751b078a0692c8bd066ba316..649e35c9808f07e8f73a4779f3604c34b3aeb811 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -136,7 +136,6 @@ module Make_daemon (Agent : AGENT) : ~max_delay:10. ~delay:1. ~factor:1.5 - ~tries:max_int ~is_error:(function Cannot_connect_to_node _ -> true | _ -> false) (fun node_addr -> Rpc_services.get_level ~node_addr) node_addr diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 7bf05ec6b41c98d5356bf30f1754b4b0bdb06312..ff00029801fda9ade66f415ff4d05f2fb0b05c52 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -115,14 +115,23 @@ let copy_dir ?(perm = 0o755) ?progress src dst = let copy_file = copy_file ~count_progress:(fun _ -> ()) -let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = +let rec retry ?max_delay ~delay ~factor ?tries ~is_error ~emit + ?(msg = fun _ -> "") f x = let open Lwt.Syntax in let* result = f x in + let should_retry = match tries with None -> true | Some i -> i > 0 in match result with | Ok _ as r -> Lwt.return r - | Error (err :: _) as errs when tries > 0 && is_error err -> ( + | Error (err :: _ as errs) when should_retry && is_error err -> ( let* () = - emit (Format.sprintf "%sRetrying in %.2f seconds..." msg delay) + emit + (Format.sprintf + "%sRetrying in %.2f seconds%s..." + (msg errs) + delay + (match tries with + | None -> "" + | Some i -> Format.sprintf ", %d attempts left" i)) in let* result = Lwt.pick @@ -134,7 +143,7 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ] in match result with - | `Killed -> Lwt.return errs + | `Killed -> Lwt.return_error errs | `Continue -> let next_delay = delay *. factor in let delay = @@ -148,9 +157,11 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ~delay ~factor ~msg - ~tries:(tries - 1) + ?tries:(Option.map pred tries) ~is_error ~emit f x) - | Error _ as err -> Lwt.return err + | Error errs as err -> + let* () = emit (Format.sprintf "%sNo attempts left." (msg errs)) in + Lwt.return err diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 738c02bf50d3f614947d6bcabfb8450dc473771e..89e42133edca2e84d8f05557f8b4d2c3c9009d0f 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -59,21 +59,21 @@ val copy_file : src:string -> dst:string -> unit val copy_dir : ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit -(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] - retries applying [f x] [tries] until it succeeds or returns an error - when [is_error] is false, at most [tries] number of times. After - each try it waits for a number of seconds, but not more than [max_delay], if - given. The wait time between tries is given by the initial [delay], - multiplied by [factor] at each subsequent try. At each failure, [msg] - together with the current delay is printed using [emit]. *) +(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] retries + applying [f x] [tries] until it succeeds or returns an error when [is_error] + is false, at most [tries] number of times. After each try it waits for a + number of seconds, but not more than [max_delay], if given. The wait time + between tries is given by the initial [delay], multiplied by [factor] at + each subsequent try. At each failure, [msg] can print the error together + with the current delay using [emit]. *) val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> is_error:('err -> bool) -> emit:(string -> unit Lwt.t) -> - ?msg:string -> + ?msg:('err list -> string) -> ('a -> ('b, 'err list) result Lwt.t) -> 'a -> ('b, 'err list) result Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index 503a14f3555ef7c5698ea2e487b1a0c9d2b1fbfb..332ff6af5b5de3b0f14786cbe99a99c3c8181dfa 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -872,13 +872,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -961,8 +961,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli index b6f52af181ca3bdf1f0652d6420743cc283c2da9..cab28a15d13e1c5caacf6703e960cc94fedcca28 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml index 944a5862626fcaeda3122395edf50871696c0e7d..d589e9dc4d2bc6745040474b9344255c3bf82098 100644 --- a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml +++ b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml index 872233eb9aad780675593d1bde82307070bb996e..3e26796b5d95f0dc4527b439486fb073bfac4a64 100644 --- a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml +++ b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml @@ -715,7 +715,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml index 3a65c8616a61e362a2e1aff2b5f594608003736c..10fd988b5893b09e28ac0c3e4c66b67079c942a9 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml @@ -883,13 +883,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -978,8 +978,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli index 151f47d87ee95f7319d2b3b7aa64a97d911e4805..5707a7fea4a0c92cec61ea7a57e89b58a6d82fe2 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml index 1e30f06735e2db5a70593d07c8e3449a54a1c053..714e0c6b3ce5406d27bf4a5d989a79b7a8095d5f 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml index c690a455693135ccdf8d7208320ba6e48a7dc76d..9d31ac0a019a88878b8019fcabf6cc7fa4d19a81 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml @@ -739,7 +739,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index 6e194048aded9c2296f688844914ded27bb6bdf4..97b9dc591e4323af1a4d594b8504448e2f662b61 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -881,13 +881,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -986,8 +986,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.mli b/src/proto_alpha/lib_delegate/baking_scheduling.mli index 151f47d87ee95f7319d2b3b7aa64a97d911e4805..5707a7fea4a0c92cec61ea7a57e89b58a6d82fe2 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.mli +++ b/src/proto_alpha/lib_delegate/baking_scheduling.mli @@ -45,8 +45,8 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> - ?msg:string -> + ?tries:int -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_alpha/lib_delegate/client_daemon.ml b/src/proto_alpha/lib_delegate/client_daemon.ml index 1e30f06735e2db5a70593d07c8e3449a54a1c053..714e0c6b3ce5406d27bf4a5d989a79b7a8095d5f 100644 --- a/src/proto_alpha/lib_delegate/client_daemon.ml +++ b/src/proto_alpha/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_alpha/lib_delegate/operation_worker.ml b/src/proto_alpha/lib_delegate/operation_worker.ml index 99502e11f847687ba4860bbdb8ddc33e94210918..1df6ccc4365fe9e9810c994daa3c41e70c886f6d 100644 --- a/src/proto_alpha/lib_delegate/operation_worker.ml +++ b/src/proto_alpha/lib_delegate/operation_worker.ml @@ -747,7 +747,7 @@ let run ?(monitor_node_operations = true) ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt