From 40b3b6c2f9d63546cf90d97045822b50b6f39095 Mon Sep 17 00:00:00 2001 From: Albin Coquereau Date: Thu, 9 Oct 2025 14:48:28 +0200 Subject: [PATCH 1/2] stdlib/utils: add errors in retry print message --- src/lib_stdlib_unix/utils.ml | 18 +++++++++++++----- src/lib_stdlib_unix/utils.mli | 16 ++++++++-------- .../lib_delegate/baking_scheduling.ml | 5 +++-- .../lib_delegate/baking_scheduling.mli | 2 +- .../lib_delegate/operation_worker.ml | 2 +- .../lib_delegate/baking_scheduling.ml | 5 +++-- .../lib_delegate/baking_scheduling.mli | 2 +- .../lib_delegate/operation_worker.ml | 2 +- .../lib_delegate/baking_scheduling.ml | 5 +++-- .../lib_delegate/baking_scheduling.mli | 2 +- .../lib_delegate/operation_worker.ml | 2 +- 11 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 7bf05ec6b41c..92b7b452e4c2 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -115,14 +115,20 @@ let copy_dir ?(perm = 0o755) ?progress src dst = let copy_file = copy_file ~count_progress:(fun _ -> ()) -let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = +let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit + ?(msg = fun _ -> "") f x = let open Lwt.Syntax in let* result = f x in match result with | Ok _ as r -> Lwt.return r - | Error (err :: _) as errs when tries > 0 && is_error err -> ( + | Error (err :: _ as errs) when tries > 0 && is_error err -> ( let* () = - emit (Format.sprintf "%sRetrying in %.2f seconds..." msg delay) + emit + (Format.sprintf + "%sRetrying in %.2f seconds, %d attempts left..." + (msg errs) + delay + tries) in let* result = Lwt.pick @@ -134,7 +140,7 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ] in match result with - | `Killed -> Lwt.return errs + | `Killed -> Lwt.return_error errs | `Continue -> let next_delay = delay *. factor in let delay = @@ -153,4 +159,6 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?(msg = "") f x = ~emit f x) - | Error _ as err -> Lwt.return err + | Error errs as err -> + let* () = emit (Format.sprintf "%sNo attempts left." (msg errs)) in + Lwt.return err diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 738c02bf50d3..94d6e2fbb308 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -59,13 +59,13 @@ val copy_file : src:string -> dst:string -> unit val copy_dir : ?perm:int -> ?progress:string * Terminal.Color.t -> string -> string -> unit -(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] - retries applying [f x] [tries] until it succeeds or returns an error - when [is_error] is false, at most [tries] number of times. After - each try it waits for a number of seconds, but not more than [max_delay], if - given. The wait time between tries is given by the initial [delay], - multiplied by [factor] at each subsequent try. At each failure, [msg] - together with the current delay is printed using [emit]. *) +(** [retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ?msg f x] retries + applying [f x] [tries] until it succeeds or returns an error when [is_error] + is false, at most [tries] number of times. After each try it waits for a + number of seconds, but not more than [max_delay], if given. The wait time + between tries is given by the initial [delay], multiplied by [factor] at + each subsequent try. At each failure, [msg] can print the error together + with the current delay using [emit]. *) val retry : ?max_delay:float -> delay:float -> @@ -73,7 +73,7 @@ val retry : tries:int -> is_error:('err -> bool) -> emit:(string -> unit Lwt.t) -> - ?msg:string -> + ?msg:('err list -> string) -> ('a -> ('b, 'err list) result Lwt.t) -> 'a -> ('b, 'err list) result Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index 503a14f3555e..c4748657f384 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -872,7 +872,7 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ~tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay @@ -962,7 +962,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~delay:1. ~factor:2. ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli index b6f52af181ca..498031186a8d 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli @@ -46,7 +46,7 @@ val retry : delay:float -> factor:float -> tries:int -> - ?msg:string -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml index 872233eb9aad..3e26796b5d95 100644 --- a/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml +++ b/src/proto_022_PsRiotum/lib_delegate/operation_worker.ml @@ -715,7 +715,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml index 3a65c8616a61..a91f40546fbd 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml @@ -883,7 +883,7 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ~tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay @@ -979,7 +979,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~delay:1. ~factor:2. ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli index 151f47d87ee9..47427edc3af9 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli @@ -46,7 +46,7 @@ val retry : delay:float -> factor:float -> tries:int -> - ?msg:string -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml index c690a4556931..9d31ac0a019a 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/operation_worker.ml @@ -739,7 +739,7 @@ let run ?(monitor_node_operations = true) ~constants ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index 6e194048aded..cfe19edef1e6 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -881,7 +881,7 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = "Connection failed. ") f x = + ~tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay @@ -987,7 +987,8 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~delay:1. ~factor:2. ~tries:max_int - ~msg:"Failed to register profiles, DAL node is not reachable. " + ~msg:(fun _errs -> + "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) ()) dal_node_rpc_ctxt diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.mli b/src/proto_alpha/lib_delegate/baking_scheduling.mli index 151f47d87ee9..47427edc3af9 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.mli +++ b/src/proto_alpha/lib_delegate/baking_scheduling.mli @@ -46,7 +46,7 @@ val retry : delay:float -> factor:float -> tries:int -> - ?msg:string -> + ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> 'b tzresult Lwt.t diff --git a/src/proto_alpha/lib_delegate/operation_worker.ml b/src/proto_alpha/lib_delegate/operation_worker.ml index 99502e11f847..1df6ccc4365f 100644 --- a/src/proto_alpha/lib_delegate/operation_worker.ml +++ b/src/proto_alpha/lib_delegate/operation_worker.ml @@ -747,7 +747,7 @@ let run ?(monitor_node_operations = true) ~factor:2. ~tries:5 ~is_error:(function _ -> true) - ~msg:"unable to call monitor operations RPC." + ~msg:(fun _ -> "unable to call monitor operations RPC.") (fun () -> (monitor_operations cctxt -- GitLab From 3bba802b21fdca9f5e9ce877ecbce8c243eea569 Mon Sep 17 00:00:00 2001 From: Albin Coquereau Date: Thu, 9 Oct 2025 14:52:37 +0200 Subject: [PATCH 2/2] stdlib/utils: make tries count optional --- src/lib_agnostic_baker/daemon.ml | 1 - src/lib_stdlib_unix/utils.ml | 13 ++++++++----- src/lib_stdlib_unix/utils.mli | 2 +- .../lib_delegate/baking_scheduling.ml | 5 ++--- .../lib_delegate/baking_scheduling.mli | 2 +- .../lib_delegate/client_daemon.ml | 7 +------ .../lib_delegate/baking_scheduling.ml | 5 ++--- .../lib_delegate/baking_scheduling.mli | 2 +- .../lib_delegate/client_daemon.ml | 7 +------ src/proto_alpha/lib_delegate/baking_scheduling.ml | 5 ++--- src/proto_alpha/lib_delegate/baking_scheduling.mli | 2 +- src/proto_alpha/lib_delegate/client_daemon.ml | 7 +------ 12 files changed, 21 insertions(+), 37 deletions(-) diff --git a/src/lib_agnostic_baker/daemon.ml b/src/lib_agnostic_baker/daemon.ml index 790699f9dae8..649e35c9808f 100644 --- a/src/lib_agnostic_baker/daemon.ml +++ b/src/lib_agnostic_baker/daemon.ml @@ -136,7 +136,6 @@ module Make_daemon (Agent : AGENT) : ~max_delay:10. ~delay:1. ~factor:1.5 - ~tries:max_int ~is_error:(function Cannot_connect_to_node _ -> true | _ -> false) (fun node_addr -> Rpc_services.get_level ~node_addr) node_addr diff --git a/src/lib_stdlib_unix/utils.ml b/src/lib_stdlib_unix/utils.ml index 92b7b452e4c2..ff00029801fd 100644 --- a/src/lib_stdlib_unix/utils.ml +++ b/src/lib_stdlib_unix/utils.ml @@ -115,20 +115,23 @@ let copy_dir ?(perm = 0o755) ?progress src dst = let copy_file = copy_file ~count_progress:(fun _ -> ()) -let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit +let rec retry ?max_delay ~delay ~factor ?tries ~is_error ~emit ?(msg = fun _ -> "") f x = let open Lwt.Syntax in let* result = f x in + let should_retry = match tries with None -> true | Some i -> i > 0 in match result with | Ok _ as r -> Lwt.return r - | Error (err :: _ as errs) when tries > 0 && is_error err -> ( + | Error (err :: _ as errs) when should_retry && is_error err -> ( let* () = emit (Format.sprintf - "%sRetrying in %.2f seconds, %d attempts left..." + "%sRetrying in %.2f seconds%s..." (msg errs) delay - tries) + (match tries with + | None -> "" + | Some i -> Format.sprintf ", %d attempts left" i)) in let* result = Lwt.pick @@ -154,7 +157,7 @@ let rec retry ?max_delay ~delay ~factor ~tries ~is_error ~emit ~delay ~factor ~msg - ~tries:(tries - 1) + ?tries:(Option.map pred tries) ~is_error ~emit f diff --git a/src/lib_stdlib_unix/utils.mli b/src/lib_stdlib_unix/utils.mli index 94d6e2fbb308..89e42133edca 100644 --- a/src/lib_stdlib_unix/utils.mli +++ b/src/lib_stdlib_unix/utils.mli @@ -70,7 +70,7 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> is_error:('err -> bool) -> emit:(string -> unit Lwt.t) -> ?msg:('err list -> string) -> diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml index c4748657f384..332ff6af5b5d 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.ml @@ -872,13 +872,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = fun _errs -> "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -961,7 +961,6 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int ~msg:(fun _errs -> "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) diff --git a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli index 498031186a8d..cab28a15d13e 100644 --- a/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli +++ b/src/proto_022_PsRiotum/lib_delegate/baking_scheduling.mli @@ -45,7 +45,7 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> diff --git a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml index 944a5862626f..d589e9dc4d2b 100644 --- a/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml +++ b/src/proto_022_PsRiotum/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml index a91f40546fbd..10fd988b5893 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.ml @@ -883,13 +883,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = fun _errs -> "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -978,7 +978,6 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int ~msg:(fun _errs -> "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) diff --git a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli index 47427edc3af9..5707a7fea4a0 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli +++ b/src/proto_023_PtSeouLo/lib_delegate/baking_scheduling.mli @@ -45,7 +45,7 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> diff --git a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml index 1e30f06735e2..714e0c6b3ce5 100644 --- a/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml +++ b/src/proto_023_PtSeouLo/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.ml b/src/proto_alpha/lib_delegate/baking_scheduling.ml index cfe19edef1e6..97b9dc591e43 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.ml +++ b/src/proto_alpha/lib_delegate/baking_scheduling.ml @@ -881,13 +881,13 @@ let perform_sanity_check cctxt ~chain_id = return_unit let retry (cctxt : #Protocol_client_context.full) ?max_delay ~delay ~factor - ~tries ?(msg = fun _errs -> "Connection failed. ") f x = + ?tries ?(msg = fun _errs -> "Connection failed. ") f x = Utils.retry ~emit:(cctxt#message "%s") ?max_delay ~delay ~factor - ~tries + ?tries ~msg ~is_error:(function | RPC_client_errors.Request_failed {error = Connection_failed _; _} -> @@ -986,7 +986,6 @@ let register_dal_profiles cctxt dal_node_rpc_ctxt delegates = ~max_delay:2. ~delay:1. ~factor:2. - ~tries:max_int ~msg:(fun _errs -> "Failed to register profiles, DAL node is not reachable. ") (fun () -> register dal_ctxt) diff --git a/src/proto_alpha/lib_delegate/baking_scheduling.mli b/src/proto_alpha/lib_delegate/baking_scheduling.mli index 47427edc3af9..5707a7fea4a0 100644 --- a/src/proto_alpha/lib_delegate/baking_scheduling.mli +++ b/src/proto_alpha/lib_delegate/baking_scheduling.mli @@ -45,7 +45,7 @@ val retry : ?max_delay:float -> delay:float -> factor:float -> - tries:int -> + ?tries:int -> ?msg:(tztrace -> string) -> ('a -> 'b tzresult Lwt.t) -> 'a -> diff --git a/src/proto_alpha/lib_delegate/client_daemon.ml b/src/proto_alpha/lib_delegate/client_daemon.ml index 1e30f06735e2..714e0c6b3ce5 100644 --- a/src/proto_alpha/lib_delegate/client_daemon.ml +++ b/src/proto_alpha/lib_delegate/client_daemon.ml @@ -37,12 +37,7 @@ let rec retry_on_disconnection (cctxt : #Protocol_client_context.full) f = let* () = Client_confirmations.wait_for_bootstrapped ~retry: - (Baking_scheduling.retry - cctxt - ~max_delay:10. - ~delay:1. - ~factor:1.5 - ~tries:max_int) + (Baking_scheduling.retry cctxt ~max_delay:10. ~delay:1. ~factor:1.5) cctxt in retry_on_disconnection cctxt f -- GitLab