From 5a675403c50875a9480c702c3dc52629a572c9be Mon Sep 17 00:00:00 2001
From: Guillaume Genestier
Date: Thu, 9 Jan 2025 15:40:39 +0100
Subject: [PATCH 1/2] Baker: Check Dal health every 10 levels

---
 .../lib_delegate/baking_actions.ml            | 43 ++++++++++++-------
 src/proto_alpha/lib_delegate/baking_events.ml |  4 ++
 2 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/proto_alpha/lib_delegate/baking_actions.ml b/src/proto_alpha/lib_delegate/baking_actions.ml
index 71db50334edd..4e360661ea58 100644
--- a/src/proto_alpha/lib_delegate/baking_actions.ml
+++ b/src/proto_alpha/lib_delegate/baking_actions.ml
@@ -448,26 +448,37 @@ let prepare_block (global_state : global_state) (block_to_bake : block_to_bake)
       baking_votes;
     }
 
-let only_if_dal_feature_enabled =
-  let no_dal_node_warning_counter = ref 0 in
-  fun state ~default_value f ->
-    let open Lwt_syntax in
-    let open Constants in
-    let Parametric.{dal = {feature_enable; _}; _} =
-      state.global_state.constants.parametric
-    in
-    if feature_enable then
+let only_if_dal_feature_enabled state ~default_value f =
+  let open Lwt_syntax in
+  let open Constants in
+  let Parametric.{dal = {feature_enable; _}; _} =
+    state.global_state.constants.parametric
+  in
+  (* We print warning about DAL state only every 10 levels. *)
+  let current_level = state.level_state.current_level in
+  let level_with_warning = Int32.rem current_level 10l = 1l in
+  if feature_enable then
+    if level_with_warning then
       match state.global_state.dal_node_rpc_ctxt with
       | None ->
-          incr no_dal_node_warning_counter ;
+          let* () = Events.(emit no_dal_node_running ()) in
+          return default_value
+      | Some ctxt ->
+          let* health = Node_rpc.get_dal_health ctxt in
           let* () =
-            if !no_dal_node_warning_counter mod 10 = 1 then
-              Events.(emit no_dal_node_running ())
-            else return_unit
+            match health with
+            | Ok {Tezos_dal_node_services.Types.Health.status = Up; _} ->
+                return_unit
+            | Ok health -> Events.(emit unhealthy_dal_node (ctxt#base, health))
+            | Error _ -> Events.(emit unreachable_dal_node ctxt#base)
           in
-          return default_value
-      | Some ctxt -> f ctxt
-    else return default_value
+          f ctxt
+    else
+      Option.fold
+        ~none:(return default_value)
+        ~some:f
+        state.global_state.dal_node_rpc_ctxt
+  else return default_value
 
 let process_dal_rpc_result state delegate level round =
   let open Lwt_result_syntax in
diff --git a/src/proto_alpha/lib_delegate/baking_events.ml b/src/proto_alpha/lib_delegate/baking_events.ml
index 3987a4e0c53a..b87562cfaef8 100644
--- a/src/proto_alpha/lib_delegate/baking_events.ml
+++ b/src/proto_alpha/lib_delegate/baking_events.ml
@@ -1133,6 +1133,10 @@ module Actions = struct
       ("level", Data_encoding.int32)
 
   let no_dal_node_running = Commands.no_dal_node_running
+
+  let unhealthy_dal_node = Commands.unhealthy_dal_node
+
+  let unreachable_dal_node = Commands.unreachable_dal_node
 end
 
 module VDF = struct
-- 
GitLab

From df8f6ca0f440beb3a9a043ad8832bb7de1d992aa Mon Sep 17 00:00:00 2001
From: Guillaume Genestier
Date: Thu, 9 Jan 2025 16:01:49 +0100
Subject: [PATCH 2/2] Baker: Put DAL warnings in a separate function

---
 .../lib_delegate/baking_actions.ml            | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/proto_alpha/lib_delegate/baking_actions.ml b/src/proto_alpha/lib_delegate/baking_actions.ml
index 4e360661ea58..2d2b9be42351 100644
--- a/src/proto_alpha/lib_delegate/baking_actions.ml
+++ b/src/proto_alpha/lib_delegate/baking_actions.ml
@@ -448,36 +448,36 @@ let prepare_block (global_state : global_state) (block_to_bake : block_to_bake)
       baking_votes;
     }
 
+let dal_checks_and_warnings state =
+  let open Lwt_syntax in
+  (* We print warning about DAL state only every 10 levels. *)
+  let current_level = state.level_state.current_level in
+  let level_with_warning = Int32.rem current_level 10l = 1l in
+  if level_with_warning then
+    match state.global_state.dal_node_rpc_ctxt with
+    | None -> Events.(emit no_dal_node_running ())
+    | Some ctxt -> (
+        let* health = Node_rpc.get_dal_health ctxt in
+        match health with
+        | Ok health -> (
+            match health.status with
+            | Tezos_dal_node_services.Types.Health.Up -> return_unit
+            | _ -> Events.(emit unhealthy_dal_node) (ctxt#base, health))
+        | Error _ -> Events.(emit unreachable_dal_node) ctxt#base)
+  else return_unit
+
 let only_if_dal_feature_enabled state ~default_value f =
   let open Lwt_syntax in
   let open Constants in
   let Parametric.{dal = {feature_enable; _}; _} =
     state.global_state.constants.parametric
   in
-  (* We print warning about DAL state only every 10 levels. *)
-  let current_level = state.level_state.current_level in
-  let level_with_warning = Int32.rem current_level 10l = 1l in
   if feature_enable then
-    if level_with_warning then
-      match state.global_state.dal_node_rpc_ctxt with
-      | None ->
-          let* () = Events.(emit no_dal_node_running ()) in
-          return default_value
-      | Some ctxt ->
-          let* health = Node_rpc.get_dal_health ctxt in
-          let* () =
-            match health with
-            | Ok {Tezos_dal_node_services.Types.Health.status = Up; _} ->
-                return_unit
-            | Ok health -> Events.(emit unhealthy_dal_node (ctxt#base, health))
-            | Error _ -> Events.(emit unreachable_dal_node ctxt#base)
-          in
-          f ctxt
-    else
-      Option.fold
-        ~none:(return default_value)
-        ~some:f
-        state.global_state.dal_node_rpc_ctxt
+    let* () = dal_checks_and_warnings state in
+    Option.fold
+      ~none:(return default_value)
+      ~some:f
+      state.global_state.dal_node_rpc_ctxt
   else return default_value
 
 let process_dal_rpc_result state delegate level round =
-- 
GitLab