diff --git a/CHANGES.rst b/CHANGES.rst index ee93685813e90bd6ef39c43dac92a77d03d8a576..327769737938f19d495e09ba3bdb04aacfb3efb0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -197,6 +197,8 @@ Protocol :gl:`!15677`) - Set the message validation function at node startup, fixing https://gitlab.com/tezos/tezos/-/issues/7629. (MR :gl:`!15830`) +- A warning has been introduced in case it is observed that the DAL node lags + behind the L1 node. (MR :gl:`!15756`) Miscellaneous ------------- diff --git a/src/bin_dal_node/RPC_server.ml b/src/bin_dal_node/RPC_server.ml index 4647bcb216812ec6395a719b601b1b64d00fc492..75bd288a52bfd3d9c20795332ebd499686d87575 100644 --- a/src/bin_dal_node/RPC_server.ml +++ b/src/bin_dal_node/RPC_server.ml @@ -376,6 +376,35 @@ module Profile_handlers = struct in Lwt.return_unit + (** [warn_if_lagging store ~attestation_level] loads the last processed level from [store] and emits [Event.get_attestable_slots_future_level_warning] when that level, plus the two-level processing delay and one level of slack, is still strictly below [attestation_level - 1]. In every other case, including when no level could be loaded, it does nothing. Load errors are not propagated to the caller. *) let warn_if_lagging store ~attestation_level = + let open Lwt_result_syntax in + let*! last_processed_level = + let last_processed_level_store = Store.last_processed_level store in + Store.Last_processed_level.load last_processed_level_store + in + match last_processed_level with + | Ok (Some lpl) -> + (* The L1 node's level is at least [current_level = lpl + 2], because the + DAL node processes blocks with a delay of two levels, to be sure that + processed blocks are final. *) + let current_level = Int32.add lpl 2l in + (* The baker's current level is the same as its L1 node and is the one + of the latest seen proposal (i.e. block). The baker asks for slots' + status when it has seen a proposal at [attestation_level - 1]. *) + let current_baker_level = Int32.sub attestation_level 1l in + (* We check that the baker is not ahead of the DAL node, which would + mean that the DAL node is lagging. We allow a slack of 1 level. 
 *) + if Int32.succ current_level < current_baker_level then + Event.( + emit + get_attestable_slots_future_level_warning + (current_level, current_baker_level)) + else Lwt.return_unit + | _ -> + (* We simply don't do anything if we couldn't obtain the + [last_processed_level] (the load returned an error or no value). This should not happen though. *) + Lwt.return_unit + let get_attestable_slots ctxt pkh attested_level () () = let get_attestable_slots ~shard_indices store proto_parameters ~attested_level = @@ -429,12 +458,13 @@ module Profile_handlers = struct return (Types.Attestable_slots {slots = flags; published_level}) in call_handler1 (fun () -> + let open Lwt_result_syntax in let store = Node_context.get_store ctxt in + let attestation_level = Int32.pred attested_level in + let*! () = warn_if_lagging store ~attestation_level in (* For retrieving the assigned shard indexes, we consider the committee - at [attested_level - 1], because the (DAL) attestations in the blocks + at [attestation_level], because the (DAL) attestations in the blocks at level [attested_level] refer to the predecessor level. 
*) - let attestation_level = Int32.pred attested_level in - let open Lwt_result_syntax in let* shard_indices = Node_context.fetch_assigned_shard_indices ctxt diff --git a/src/bin_dal_node/event.ml b/src/bin_dal_node/event.ml index 74ec732b7fa091993dda0d10f1a0cffa468b3e43..4f9e0375b988ab1229ccf4439c5d9f4de72d2622 100644 --- a/src/bin_dal_node/event.ml +++ b/src/bin_dal_node/event.ml @@ -678,7 +678,7 @@ let get_attestable_slots_ok_notice = let get_attestable_slots_not_ok_warning = declare_4 ~section - ~name:"get_attestable_slots_warning" + ~name:"get_attestable_slots_missing_shards_warning" ~msg: "For slots {slots_indices} published at level {published_level}, \ {attester} missed shards:\n\ @@ -701,6 +701,18 @@ let get_attestable_slots_not_ok_warning = stored_shards expected_shards)) +(* Warning emitted by [warn_if_lagging] in RPC_server.ml. The two fields carry the [(current_level, current_baker_level)] pair computed there: the last processed level plus 2, and the attestation level minus 1. *) let get_attestable_slots_future_level_warning = + declare_2 + ~section + ~name:"get_attestable_slots_future_level_warning" + ~msg: + "It looks like the DAL node is lagging (its current level is \ + {current_level}, while the Layer1 node's level is \ + {current_baker_level})." + ~level:Warning + ("current_level", Data_encoding.int32) + ("current_baker_level", Data_encoding.int32) + let warn_attester_not_dal_attesting = declare_2 ~section