From e20cb8e2b829dcf5f27df2325e616a731d78ae79 Mon Sep 17 00:00:00 2001 From: Guillaume Genestier Date: Wed, 22 Oct 2025 18:35:34 +0200 Subject: [PATCH 1/2] DAL: Clean the "orphan" levels in the store when attestation lag reduces --- src/lib_dal_node/block_handler.ml | 55 ++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/lib_dal_node/block_handler.ml b/src/lib_dal_node/block_handler.ml index 0135c4ef65bb..f4ac9aa64e7f 100644 --- a/src/lib_dal_node/block_handler.ml +++ b/src/lib_dal_node/block_handler.ml @@ -103,24 +103,47 @@ let remove_old_level_stored_data proto_parameters ctxt current_level = (* [attestation_lag] levels after the publication of a commitment, if it has not been attested it will never be so we can safely - remove it from the store. This function removes from the store - all the slots (and their shards) published at the given level and - which are not listed in the [attested] list. *) -let remove_unattested_slots_and_shards proto_parameters ctxt ~published_level - attested = + remove it from the store. *) +let remove_unattested_slots_and_shards ~prev_proto_parameters proto_parameters + ctxt ~attested_level attested = let open Lwt_syntax in let number_of_slots = proto_parameters.Types.number_of_slots in let slot_size = proto_parameters.cryptobox_parameters.slot_size in let store = Node_context.get_store ctxt in - List.iter_s - (fun slot_index -> - if attested slot_index then return_unit - else - let slot_id : Types.slot_id = - {slot_level = published_level; slot_index} - in - remove_slots_and_shards ~slot_size store slot_id) - (0 -- (number_of_slots - 1)) + let previous_lag = prev_proto_parameters.Types.attestation_lag in + let current_lag = proto_parameters.attestation_lag in + (* This function removes from the store all the slots (and their shards) + published [lag] levels before [attested_level] and for which the + [attested] predicate returns [false]. 
*) + let remove_slots_and_shards lag attested = + let published_level = Int32.(sub attested_level (of_int lag)) in + List.iter_s + (fun slot_index -> + if attested slot_index then return_unit + else + let slot_id : Types.slot_id = + {slot_level = published_level; slot_index} + in + remove_slots_and_shards ~slot_size store slot_id) + (0 -- (number_of_slots - 1)) + in + let* () = + (* TODO: https://gitlab.com/tezos/tezos/-/issues/8065 + Remove after dynamic lag is active. + This code removes all slots and shards associated to the "orphan" levels + which are guaranteed to never be attested when a protocol migration + reduces the attestation lag. *) + if previous_lag > current_lag then + let rec loop lag = + if lag = current_lag then return_unit + else + let* () = remove_slots_and_shards lag (fun _ -> false) in + loop (lag - 1) + in + loop previous_lag + else return_unit + in + remove_slots_and_shards current_lag attested (* Here [block_level] is the same as in [new_finalized_payload_level]. When the DAL node is up-to-date and the current L1 head is at level L, we call this @@ -444,10 +467,10 @@ let process_finalized_block_data ctxt cctxt store ~prev_proto_parameters in let*! 
() = (remove_unattested_slots_and_shards + ~prev_proto_parameters proto_parameters ctxt - ~published_level: - Int32.(sub block_level (of_int proto_parameters.attestation_lag)) + ~attested_level:block_level (Plugin.is_attested dal_attestation) [@profiler.record_s {verbosity = Notice} "remove_unattested_slots_and_shards"]) -- GitLab From 0fc06a2ccfb2dc7309d307c0c1590640025c6035 Mon Sep 17 00:00:00 2001 From: Guillaume Genestier Date: Thu, 23 Oct 2025 14:22:29 +0200 Subject: [PATCH 2/2] DAL: Clean the orphan levels in the DAL SQL skip-list when attestation lag reduces --- src/lib_dal_node/block_handler.ml | 87 +++++++++++++++++++++--------- src/lib_dal_node/block_handler.mli | 2 +- src/lib_dal_node/store_cleanup.ml | 4 +- 3 files changed, 64 insertions(+), 29 deletions(-) diff --git a/src/lib_dal_node/block_handler.ml b/src/lib_dal_node/block_handler.ml index f4ac9aa64e7f..9a2f4e599376 100644 --- a/src/lib_dal_node/block_handler.ml +++ b/src/lib_dal_node/block_handler.ml @@ -66,31 +66,66 @@ let remove_slots_and_shards ~slot_size (store : Store.t) {!Node_context.level_to_gc ~current_level}. It also removes skip list cells attested at that level. *) let remove_old_level_stored_data proto_parameters ctxt current_level = - let open Lwt_syntax in + let open Lwt_result_syntax in let store = Node_context.get_store ctxt in - Node_context.level_to_gc ctxt proto_parameters ~current_level - |> Option.iter_s (fun oldest_level -> - let* () = - (* TODO: https://gitlab.com/tezos/tezos/-/issues/7258 - We may want to remove this check. 
*) - if Node_context.supports_refutations ctxt then - let published_level = - Int32.( - sub - oldest_level - (of_int proto_parameters.Types.attestation_lag)) - in - let* res = Store.Skip_list_cells.remove store ~published_level in - match res with - | Ok () -> Event.emit_removed_skip_list_cells ~level:oldest_level - | Error error -> - Event.emit_removing_skip_list_cells_failed - ~level:oldest_level - ~error - else return_unit - in - let number_of_slots = proto_parameters.Types.number_of_slots in - List.iter_s + match Node_context.level_to_gc ctxt proto_parameters ~current_level with + | None -> return_unit + | Some oldest_level -> + (* The protocol parameters to consider when cleaning are the ones at the + time of the level we are cleaning. *) + let*? proto_parameters = + Node_context.get_proto_parameters ctxt ~level:(`Level oldest_level) + in + let current_lag = proto_parameters.attestation_lag in + (* This function removes from the skip-list all the cells for slots + published [lag] levels before the [oldest_level]. *) + let clean_skip_list_cells lag = + let published_level = Int32.(sub oldest_level (of_int lag)) in + let*! res = Store.Skip_list_cells.remove store ~published_level in + let*! () = + match res with + | Ok () -> Event.emit_removed_skip_list_cells ~level:oldest_level + | Error error -> + Event.emit_removing_skip_list_cells_failed + ~level:oldest_level + ~error + in + return_unit + in + let* () = + (* TODO: https://gitlab.com/tezos/tezos/-/issues/7258 + We may want to remove this check. *) + if Node_context.supports_refutations ctxt then + (* TODO: https://gitlab.com/tezos/tezos/-/issues/8065 + Remove after dynamic lag is active. + This code cleans the skip-list for the "orphan" levels which are + guaranteed to never be attested when a protocol migration reduces + the attestation lag. *) + let* () = + if oldest_level > 1l then + let*? 
prev_proto_parameters = + Node_context.get_proto_parameters + ctxt + ~level:(`Level (Int32.pred oldest_level)) + in + let previous_lag = prev_proto_parameters.Types.attestation_lag in + if previous_lag > current_lag then + let rec loop lag = + if lag = current_lag then return_unit + else + let* () = clean_skip_list_cells lag in + loop (lag - 1) + in + loop previous_lag + else return_unit + else return_unit + in + clean_skip_list_cells current_lag + else return_unit + in + let number_of_slots = proto_parameters.Types.number_of_slots in + Lwt_result.ok + @@ List.iter_s (fun slot_index -> let slot_id : Types.slot_id = {slot_level = oldest_level; slot_index} @@ -99,7 +134,7 @@ let remove_old_level_stored_data proto_parameters ctxt current_level = ~slot_size:proto_parameters.cryptobox_parameters.slot_size store slot_id) - (WithExceptions.List.init ~loc:__LOC__ number_of_slots Fun.id)) + (WithExceptions.List.init ~loc:__LOC__ number_of_slots Fun.id) (* [attestation_lag] levels after the publication of a commitment, if it has not been attested it will never be so we can safely @@ -676,7 +711,7 @@ let new_finalized_head ctxt cctxt l1_crawler cryptobox finalized_block_hash cryptobox ~head_level:level proto_parameters) ; - let*! 
() = remove_old_level_stored_data proto_parameters ctxt level in + let* () = remove_old_level_stored_data proto_parameters ctxt level in let* () = if level = 1l then (* We do not process the block at level 1, as it will not diff --git a/src/lib_dal_node/block_handler.mli b/src/lib_dal_node/block_handler.mli index 701cfb46eea6..b3140cb598be 100644 --- a/src/lib_dal_node/block_handler.mli +++ b/src/lib_dal_node/block_handler.mli @@ -67,4 +67,4 @@ val remove_old_level_stored_data : Tezos_dal_node_services.Types.proto_parameters -> Node_context.t -> int32 -> - unit Lwt.t + (unit, tztrace) result Lwt.t diff --git a/src/lib_dal_node/store_cleanup.ml b/src/lib_dal_node/store_cleanup.ml index 59189b73a1ac..dccdd99d2f0b 100644 --- a/src/lib_dal_node/store_cleanup.ml +++ b/src/lib_dal_node/store_cleanup.ml @@ -88,7 +88,7 @@ let clean_up_store_and_catch_up_for_refutation_support ctxt cctxt let rec clean_up_at_level level = if level > last_level then return_unit else - let*! () = + let* () = Block_handler.remove_old_level_stored_data proto_parameters ctxt level in let* () = @@ -184,7 +184,7 @@ let clean_up_store_and_catch_up_for_no_refutation_support ctxt let*! () = Event.emit_end_catchup () in return_unit else - let*! () = + let* () = Block_handler.remove_old_level_stored_data proto_parameters ctxt level in L1_crawler_status.catching_up_or_synced_status -- GitLab