From d17cfd987f8276cb28a5a2218b4e7a424be39216 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 3 Jun 2025 12:00:59 +0100 Subject: [PATCH 1/5] DAL: Improve documentation format --- src/lib_dal_node/message_validation.ml | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/lib_dal_node/message_validation.ml b/src/lib_dal_node/message_validation.ml index 7dc0cd663303..08d6f6ce755a 100644 --- a/src/lib_dal_node/message_validation.ml +++ b/src/lib_dal_node/message_validation.ml @@ -23,11 +23,11 @@ (* *) (*****************************************************************************) -(* [gossipsub_app_message_payload_validation cryptobox message message_id] - allows checking whether the given [message] identified by [message_id] is - valid with the current [cryptobox] parameters. The validity check is done - by verifying that the shard in the message effectively belongs to the - commitment given by [message_id]. *) +(** [gossipsub_app_message_payload_validation cryptobox message message_id] + allows checking whether the given [message] identified by [message_id] is + valid with the current [cryptobox] parameters. The validity check is done + by verifying that the shard in the message effectively belongs to the + commitment given by [message_id]. *) let gossipsub_app_message_payload_validation cryptobox message_id message = let Types.Message.{share; shard_proof} = message in let Types.Message_id.{commitment; shard_index; _} = message_id in @@ -129,16 +129,16 @@ let gossipsub_message_id_validation ctxt proto_parameters message_id = gossipsub_message_id_topic_validation ctxt proto_parameters message_id | other -> other -(* [gossipsub_app_messages_validation ctxt cryptobox head_level - attestation_lag ?message ~message_id ()] checks for the validity of the - given message (if any) and message id. 
+(** [gossipsub_app_messages_validation ctxt cryptobox head_level + proto_parameters ?message ~message_id ()] checks for the validity of the + given message (if any) and message id. - First, the message id's validity is checked if the application cares about - it and is not outdated (Otherwise `Unknown or `Outdated is returned, - respectively). This is done thanks to - {!gossipsub_message_id_validation}. Then, if a message is given, - {!gossipsub_app_message_payload_validation} is used to check its - validity. *) + First, the message id's validity is checked if the application cares about + it and is not outdated (Otherwise `Unknown or `Outdated is returned, + respectively). This is done thanks to + {!gossipsub_message_id_validation}. Then, if a message is given, + {!gossipsub_app_message_payload_validation} is used to check its + validity. *) let gossipsub_app_messages_validation ctxt cryptobox head_level proto_parameters ?message ~message_id () = if Node_context.is_bootstrap_node ctxt then -- GitLab From e31f19dc7b7a3deda0bd915bdf3a72cf670bdc85 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Wed, 4 Jun 2025 10:13:39 +0100 Subject: [PATCH 2/5] DAL: Add option to disable shard validation --- src/lib_agnostic_baker/commands.ml | 7 +- src/lib_dal_node/cli.ml | 56 +++++++++++++-- src/lib_dal_node/cli.mli | 9 ++- src/lib_dal_node/daemon.ml | 9 ++- src/lib_dal_node/daemon.mli | 10 ++- src/lib_dal_node/event.ml | 11 +++ src/lib_dal_node/message_validation.ml | 98 ++++++++++++++------------ src/lib_dal_node/node_context.ml | 6 +- src/lib_dal_node/node_context.mli | 6 ++ 9 files changed, 155 insertions(+), 57 deletions(-) diff --git a/src/lib_agnostic_baker/commands.ml b/src/lib_agnostic_baker/commands.ml index 4f481c618db6..b2ee8d056fa7 100644 --- a/src/lib_agnostic_baker/commands.ml +++ b/src/lib_agnostic_baker/commands.ml @@ -114,10 +114,12 @@ module Dal = struct let fetch_trusted_setup = arg_to_clic fetch_trusted_setup_arg + let disable_shard_validation = 
switch_to_clic disable_shard_validation_switch + let verbose = switch_to_clic verbose_switch let args = - Tezos_clic.args20 + Tezos_clic.args21 data_dir rpc_addr expected_pow @@ -136,6 +138,7 @@ module Dal = struct service_name service_namespace fetch_trusted_setup + disable_shard_validation verbose ignore_l1_config_peers @@ -167,6 +170,7 @@ module Dal = struct service_name, service_namespace, fetch_trusted_setup, + disable_shard_validation, verbose, ignore_l1_config_peers ) _cctxt @@ -195,6 +199,7 @@ module Dal = struct service_name service_namespace fetch_trusted_setup + disable_shard_validation verbose ignore_l1_config_peers in diff --git a/src/lib_dal_node/cli.ml b/src/lib_dal_node/cli.ml index af6db07fefdf..f23fc275c02c 100644 --- a/src/lib_dal_node/cli.ml +++ b/src/lib_dal_node/cli.ml @@ -25,6 +25,19 @@ module Types = Tezos_dal_node_services.Types +(** This variable is used to disable DAL shard validation at runtime. When activated, + Gossipsub messages (i.e. shards) are always considered valid. This can be risky + as the DAL node would no longer validate the shards and therefore should be used + only for testing purposes and/or with extreme care. *) +let disable_shard_validation_environment_variable = + "TEZOS_DISABLE_SHARD_VALIDATION_I_KNOW_WHAT_I_AM_DOING" + +let disable_shard_validation = + match Sys.getenv_opt disable_shard_validation_environment_variable with + | None -> false + | Some x -> ( + match String.lowercase_ascii x with "yes" | "y" -> true | _ -> false) + module Term = struct type env = {docs : string; doc : string; name : string} @@ -498,6 +511,19 @@ module Term = struct let fetch_trusted_setup = arg_to_cmdliner fetch_trusted_setup_arg + let disable_shard_validation_switch = + make_switch + ~doc: + "Disable the shard verification. This is used conjointly with the \ + `TEZOS_DISABLE_SHARD_VALIDATION_I_KNOW_WHAT_I_AM_DOING` environment \ + variable. 
To actually disable the shard verification this option must \ + be used and the environment variable must be set. If only the \ + environment variable is set, the DAL node will refuse to start. " + "disable-shard-validation" + + let disable_shard_validation = + switch_to_cmdliner disable_shard_validation_switch + let verbose_switch = make_switch ~doc: @@ -515,8 +541,8 @@ module Term = struct $ public_addr $ endpoint $ slots_backup_uris $ trust_slots_backup_uris $ metrics_addr $ attester_profile $ operator_profile $ observer_profile $ bootstrap_profile $ peers $ history_mode $ service_name - $ service_namespace $ fetch_trusted_setup $ verbose - $ ignore_l1_config_peers)) + $ service_namespace $ fetch_trusted_setup $ disable_shard_validation + $ verbose $ ignore_l1_config_peers)) end type t = Run | Config_init | Config_update | Debug_print_store_schemas @@ -676,6 +702,7 @@ type options = { service_namespace : string option; experimental_features : experimental_features; fetch_trusted_setup : bool option; + disable_shard_validation : bool; verbose : bool; ignore_l1_config_peers : bool; } @@ -683,7 +710,8 @@ type options = { let cli_options_to_options data_dir rpc_addr expected_pow listen_addr public_addr endpoint slots_backup_uris trust_slots_backup_uris metrics_addr attesters operators observers bootstrap_flag peers history_mode service_name - service_namespace fetch_trusted_setup verbose ignore_l1_config_peers = + service_namespace fetch_trusted_setup disable_shard_validation verbose + ignore_l1_config_peers = let open Result_syntax in let profile = Controller_profiles.make ~attesters ~operators ?observers () in let* profile = @@ -723,6 +751,7 @@ let cli_options_to_options data_dir rpc_addr expected_pow listen_addr service_namespace; experimental_features = (); fetch_trusted_setup; + disable_shard_validation; verbose; ignore_l1_config_peers; } @@ -747,6 +776,7 @@ let merge service_namespace; experimental_features; fetch_trusted_setup; + disable_shard_validation = _; 
verbose; ignore_l1_config_peers; } configuration = @@ -817,8 +847,24 @@ let run ?disable_logging subcommand cli_options = ~default:Configuration_file.default.data_dir cli_options.data_dir in + let* () = + if disable_shard_validation && not cli_options.disable_shard_validation + then + failwith + "DAL shard validation is disabled but the option \ + '--disable-shard-validation' was not provided." + else if + (not disable_shard_validation) && cli_options.disable_shard_validation + then + failwith + "DAL shard validation is enabled but the environment variable %s \ + was not set." + disable_shard_validation_environment_variable + else return_unit + in Daemon.run ?disable_logging + ~disable_shard_validation ~data_dir ~configuration_override:(merge cli_options) () @@ -848,7 +894,8 @@ let commands = let run subcommand data_dir rpc_addr expected_pow listen_addr public_addr endpoint slots_backup_uris trust_slots_backup_uris metrics_addr attesters operators observers bootstrap_flag peers history_mode service_name - service_namespace fetch_trusted_setup verbose ignore_l1_config_peers = + service_namespace fetch_trusted_setup disable_shard_validation verbose + ignore_l1_config_peers = match cli_options_to_options data_dir @@ -869,6 +916,7 @@ let commands = service_name service_namespace fetch_trusted_setup + disable_shard_validation verbose ignore_l1_config_peers with diff --git a/src/lib_dal_node/cli.mli b/src/lib_dal_node/cli.mli index 1bcea868503a..c85d1d8fa9f2 100644 --- a/src/lib_dal_node/cli.mli +++ b/src/lib_dal_node/cli.mli @@ -92,6 +92,8 @@ module Term : sig val fetch_trusted_setup_arg : bool arg + val disable_shard_validation_switch : switch + val verbose_switch : switch end @@ -131,10 +133,12 @@ type options = { service_name : string option; (** Name of the service provided by this node. *) service_namespace : string option; (** Namespace for the service. *) - experimental_features : experimental_features; (** Experimental features. 
*) + experimental_features : experimental_features; (** Experimental features. *) fetch_trusted_setup : bool option; (** Should the trusted setup be installed if required and invalid? - In case of [None] at init it is considered as yes.*) + In case of [None] at init it is considered as yes. *) + disable_shard_validation : bool; + (** Should the crypto shard verification against commitment hashes be bypassed. *) verbose : bool; (** Emit events related to connections. Default value is false. *) ignore_l1_config_peers : bool; @@ -167,6 +171,7 @@ val cli_options_to_options : bool option -> bool -> bool -> + bool -> (options, bool * string) result val run : ?disable_logging:bool -> t -> options -> unit tzresult Lwt.t diff --git a/src/lib_dal_node/daemon.ml b/src/lib_dal_node/daemon.ml index 14ae44205895..d3544f0ffe37 100644 --- a/src/lib_dal_node/daemon.ml +++ b/src/lib_dal_node/daemon.ml @@ -222,7 +222,8 @@ let update_and_register_profiles ctxt = let*! () = Node_context.set_profile_ctxt ctxt profile_ctxt in return_unit -let run ?(disable_logging = false) ~data_dir ~configuration_override () = +let run ?(disable_logging = false) ?(disable_shard_validation = false) ~data_dir + ~configuration_override () = let open Lwt_result_syntax in let*! () = if disable_logging then Lwt.return_unit @@ -429,6 +430,10 @@ let run ?(disable_logging = false) ~data_dir ~configuration_override () = (* Set crypto box share size hook. *) Value_size_hooks.set_share_size (Cryptobox.Internal_for_tests.encoded_share_size cryptobox) ; + let*! 
() = + if disable_shard_validation then Event.emit_shard_validation_is_disabled () + else Lwt.return_unit + in let ctxt = Node_context.init config @@ -442,6 +447,8 @@ let run ?(disable_logging = false) ~data_dir ~configuration_override () = cctxt ~last_finalized_level:head_level ~network_name + ~disable_shard_validation + () in let* () = match Profile_manager.get_profiles profile_ctxt with diff --git a/src/lib_dal_node/daemon.mli b/src/lib_dal_node/daemon.mli index d1c22287ca69..532fa769d941 100644 --- a/src/lib_dal_node/daemon.mli +++ b/src/lib_dal_node/daemon.mli @@ -6,8 +6,9 @@ (* *) (*****************************************************************************) -(** [run ?disable_logging ~data_dir ~configuration_override] starts a DAL node - with the given data directory and function to generate an initial configuration. +(** [run ?disable_logging ?disable_shard_validation ~data_dir ~configuration_override ()] + starts a DAL node with the given data directory and function to generate an initial + configuration. This function performs the following steps: @@ -28,10 +29,13 @@ - Starts the RPC server to handle incoming RPC requests; - - Connects the Gossipsub worker with the P2P layer and to the crawler. + - Connects the Gossipsub worker with the P2P layer and to the crawler; + + - Can disable the shard validation using [?disable_shard_validation]. 
*) val run : ?disable_logging:bool -> + ?disable_shard_validation:bool -> data_dir:string -> configuration_override:(Configuration_file.t -> Configuration_file.t) -> unit -> diff --git a/src/lib_dal_node/event.ml b/src/lib_dal_node/event.ml index c1e9a1f17b41..bc7e32e24856 100644 --- a/src/lib_dal_node/event.ml +++ b/src/lib_dal_node/event.ml @@ -1082,6 +1082,15 @@ open struct ("slot_index", Data_encoding.int31) ("backup_uri", Data_encoding.string) ("status", Data_encoding.string) + + let shard_validation_is_disabled = + declare_0 + ~section + ~prefix_name_with_section:true + ~name:"shard_validation_is_disabled" + ~msg:"shard validation is disabled" + ~level:Warning + () end (* DAL node event emission functions *) @@ -1380,3 +1389,5 @@ let emit_start_catchup ~start_level ~end_level ~levels_to_clean_up = let emit_catching_up ~current_level = emit catching_up current_level let emit_end_catchup () = emit end_catchup () + +let emit_shard_validation_is_disabled () = emit shard_validation_is_disabled () diff --git a/src/lib_dal_node/message_validation.ml b/src/lib_dal_node/message_validation.ml index 08d6f6ce755a..77de2a8fd556 100644 --- a/src/lib_dal_node/message_validation.ml +++ b/src/lib_dal_node/message_validation.ml @@ -23,50 +23,54 @@ (* *) (*****************************************************************************) -(** [gossipsub_app_message_payload_validation cryptobox message message_id] - allows checking whether the given [message] identified by [message_id] is - valid with the current [cryptobox] parameters. The validity check is done - by verifying that the shard in the message effectively belongs to the - commitment given by [message_id]. 
*) -let gossipsub_app_message_payload_validation cryptobox message_id message = - let Types.Message.{share; shard_proof} = message in - let Types.Message_id.{commitment; shard_index; _} = message_id in - let shard = Cryptobox.{share; index = shard_index} in - let res = - Dal_metrics.sample_time - ~sampling_frequency:Constants.shards_verification_sampling_frequency - ~metric_updater:Dal_metrics.update_shards_verification_time - ~to_sample:(fun () -> - Cryptobox.verify_shard cryptobox commitment shard shard_proof) - in - match res with - | Ok () -> `Valid - | Error err -> - let validation_error = - match err with - | `Invalid_degree_strictly_less_than_expected {given; expected} -> - Format.sprintf - "Invalid_degree_strictly_less_than_expected. Given: %d, \ - expected: %d" - given - expected - | `Invalid_shard -> "Invalid_shard" - | `Shard_index_out_of_range s -> - Format.sprintf "Shard_index_out_of_range(%s)" s - | `Shard_length_mismatch -> "Shard_length_mismatch" - | `Prover_SRS_not_loaded -> "Prover_SRS_not_loaded" - in - Event.emit_dont_wait__message_validation_error - ~message_id - ~validation_error ; - `Invalid - | exception exn -> - (* Don't crash if crypto raised an exception. *) - let validation_error = Printexc.to_string exn in - Event.emit_dont_wait__message_validation_error - ~message_id - ~validation_error ; - `Invalid +(** [gossipsub_app_message_payload_validation ~disable_shard_validation cryptobox message_id + message] allows checking whether the given [message] identified by + [message_id] is valid with the current [cryptobox] parameters. The validity check is + done by verifying that the shard in the message effectively belongs to the + commitment given by [message_id]. The whole validation can be bypassed if + [~disable_shard_validation] is set to [true]. 
*) +let gossipsub_app_message_payload_validation ~disable_shard_validation cryptobox + message_id message = + if disable_shard_validation then `Valid + else + let Types.Message.{share; shard_proof} = message in + let Types.Message_id.{commitment; shard_index; _} = message_id in + let shard = Cryptobox.{share; index = shard_index} in + let res = + Dal_metrics.sample_time + ~sampling_frequency:Constants.shards_verification_sampling_frequency + ~metric_updater:Dal_metrics.update_shards_verification_time + ~to_sample:(fun () -> + Cryptobox.verify_shard cryptobox commitment shard shard_proof) + in + match res with + | Ok () -> `Valid + | Error err -> + let validation_error = + match err with + | `Invalid_degree_strictly_less_than_expected {given; expected} -> + Format.sprintf + "Invalid_degree_strictly_less_than_expected. Given: %d, \ + expected: %d" + given + expected + | `Invalid_shard -> "Invalid_shard" + | `Shard_index_out_of_range s -> + Format.sprintf "Shard_index_out_of_range(%s)" s + | `Shard_length_mismatch -> "Shard_length_mismatch" + | `Prover_SRS_not_loaded -> "Prover_SRS_not_loaded" + in + Event.emit_dont_wait__message_validation_error + ~message_id + ~validation_error ; + `Invalid + | exception exn -> + (* Don't crash if crypto raised an exception. 
*) + let validation_error = Printexc.to_string exn in + Event.emit_dont_wait__message_validation_error + ~message_id + ~validation_error ; + `Invalid let gossipsub_message_id_commitment_validation ctxt proto_parameters message_id = @@ -170,7 +174,11 @@ let gossipsub_app_messages_validation ctxt cryptobox head_level proto_parameters message ~none:`Valid ~some: - (gossipsub_app_message_payload_validation cryptobox message_id) + (gossipsub_app_message_payload_validation + ~disable_shard_validation: + (Node_context.get_disable_shard_validation ctxt) + cryptobox + message_id) in (if res = `Valid then let store = Node_context.get_store ctxt in diff --git a/src/lib_dal_node/node_context.ml b/src/lib_dal_node/node_context.ml index fa41308ce7de..50e899e8b614 100644 --- a/src/lib_dal_node/node_context.ml +++ b/src/lib_dal_node/node_context.ml @@ -43,11 +43,12 @@ type t = { (* the highest finalized level the DAL node is aware of (except at start-up, where it is the highest level the node is aware of) *) mutable l1_crawler_status : L1_crawler_status.t; + disable_shard_validation : bool; } let init config ~network_name profile_ctxt cryptobox shards_proofs_precomputation proto_plugins store gs_worker transport_layer - cctxt ~last_finalized_level = + cctxt ~last_finalized_level ?(disable_shard_validation = false) () = { config; network_name; @@ -65,6 +66,7 @@ let init config ~network_name profile_ctxt cryptobox profile_ctxt; last_finalized_level; l1_crawler_status = Unknown; + disable_shard_validation; } let get_tezos_node_cctxt ctxt = ctxt.tezos_node_cctxt @@ -237,6 +239,8 @@ let warn_if_attesters_not_delegates ctxt controller_profiles = else return_unit) pkh_set +let get_disable_shard_validation ctxt = ctxt.disable_shard_validation + module P2P = struct let connect {transport_layer; _} ?timeout point = Gossipsub.Transport_layer.connect transport_layer ?timeout point diff --git a/src/lib_dal_node/node_context.mli b/src/lib_dal_node/node_context.mli index 
76027ef6e3fe..7d3898978874 100644 --- a/src/lib_dal_node/node_context.mli +++ b/src/lib_dal_node/node_context.mli @@ -41,6 +41,8 @@ val init : Gossipsub.Transport_layer.t -> Tezos_rpc.Context.generic -> last_finalized_level:int32 -> + ?disable_shard_validation:bool -> + unit -> t (** Returns all the registered plugins *) @@ -202,6 +204,10 @@ val version : t -> Types.Version.t val warn_if_attesters_not_delegates : t -> Controller_profiles.t -> unit tzresult Lwt.t +(** [get_disable_shard_validation ctxt] returns whether we should disable shard + validation in the DAL node. *) +val get_disable_shard_validation : t -> bool + (** Module for P2P-related accessors. *) module P2P : sig (** [connect t ?timeout point] initiates a connection to the point -- GitLab From 710cae59b5177a39d1c39b7aa4a0902479448b36 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 3 Jun 2025 15:39:35 +0100 Subject: [PATCH 3/5] Tezt: Add --disable-shard-validation to DAL node primitive --- tezt/lib_tezos/dal_node.ml | 23 ++++++++++++++++++----- tezt/lib_tezos/dal_node.mli | 8 ++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/tezt/lib_tezos/dal_node.ml b/tezt/lib_tezos/dal_node.ml index ef2cfe7b998b..0725e1de910c 100644 --- a/tezt/lib_tezos/dal_node.ml +++ b/tezt/lib_tezos/dal_node.ml @@ -33,6 +33,7 @@ module Parameters = struct public_addr : string option; metrics_addr : string; l1_node_endpoint : Endpoint.t; + disable_shard_validation : bool; mutable pending_ready : unit option Lwt.u list; runner : Runner.t option; } @@ -49,6 +50,12 @@ type history_mode = Full | Auto | Custom of int open Parameters include Daemon.Make (Parameters) +let check_error ?exit_code ?msg dal_node = + match dal_node.status with + | Not_running -> + Test.fail "DAL node %s is not running, it has no stderr" (name dal_node) + | Running {process; _} -> Process.check_error ?exit_code ?msg process + let wait dal_node = match dal_node.status with | Not_running -> @@ -281,7 +288,8 @@ let handle_event 
dal_node {name; value = _; timestamp = _} = let create_from_endpoint ?runner ?(path = Uses.path Constant.octez_dal_node) ?name ?color ?data_dir ?event_pipe ?(rpc_host = Constant.default_host) - ?rpc_port ?listen_addr ?public_addr ?metrics_addr ~l1_node_endpoint () = + ?rpc_port ?listen_addr ?public_addr ?metrics_addr + ?(disable_shard_validation = false) ~l1_node_endpoint () = let name = match name with None -> fresh_name () | Some name -> name in let data_dir = match data_dir with None -> Temp.dir name | Some dir -> dir @@ -314,6 +322,7 @@ let create_from_endpoint ?runner ?(path = Uses.path Constant.octez_dal_node) public_addr; metrics_addr; pending_ready = []; + disable_shard_validation; l1_node_endpoint; runner; } @@ -324,7 +333,7 @@ let create_from_endpoint ?runner ?(path = Uses.path Constant.octez_dal_node) (* TODO: have rpc_addr here, like for others. *) let create ?runner ?(path = Uses.path Constant.octez_dal_node) ?name ?color ?data_dir ?event_pipe ?(rpc_host = Constant.default_host) ?rpc_port - ?listen_addr ?public_addr ?metrics_addr ~node () = + ?listen_addr ?public_addr ?metrics_addr ?disable_shard_validation ~node () = create_from_endpoint ?runner ~path @@ -337,6 +346,7 @@ let create ?runner ?(path = Uses.path Constant.octez_dal_node) ?name ?color ?listen_addr ?public_addr ?metrics_addr + ?disable_shard_validation ~l1_node_endpoint:(Node.as_rpc_endpoint node) () @@ -356,10 +366,13 @@ let make_arguments node = "--metrics-addr"; metrics_addr node; ] + @ (match public_addr node with + | None -> [] + | Some addr -> ["--public-addr"; addr]) @ - match public_addr node with - | None -> [] - | Some addr -> ["--public-addr"; addr] + if node.persistent_state.disable_shard_validation then + ["--disable-shard-validation"] + else [] let do_runlike_command ?env ?(event_level = `Debug) node arguments = if node.status <> Not_running then diff --git a/tezt/lib_tezos/dal_node.mli b/tezt/lib_tezos/dal_node.mli index a2ef4516d31d..eb2bb378901e 100644 --- 
a/tezt/lib_tezos/dal_node.mli +++ b/tezt/lib_tezos/dal_node.mli @@ -48,6 +48,7 @@ val create : ?listen_addr:string -> ?public_addr:string -> ?metrics_addr:string -> + ?disable_shard_validation:bool -> node:Node.t -> unit -> t @@ -64,6 +65,7 @@ val create_from_endpoint : ?listen_addr:string -> ?public_addr:string -> ?metrics_addr:string -> + ?disable_shard_validation:bool -> l1_node_endpoint:Endpoint.t -> unit -> t @@ -141,6 +143,12 @@ val wait_for : ?where:string -> t -> string -> (JSON.t -> 'a option) -> 'a Lwt.t running but its status is not ready *) val is_running_not_ready : t -> bool +(** Wait until a DAL node terminates and check its status. + + If the DAL node is not running, + or if the [Process.check_error] function fails, fail the test. *) +val check_error : ?exit_code:int -> ?msg:Base.rex -> t -> unit Lwt.t + (** Wait until a node terminates and return its status. If the node is not running, make the test fail. *) val wait : t -> Unix.process_status Lwt.t -- GitLab From 2ea2e4e8d9a14d8aee696e040ee1162c32591200 Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Tue, 3 Jun 2025 15:40:07 +0100 Subject: [PATCH 4/5] Tezt: DAL: Test shard validation disabling mechanism is safe to use --- tezt/lib_tezos/dal_node.ml | 3 ++ tezt/lib_tezos/dal_node.mli | 2 + tezt/tests/dal.ml | 82 ++++++++++++++++++++++++++++++++++--- 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/tezt/lib_tezos/dal_node.ml b/tezt/lib_tezos/dal_node.ml index 0725e1de910c..0ebe5c2dced9 100644 --- a/tezt/lib_tezos/dal_node.ml +++ b/tezt/lib_tezos/dal_node.ml @@ -50,6 +50,9 @@ type history_mode = Full | Auto | Custom of int open Parameters include Daemon.Make (Parameters) +let disable_shard_validation_environment_variable = + "TEZOS_DISABLE_SHARD_VALIDATION_I_KNOW_WHAT_I_AM_DOING" + let check_error ?exit_code ?msg dal_node = match dal_node.status with | Not_running -> diff --git a/tezt/lib_tezos/dal_node.mli b/tezt/lib_tezos/dal_node.mli index eb2bb378901e..c566b02638da 100644 --- 
a/tezt/lib_tezos/dal_node.mli +++ b/tezt/lib_tezos/dal_node.mli @@ -34,6 +34,8 @@ type t [Custom (i)] : keeps the shards during [i] blocks *) type history_mode = Full | Auto | Custom of int +val disable_shard_validation_environment_variable : string + (** Creates a DAL node *) val create : diff --git a/tezt/tests/dal.ml b/tezt/tests/dal.ml index f4b98e9895b0..8ed35889937d 100644 --- a/tezt/tests/dal.ml +++ b/tezt/tests/dal.ml @@ -260,6 +260,10 @@ let wait_for_shards_promises ~dal_node ~shards ~published_level ~slot_index = in Lwt.join promises +let wait_for_shard_validation_is_disabled dal_node = + Dal_node.wait_for dal_node "shard_validation_is_disabled.v0" (fun _json -> + Some ()) + (* DAL/FIXME: https://gitlab.com/tezos/tezos/-/issues/3173 The functions below are duplicated from sc_rollup.ml. They should be moved to a common submodule. *) @@ -590,8 +594,11 @@ let with_fresh_rollup ?(pvm_name = "arith") ?dal_node f tezos_node tezos_client f rollup_address sc_rollup_node let make_dal_node ?name ?peers ?attester_profiles ?operator_profiles - ?bootstrap_profile ?history_mode tezos_node = - let dal_node = Dal_node.create ?name ~node:tezos_node () in + ?bootstrap_profile ?history_mode ?(wait_ready = true) ?env + ?disable_shard_validation tezos_node = + let dal_node = + Dal_node.create ?name ?disable_shard_validation ~node:tezos_node () + in let* () = Dal_node.init_config ?peers @@ -601,11 +608,12 @@ let make_dal_node ?name ?peers ?attester_profiles ?operator_profiles ?history_mode dal_node in - let* () = Dal_node.run ~event_level:`Debug dal_node ~wait_ready:true in + let* () = Dal_node.run ?env ~event_level:`Debug dal_node ~wait_ready in return dal_node let with_dal_node ?peers ?attester_profiles ?operator_profiles - ?bootstrap_profile ?history_mode tezos_node f key = + ?bootstrap_profile ?history_mode ?wait_ready ?env ?disable_shard_validation + tezos_node f key = let* dal_node = make_dal_node ?peers @@ -613,6 +621,9 @@ let with_dal_node ?peers ?attester_profiles 
?operator_profiles ?operator_profiles ?bootstrap_profile ?history_mode + ?wait_ready + ?env + ?disable_shard_validation tezos_node in f key dal_node @@ -668,7 +679,8 @@ let scenario_with_layer1_and_dal_nodes ?regression ?(tags = []) ?commitment_period ?challenge_window ?(dal_enable = true) ?incentives_enable ?dal_rewards_weight ?activation_timestamp ?bootstrap_profile ?event_sections_levels ?operator_profiles ?history_mode ?prover - ?l1_history_mode variant scenario = + ?l1_history_mode ?wait_ready ?env ?disable_shard_validation variant scenario + = let description = "Testing DAL node" in let tags = if List.mem team tags then tags else team :: tags in test @@ -706,7 +718,14 @@ let scenario_with_layer1_and_dal_nodes ?regression ?(tags = []) ~protocol ~dal_enable @@ fun parameters cryptobox node client -> - with_dal_node ?bootstrap_profile ?operator_profiles ?history_mode node + with_dal_node + ?bootstrap_profile + ?operator_profiles + ?history_mode + ?wait_ready + ?env + ?disable_shard_validation + node @@ fun _key dal_node -> scenario protocol parameters cryptobox node client dal_node) @@ -10117,6 +10136,26 @@ let use_mockup_node_for_getting_attestable_slots _protocol dal_parameters ~error_msg:"Unexpected DAL attestation: expected %L, got %R" ; unit +let test_disable_shard_validation_wrong_cli _protocol _parameters _cryptobox + _node _client dal_node = + Dal_node.check_error + dal_node + ~msg: + (rex + ".* DAL shard validation is disabled but the option \ + '--disable-shard-validation' was not provided.*") + +let test_disable_shard_validation_wrong_env _protocol _parameters _cryptobox + _node _client dal_node = + Dal_node.check_error + dal_node + ~msg: + (rex + @@ Format.sprintf + ".* DAL shard validation is enabled but the environment variable %s \ + was not set.*" + Dal_node.disable_shard_validation_environment_variable) + let register ~protocols = (* Tests with Layer1 node only *) scenario_with_layer1_node @@ -10528,6 +10567,37 @@ let register ~protocols = 
~activation_timestamp:Now "mockup get_attestable_slots" use_mockup_node_for_getting_attestable_slots + protocols ; + + (* Scenarios for disabling shard validation *) + scenario_with_layer1_and_dal_nodes + ~operator_profiles:[0] + ~wait_ready:false + ~env: + (String_map.singleton + Dal_node.disable_shard_validation_environment_variable + "yes") + "DAL node disable shard validation wrong CLI" + test_disable_shard_validation_wrong_cli + protocols ; + scenario_with_layer1_and_dal_nodes + ~operator_profiles:[0] + ~wait_ready:false + ~disable_shard_validation:true + "DAL node disable shard validation wrong env" + test_disable_shard_validation_wrong_env + protocols ; + scenario_with_layer1_and_dal_nodes + ~operator_profiles:[0] + ~wait_ready:true + ~env: + (String_map.singleton + Dal_node.disable_shard_validation_environment_variable + "yes") + ~disable_shard_validation:true + "DAL node disable shard validation correct CLI" + (fun _protocol _parameters _cryptobox _node _client dal_node -> + Dal_node.terminate dal_node) protocols let tests_start_dal_node_around_migration ~migrate_from ~migrate_to = -- GitLab From 4faf0c6e53d41b6456c3c0bfeb5c8ae09c74478d Mon Sep 17 00:00:00 2001 From: Gabriel Moise Date: Wed, 4 Jun 2025 11:52:51 +0100 Subject: [PATCH 5/5] Tezt_cloud: Add --disable-shard-validation option to DAL scenario --- tezt/tests/cloud/dal.ml | 22 +++++++++++++++++++++- tezt/tests/cloud/scenarios_cli.ml | 9 +++++++++ tezt/tests/cloud/tezos.ml | 21 +++++++++++++++++---- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/tezt/tests/cloud/dal.ml b/tezt/tests/cloud/dal.ml index 748024aeea26..14292493257e 100644 --- a/tezt/tests/cloud/dal.ml +++ b/tezt/tests/cloud/dal.ml @@ -491,6 +491,7 @@ type configuration = { bootstrap_dal_node_identity_file : string option; external_rpc : bool; dal_incentives : bool; + disable_shard_validation : bool; } type bootstrap = { @@ -2143,8 +2144,16 @@ let init_public_network cloud (configuration : configuration) let* dal_node = if 
not configuration.with_dal then Lwt.return_none else + let disable_shard_validation = + configuration.disable_shard_validation + in let* dal_node = - Dal_node.Agent.create ~name:"bootstrap-dal-node" cloud agent ~node + Dal_node.Agent.create + ~name:"bootstrap-dal-node" + cloud + agent + ~node + ~disable_shard_validation in let* () = @@ -2166,6 +2175,7 @@ let init_public_network cloud (configuration : configuration) ?otel ~memtrace:configuration.memtrace ~event_level:`Notice + ~disable_shard_validation dal_node in Lwt.return_some dal_node @@ -2371,6 +2381,7 @@ let init_sandbox_and_activate_protocol cloud (configuration : configuration) cloud agent ~node:bootstrap_node + ~disable_shard_validation:configuration.disable_shard_validation in Lwt.return_some dal_node else Lwt.return_none @@ -2471,6 +2482,7 @@ let init_sandbox_and_activate_protocol cloud (configuration : configuration) ?otel ~memtrace:configuration.memtrace ~event_level:`Notice + ~disable_shard_validation:configuration.disable_shard_validation dal_bootstrap_node in let* () = @@ -2537,6 +2549,7 @@ let init_baker ?stake cloud (configuration : configuration) ~bootstrap teztale Dal_node.Agent.create ~name:(Format.asprintf "baker-dal-node-%d" i) ~node + ~disable_shard_validation:configuration.disable_shard_validation cloud agent in @@ -2554,6 +2567,7 @@ let init_baker ?stake cloud (configuration : configuration) ~bootstrap teztale ?otel ~memtrace:configuration.memtrace ~event_level:`Notice + ~disable_shard_validation:configuration.disable_shard_validation dal_node in Lwt.return_some dal_node @@ -2640,6 +2654,7 @@ let init_producer cloud configuration ~bootstrap teztale account i slot_index Dal_node.Agent.create ~name:(Format.asprintf "producer-dal-node-%i" i) ~node + ~disable_shard_validation:configuration.disable_shard_validation cloud agent in @@ -2681,6 +2696,7 @@ let init_producer cloud configuration ~bootstrap teztale account i slot_index ?otel ~memtrace:configuration.memtrace ~event_level:`Notice + 
~disable_shard_validation:configuration.disable_shard_validation dal_node in let () = toplog "Init producer %s: DAL node is ready" name in @@ -2716,6 +2732,7 @@ let init_observer cloud configuration ~bootstrap teztale ~topic i agent = Dal_node.Agent.create ~name:(Format.asprintf "observer-dal-node-%i" i) ~node + ~disable_shard_validation:configuration.disable_shard_validation cloud agent in @@ -2748,6 +2765,7 @@ let init_observer cloud configuration ~bootstrap teztale ~topic i agent = ?otel ~memtrace:configuration.memtrace ~event_level:`Notice + ~disable_shard_validation:configuration.disable_shard_validation dal_node in let* () = @@ -3691,6 +3709,7 @@ let register (module Cli : Scenarios_cli.Dal) = let bakers = Cli.bakers in let external_rpc = Cli.node_external_rpc_server in let dal_incentives = Cli.dal_incentives in + let disable_shard_validation = Cli.disable_shard_validation in let t = { with_dal; @@ -3716,6 +3735,7 @@ let register (module Cli : Scenarios_cli.Dal) = bootstrap_dal_node_identity_file; external_rpc; dal_incentives; + disable_shard_validation; } in (t, etherlink) diff --git a/tezt/tests/cloud/scenarios_cli.ml b/tezt/tests/cloud/scenarios_cli.ml index 44bf4bce2b42..a345ddfa8f7e 100644 --- a/tezt/tests/cloud/scenarios_cli.ml +++ b/tezt/tests/cloud/scenarios_cli.ml @@ -85,6 +85,8 @@ module type Dal = sig val dal_incentives : bool val proxy_localhost : bool + + val disable_shard_validation : bool end module Dal () : Dal = struct @@ -408,6 +410,13 @@ module Dal () : Dal = struct can be used to solve a bug with the Tezt Cloud library. This option \ will be removed once the bug is fixed" false + + let disable_shard_validation = + Clap.flag + ~section + ~set_long:"disable-shard-validation" + ~description:"All DAL nodes will bypass the shard validation stage." 
+ false end module type Layer1 = sig diff --git a/tezt/tests/cloud/tezos.ml b/tezt/tests/cloud/tezos.ml index 1c29ac284a78..d361604fd9ef 100644 --- a/tezt/tests/cloud/tezos.ml +++ b/tezt/tests/cloud/tezos.ml @@ -177,7 +177,7 @@ module Dal_node = struct module Agent = struct let create_from_endpoint ?(group = "DAL") ?net_port ?(path = Uses.path Constant.octez_dal_node) ?name ?rpc_port - ~l1_node_endpoint cloud agent = + ?disable_shard_validation ~l1_node_endpoint cloud agent = let* path = Agent.copy agent ~source:path in let* () = Cloud.register_binary @@ -209,6 +209,7 @@ module Dal_node = struct ~rpc_port ~metrics_addr ~listen_addr + ?disable_shard_validation ~l1_node_endpoint () in @@ -217,15 +218,17 @@ module Dal_node = struct Cloud.service_register ~name ~executable agent ; Lwt.return node - let create ?net_port ?path ?name ~node agent = + let create ?net_port ?path ?name ?disable_shard_validation ~node agent = create_from_endpoint ?net_port ?path ?name + ?disable_shard_validation ~l1_node_endpoint:(Node.as_rpc_endpoint node) agent - let run ?otel ?(memtrace = false) ?event_level dal_node = + let run ?otel ?(memtrace = false) ?event_level + ?(disable_shard_validation = false) dal_node = let name = name dal_node in let filename = Format.asprintf "%s/%s-trace.ctf" (Filename.get_temp_dir_name ()) name @@ -246,7 +249,17 @@ module Dal_node = struct ] |> List.to_seq |> String_map.of_seq in - String_map.union (fun _ _ _ -> None) otel_env memtrace_env + let disable_shard_validation_env = + if disable_shard_validation then + String_map.singleton + "TEZOS_DISABLE_SHARD_VALIDATION_I_KNOW_WHAT_I_AM_DOING" + "yes" + else String_map.empty + in + String_map.union + (fun _ _ _ -> None) + (String_map.union (fun _ _ _ -> None) otel_env memtrace_env) + disable_shard_validation_env in let* () = run ~env ?event_level dal_node in (* Update the state in the service manager *) -- GitLab