From 743bb524e97a1498258cc8e1a1bfac05fa8d194e Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 16 Jun 2025 11:23:32 +0200 Subject: [PATCH 1/5] Manifest/DAL: add dependency to Opentelemetry profiler --- manifest/product_octez.ml | 2 ++ src/bin_dal_node/dune | 4 +++- src/lib_dal_node/dune | 6 ++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index 529337f7c11e..66658b7055bc 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -5342,6 +5342,7 @@ let octez_dal_node_lib = prometheus; octez_crawler |> open_; octez_profiler |> open_; + octez_profiler_complex_backends |> open_; ] ~preprocess ~preprocessor_deps @@ -9090,6 +9091,7 @@ let _octez_dal_node = octez_dal_node_lib |> open_; memtrace; octez_profiler_backends |> open_; + octez_profiler_complex_backends |> open_; ] @ protocol_deps) ~conflicts:[Conflicts.checkseum] diff --git a/src/bin_dal_node/dune b/src/bin_dal_node/dune index c66019c9c75b..7d8de2c9c4d1 100644 --- a/src/bin_dal_node/dune +++ b/src/bin_dal_node/dune @@ -14,6 +14,7 @@ tezos-dal-node-lib memtrace octez-libs.octez-profiler.backends + octez-libs.octez-profiler.complex_backends (select void_for_linking-octez-protocol-021-PsQuebec-libs-dal from (octez-protocol-021-PsQuebec-libs.dal -> void_for_linking-octez-protocol-021-PsQuebec-libs-dal.empty) (-> void_for_linking-octez-protocol-021-PsQuebec-libs-dal.empty)) @@ -31,7 +32,8 @@ -open Tezos_base.TzPervasives -open Tezos_stdlib_unix -open Tezos_dal_node_lib - -open Tezos_profiler_backends)) + -open Tezos_profiler_backends + -open Tezos_profiler_complex_backends)) (rule (action diff --git a/src/lib_dal_node/dune b/src/lib_dal_node/dune index db258314af75..6c38cf61eddf 100644 --- a/src/lib_dal_node/dune +++ b/src/lib_dal_node/dune @@ -37,7 +37,8 @@ octez-libs.prometheus-app octez-libs.prometheus octez-crawler - octez-libs.octez-profiler) + octez-libs.octez-profiler + octez-libs.octez-profiler.complex_backends) 
(preprocess (pps octez-libs.ppx_profiler)) (preprocessor_deps (env_var TEZOS_PPX_PROFILER)) (flags @@ -61,4 +62,5 @@ -open Tezos_base_p2p_identity_file -open Tezos_shell_services -open Octez_crawler - -open Tezos_profiler)) + -open Tezos_profiler + -open Tezos_profiler_complex_backends)) -- GitLab From f575c42a9d05bed7a10a71213e1331d5487e1e71 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 16 Jun 2025 11:36:55 +0200 Subject: [PATCH 2/5] DAL/Node: use a default Opentelemetry service name and namespace --- src/lib_dal_node/cli.ml | 4 ++-- src/lib_dal_node/configuration_file.ml | 20 ++++++++++++-------- src/lib_dal_node/configuration_file.mli | 5 ++--- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/lib_dal_node/cli.ml b/src/lib_dal_node/cli.ml index dc8e8a979edb..dadc7ba6678f 100644 --- a/src/lib_dal_node/cli.ml +++ b/src/lib_dal_node/cli.ml @@ -847,9 +847,9 @@ let merge metrics_addr; peers = peers @ configuration.peers; history_mode = Option.value ~default:configuration.history_mode history_mode; - service_name = Option.either service_name configuration.service_name; + service_name = Option.value ~default:configuration.service_name service_name; service_namespace = - Option.either service_namespace configuration.service_namespace; + Option.value ~default:configuration.service_namespace service_namespace; fetch_trusted_setup = Option.value ~default:configuration.fetch_trusted_setup diff --git a/src/lib_dal_node/configuration_file.ml b/src/lib_dal_node/configuration_file.ml index e9d765eb129e..ec7befc1afda 100644 --- a/src/lib_dal_node/configuration_file.ml +++ b/src/lib_dal_node/configuration_file.ml @@ -78,8 +78,8 @@ type t = { profile : Profile_manager.unresolved_profile; history_mode : history_mode; version : int; - service_name : string option; - service_namespace : string option; + service_name : string; + service_namespace : string; experimental_features : experimental_features; fetch_trusted_setup : bool; verbose : bool; @@ 
-117,6 +117,10 @@ let default_metrics_port = let default_history_mode = Rolling {blocks = `Auto} +let default_service_name = "octez-dal-node" + +let default_service_namespace = "octez-dal-node" + let default_experimental_features = () let default_fetch_trusted_setup = true @@ -136,8 +140,8 @@ let default = history_mode = default_history_mode; profile = Profile_manager.Empty; version = current_version; - service_name = None; - service_namespace = None; + service_name = default_service_name; + service_namespace = default_service_namespace; experimental_features = default_experimental_features; fetch_trusted_setup = default_fetch_trusted_setup; verbose = false; @@ -318,13 +322,13 @@ let encoding : t Data_encoding.t = (dft "service_name" ~description:"Name of the service" - (Data_encoding.option Data_encoding.string) - None) + Data_encoding.string + default.service_name) (dft "service_namespace" ~description:"Namespace for the service" - (Data_encoding.option Data_encoding.string) - None) + Data_encoding.string + default.service_namespace) (dft "experimental_features" ~description:"Experimental features" diff --git a/src/lib_dal_node/configuration_file.mli b/src/lib_dal_node/configuration_file.mli index eb760489cef0..3c0afbedf71b 100644 --- a/src/lib_dal_node/configuration_file.mli +++ b/src/lib_dal_node/configuration_file.mli @@ -62,9 +62,8 @@ type t = { (** The profiles determining the topics of interest. *) history_mode : history_mode; version : int; (** The version of the configuration. *) - service_name : string option; - (** Name of the service provided by this node. *) - service_namespace : string option; (** Namespace for the service. *) + service_name : string; (** Name of the service provided by this node. *) + service_namespace : string; (** Namespace for the service. *) experimental_features : experimental_features; (** Experimental features. *) fetch_trusted_setup : bool; (** Should the trusted setup be downloaded if not found or has invalid hash. 
*) -- GitLab From 9cfb0ed2b600ca299255c77f201a1e50253396ba Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 16 Jun 2025 11:19:58 +0200 Subject: [PATCH 3/5] DAL/Node: initialize the Opentelemetry backend of the profiler --- src/lib_dal_node/daemon.ml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lib_dal_node/daemon.ml b/src/lib_dal_node/daemon.ml index f4b9d7f74f00..86b13a4dde7c 100644 --- a/src/lib_dal_node/daemon.ml +++ b/src/lib_dal_node/daemon.ml @@ -393,6 +393,14 @@ let run ?(disable_logging = false) ?(disable_shard_validation = false) else limits in let identity = p2p_config.P2p.identity in + (* Initialize the OpenTelemetry profiler only when identity is available, to + allow discriminating the different services. *) + () + [@profiler.overwrite + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.initialize + ~unique_identifier:(P2p_peer.Id.to_b58check identity.peer_id) + config.service_name)] ; let self = (* What matters is the identity, the reachable point is more like a placeholder here. *) Types.Peer. 
-- GitLab From 0f3ba3a9fe2938dc0a95436840476065f8ca2b9 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 16 Jun 2025 13:42:12 +0200 Subject: [PATCH 4/5] DAL/Node: add an Opentelemetry helper to trace slot_ids --- src/lib_dal_node/opentelemetry_helpers.ml | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/lib_dal_node/opentelemetry_helpers.ml diff --git a/src/lib_dal_node/opentelemetry_helpers.ml b/src/lib_dal_node/opentelemetry_helpers.ml new file mode 100644 index 000000000000..c6a6ec25a2d1 --- /dev/null +++ b/src/lib_dal_node/opentelemetry_helpers.ml @@ -0,0 +1,42 @@ +(*****************************************************************************) +(* *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2025 Nomadic Labs *) +(* *) +(*****************************************************************************) + +module Slot_id_hash = Blake2B.Make_minimal_with_data (struct + let name = "dal_slot_id" + + let title = "dal slot_id hash, used for Opentelemetry traces" + + (* Enforces the hash to have 16 bytes, to be compatible with Opentelemetry + trace ids. *) + let size = Some 16 +end) + +(* The first string value serves as a salt for the hash: it prevents any other + value that has the same encoding as a slot_id from being given the same + trace_id (which would produce inconsistent traces). 
*) +let slot_encoding = + Data_encoding.( + conv + (fun Types.Slot_id.{slot_level; slot_index} -> + (Slot_id_hash.name, slot_level, slot_index)) + (fun (_, slot_level, slot_index) -> {slot_level; slot_index}) + (tup3 string int32 int31)) + +let _trace_slot ?(attrs = []) ~name slot_id f = + let trace_id = + [Data_encoding.Binary.to_bytes_exn slot_encoding slot_id] + |> Slot_id_hash.hash_bytes |> Slot_id_hash.to_bytes + |> Opentelemetry.Trace_id.of_bytes + in + let attrs = + [ + ("level", `Int (Int32.to_int slot_id.slot_level)); + ("slot_index", `Int slot_id.slot_index); + ] + @ attrs + in + Opentelemetry_profiler.trace ~attrs ~trace_id name f -- GitLab From a8785270dc7d9823d46ebf12e3bbeaaa81da7898 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 16 Jun 2025 15:42:52 +0200 Subject: [PATCH 5/5] DAL/Node: trace verifying and publishing shards --- src/lib_dal_node/message_validation.ml | 20 +++++++++++++++----- src/lib_dal_node/opentelemetry_helpers.ml | 2 +- src/lib_dal_node/slot_manager.ml | 15 +++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/lib_dal_node/message_validation.ml b/src/lib_dal_node/message_validation.ml index 77de2a8fd556..5854cabbdc43 100644 --- a/src/lib_dal_node/message_validation.ml +++ b/src/lib_dal_node/message_validation.ml @@ -37,11 +37,21 @@ let gossipsub_app_message_payload_validation ~disable_shard_validation cryptobox let Types.Message_id.{commitment; shard_index; _} = message_id in let shard = Cryptobox.{share; index = shard_index} in let res = - Dal_metrics.sample_time - ~sampling_frequency:Constants.shards_verification_sampling_frequency - ~metric_updater:Dal_metrics.update_shards_verification_time - ~to_sample:(fun () -> - Cryptobox.verify_shard cryptobox commitment shard shard_proof) + (Dal_metrics.sample_time + ~sampling_frequency:Constants.shards_verification_sampling_frequency + ~metric_updater:Dal_metrics.update_shards_verification_time + ~to_sample:(fun () -> + Cryptobox.verify_shard 
cryptobox commitment shard shard_proof) + [@profiler.wrap_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_helpers.trace_slot + ~attrs:[("shard_index", `Int shard_index)] + ~name:"verify_shard" + Types.Slot_id. + { + slot_level = message_id.level; + slot_index = message_id.slot_index; + })]) in match res with | Ok () -> `Valid diff --git a/src/lib_dal_node/opentelemetry_helpers.ml b/src/lib_dal_node/opentelemetry_helpers.ml index c6a6ec25a2d1..f017442c53ea 100644 --- a/src/lib_dal_node/opentelemetry_helpers.ml +++ b/src/lib_dal_node/opentelemetry_helpers.ml @@ -26,7 +26,7 @@ let slot_encoding = (fun (_, slot_level, slot_index) -> {slot_level; slot_index}) (tup3 string int32 int31)) -let _trace_slot ?(attrs = []) ~name slot_id f = +let trace_slot ?(attrs = []) ~name slot_id f = let trace_id = [Data_encoding.Binary.to_bytes_exn slot_encoding slot_id] |> Slot_id_hash.hash_bytes |> Slot_id_hash.to_bytes diff --git a/src/lib_dal_node/slot_manager.ml b/src/lib_dal_node/slot_manager.ml index 524177586b38..ed7776dd82f8 100644 --- a/src/lib_dal_node/slot_manager.ml +++ b/src/lib_dal_node/slot_manager.ml @@ -713,6 +713,21 @@ let publish_proved_shards ctxt (slot_id : Types.slot_id) ~level_committee |> app_input gs_worker)) ; return_unit) +let publish_proved_shards ctxt (slot_id : Types.slot_id) ~level_committee + proto_parameters commitment shards shard_proofs gs_worker = + (publish_proved_shards + ctxt + slot_id + ~level_committee + proto_parameters + commitment + shards + shard_proofs + gs_worker + [@profiler.wrap_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_helpers.trace_slot ~name:"publish_shards" slot_id)]) + (** This function publishes the shards of a commitment that is waiting for attestation on L1 if this node has those shards and their proofs in memory. *) -- GitLab