From ad6f7ea3a2bc2ddbb2663c8be0a2b78657b61cc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Thir=C3=A9?=
Date: Tue, 3 Dec 2024 00:02:15 +0100
Subject: [PATCH] Bin/Node: Add an OpenTelemetry trace

---
Reviewer revision of src/bin_dal_node/otel.ml only (the daemon.ml and
slot_manager.ml hunks are unchanged): doc comments added, the identity
Data_encoding.conv dropped, the byte copy done with Bytes.blit, and
"land 0xff" used instead of "mod 256" so that Char.chr cannot raise on a
negative level. The post-image blob id on the otel.ml "index" line below was
not recomputed.

 src/bin_dal_node/daemon.ml       | 11 +++++-
 src/bin_dal_node/otel.ml         | 66 ++++++++++++++++++++++++++++++++
 src/bin_dal_node/slot_manager.ml |  5 +++
 3 files changed, 80 insertions(+), 2 deletions(-)
 create mode 100644 src/bin_dal_node/otel.ml

diff --git a/src/bin_dal_node/daemon.ml b/src/bin_dal_node/daemon.ml
index 3fcf4f5b7b4b..439cbed7fcf3 100644
--- a/src/bin_dal_node/daemon.ml
+++ b/src/bin_dal_node/daemon.ml
@@ -94,9 +94,14 @@ module Handler = struct
       commitment given by [message_id]. *)
   let gossipsub_app_message_payload_validation cryptobox message_id message =
     let Types.Message.{share; shard_proof} = message in
-    let Types.Message_id.{commitment; shard_index; _} = message_id in
+    let Types.Message_id.{commitment; shard_index; level; slot_index; _} =
+      message_id
+    in
     let shard = Cryptobox.{share; index = shard_index} in
     let res =
+      let attrs = [("shard_index", `Int shard_index)] in
+      Otel.trace_slot_event ~attrs ~event:"verify_shard" level slot_index
+      @@ fun () ->
       Dal_metrics.sample_time
         ~sampling_frequency:Constants.shards_verification_sampling_frequency
         ~metric_updater:Dal_metrics.update_shards_verification_time
@@ -1156,6 +1161,8 @@ let run ~data_dir ~configuration_override =
           profile;
           listen_addr;
           public_addr;
+          service_name;
+          service_namespace;
           _;
         } as config) =
     let*! result = Configuration_file.load ~data_dir in
@@ -1169,7 +1176,7 @@ let run ~data_dir ~configuration_override =
     return configuration
   in
   let*! () = Event.(emit configuration_loaded) () in
-
+  Otel.start_if_requested ?service_name ?service_namespace () ;
   let cctxt = Rpc_context.make endpoint in
   let* dal_config = fetch_dal_config cctxt in
   let points = points @ dal_config.bootstrap_peers in
diff --git a/src/bin_dal_node/otel.ml b/src/bin_dal_node/otel.ml
new file mode 100644
index 000000000000..d997110500ee
--- /dev/null
+++ b/src/bin_dal_node/otel.ml
@@ -0,0 +1,66 @@
+(*****************************************************************************)
+(*                                                                           *)
+(* SPDX-License-Identifier: MIT                                              *)
+(* SPDX-FileCopyrightText: 2024 Nomadic Labs                                 *)
+(*                                                                           *)
+(*****************************************************************************)
+
+module Client = Opentelemetry_client_cohttp_lwt
+
+(** [requested] is [true] iff the [OTEL] environment variable is set, whatever
+    its value. It is read once, when this module is initialised. *)
+let requested = Sys.getenv_opt "OTEL" |> Option.is_some
+
+(** [start_if_requested ?service_name ?service_namespace ()] does nothing
+    unless {!requested} holds. Otherwise it overrides the OpenTelemetry
+    service name and namespace globals with the given values (if any), then
+    calls [Opentelemetry.GC_metrics.basic_setup] and [Client.setup]. *)
+let start_if_requested ?service_name ?service_namespace () =
+  if requested then (
+    service_name
+    |> Option.iter (fun service_name ->
+           Opentelemetry.Globals.service_name := service_name) ;
+    service_namespace
+    |> Option.iter (fun service_namespace ->
+           Opentelemetry.Globals.service_namespace := Some service_namespace) ;
+    Opentelemetry.GC_metrics.basic_setup () ;
+    Client.setup ())
+  else ()
+
+(** [trace_slot_event ?attrs ~event level slot_index f] returns [f ()]. When
+    {!requested} holds, the call runs inside an OpenTelemetry span named
+    [event], carrying the attributes ["level"], ["slot_index"] and then
+    [attrs]. The trace id depends only on [(level, slot_index)], so two calls
+    about the same slot get the same trace id.
+
+    NOTE(review): [Opentelemetry.Trace.with_] is given a plain callback; if
+    [f ()] returns an Lwt promise (as it seems to in
+    [Slot_manager.publish_proved_shards]) the span presumably ends when the
+    promise is created, not when it resolves. TODO: confirm, and consider an
+    Lwt-aware variant. *)
+let trace_slot_event ?(attrs = []) ~event level slot_index f =
+  if not requested then f ()
+  else
+    let slot_id_bytes =
+      Data_encoding.Binary.to_bytes_exn
+        Data_encoding.(tup2 int32 int31)
+        (level, slot_index)
+    in
+    let trace_id_bytes =
+      (* The buffer is 16 bytes: the encoded slot id comes first (presumably 8
+         bytes, 4 for each component -- TODO confirm) and the rest is zero. *)
+      let bytes = Bytes.make 16 '\x00' in
+      Bytes.blit slot_id_bytes 0 bytes 0 (Bytes.length slot_id_bytes) ;
+      (* Byte 0 is then replaced by the low byte of [level]. [land 0xff] is
+         used instead of [mod 256] because [mod] is negative for a negative
+         level, which would make [Char.chr] raise [Invalid_argument]. *)
+      Bytes.set bytes 0 (Char.chr (Int32.to_int level land 0xff)) ;
+      (* A fixed non-zero last byte guarantees the id is never all zeros. *)
+      Bytes.set bytes 15 '\x0f' ;
+      bytes
+    in
+    let trace_id = Opentelemetry.Trace_id.of_bytes trace_id_bytes in
+    let attrs =
+      [("level", `Int (Int32.to_int level)); ("slot_index", `Int slot_index)]
+      @ attrs
+    in
+    Opentelemetry.Trace.with_ event ~trace_id ~attrs @@ fun _scope -> f ()
diff --git a/src/bin_dal_node/slot_manager.ml b/src/bin_dal_node/slot_manager.ml
index 375804771bc7..036047e4df73 100644
--- a/src/bin_dal_node/slot_manager.ml
+++ b/src/bin_dal_node/slot_manager.ml
@@ -246,6 +246,11 @@ let shards_to_attesters committee =
 let publish_proved_shards (slot_id : Types.slot_id) ~level_committee
     proto_parameters commitment shards shard_proofs gs_worker =
   let open Lwt_result_syntax in
+  Otel.trace_slot_event
+    ~event:"publish shards"
+    slot_id.slot_level
+    slot_id.slot_index
+  @@ fun _scope ->
   let attestation_level =
     Int32.(
       pred
-- 
GitLab