From adb4250726fcdbd1364ef07250e2252f0c64bec6 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 10:03:57 +0200 Subject: [PATCH 1/9] Tezt/Cloud: etherlink DAL node uses configuration and profiler --- tezt/tests/cloud/dal.ml | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tezt/tests/cloud/dal.ml b/tezt/tests/cloud/dal.ml index b9cade79a029..89231916a531 100644 --- a/tezt/tests/cloud/dal.ml +++ b/tezt/tests/cloud/dal.ml @@ -2873,8 +2873,16 @@ let init_observer cloud configuration ~bootstrap teztale ~topic i agent = in Lwt.return {node; dal_node; topic} -let init_etherlink_dal_node ~bootstrap ~next_agent ~dal_slots ~network ~snapshot - ~otel ~memtrace ~rpc_external ~cloud = +let init_etherlink_dal_node + { + external_rpc; + network; + snapshot; + ppx_profiling; + ppx_profiler_backends; + memtrace; + _; + } ~bootstrap ~dal_slots ~next_agent ~otel ~cloud = match dal_slots with | [] -> toplog "Etherlink will run without DAL support" ; @@ -2891,7 +2899,7 @@ let init_etherlink_dal_node ~bootstrap ~next_agent ~dal_slots ~network ~snapshot ~name ~arguments: [Peer bootstrap.node_p2p_endpoint; History_mode (Rolling (Some 79))] - ~rpc_external + ~rpc_external:external_rpc network ~snapshot cloud @@ -2905,7 +2913,7 @@ let init_etherlink_dal_node ~bootstrap ~next_agent ~dal_slots ~network ~snapshot ~peers:(Option.to_list bootstrap.dal_node_p2p_endpoint) dal_node in - let* () = Dal_node.Agent.run ?otel dal_node in + let* () = Dal_node.Agent.run ?otel ~ppx_profiling dal_node in some dal_node | _ :: _ :: _ -> (* On several slot indices, we launch one observer DAL node per @@ -2927,7 +2935,7 @@ let init_etherlink_dal_node ~bootstrap ~next_agent ~dal_slots ~network ~snapshot ~name ~arguments: [Peer bootstrap.node_p2p_endpoint; History_mode (Rolling (Some 79))] - ~rpc_external + ~rpc_external:external_rpc network ~snapshot cloud @@ -2956,7 +2964,7 @@ let init_etherlink_dal_node ~bootstrap ~next_agent ~dal_slots ~network 
~snapshot Peer bootstrap.node_p2p_endpoint; History_mode (Rolling (Some 79)); ] - ~rpc_external + ~rpc_external:external_rpc network ~snapshot cloud @@ -3047,14 +3055,11 @@ let init_etherlink_operator_setup cloud configuration etherlink_configuration let otel = Cloud.open_telemetry_endpoint cloud in let* dal_node = init_etherlink_dal_node + configuration ~bootstrap ~next_agent ~dal_slots:etherlink_configuration.etherlink_dal_slots - ~network:configuration.network - ~snapshot:configuration.snapshot - ~rpc_external:configuration.external_rpc ~otel - ~memtrace:configuration.memtrace ~cloud in let operators = -- GitLab From dd2911db1c11c989c933b9c9239787117e27f3fd Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Thu, 19 Jun 2025 15:25:37 +0200 Subject: [PATCH 2/9] Tezt/Cloud: add Cli parameter to select profiler backends to use --- tezt/tests/cloud/dal.ml | 3 +++ tezt/tests/cloud/scenarios_cli.ml | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/tezt/tests/cloud/dal.ml b/tezt/tests/cloud/dal.ml index 89231916a531..11a5a0dd2a8c 100644 --- a/tezt/tests/cloud/dal.ml +++ b/tezt/tests/cloud/dal.ml @@ -520,6 +520,7 @@ type configuration = { disable_shard_validation : bool; ignore_pkhs : string list; ppx_profiling : bool; + ppx_profiler_backends : string list; } type bootstrap = { @@ -3912,6 +3913,7 @@ let register (module Cli : Scenarios_cli.Dal) = let external_rpc = Cli.node_external_rpc_server in let disable_shard_validation = Cli.disable_shard_validation in let ppx_profiling = Cli.ppx_profiling in + let ppx_profiler_backends = Cli.ppx_profiler_backends in let t = { with_dal; @@ -3940,6 +3942,7 @@ let register (module Cli : Scenarios_cli.Dal) = disable_shard_validation; ignore_pkhs; ppx_profiling; + ppx_profiler_backends; } in (t, etherlink) diff --git a/tezt/tests/cloud/scenarios_cli.ml b/tezt/tests/cloud/scenarios_cli.ml index b6cfae72d501..be2c0f9b0081 100644 --- a/tezt/tests/cloud/scenarios_cli.ml +++ b/tezt/tests/cloud/scenarios_cli.ml @@ -91,6 
+91,8 @@ module type Dal = sig val ignore_pkhs : string list val ppx_profiling : bool + + val ppx_profiler_backends : string list end module Dal () : Dal = struct @@ -480,6 +482,16 @@ module Dal () : Dal = struct "Enable PPX profiling on all components. The level of verbosity is by \ default `Debug` and the format of the output is `txt`. " false + + let ppx_profiler_backends = + Clap.list_string + ~section + ~long:"ppx-profiler-backends" + ~description: + "Select the backends used by the profiler, bypassing the defaults \ + selection: always `txt` and `json`, and also `prometheus` if \ + `--prometheus` and `opentelemetry` if `--opentelemetry`." + () end module type Layer1 = sig -- GitLab From 4764a9f8d713997531e3277d693ac69be8234d6c Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Thu, 19 Jun 2025 15:42:23 +0200 Subject: [PATCH 3/9] Tezt/Cloud: allow selection of profiler backends --- tezt/tests/cloud/dal.ml | 26 ++++++++++++++++-- tezt/tests/cloud/tezos.ml | 58 +++++++++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 17 deletions(-) diff --git a/tezt/tests/cloud/dal.ml b/tezt/tests/cloud/dal.ml index 11a5a0dd2a8c..b14e91bcf39a 100644 --- a/tezt/tests/cloud/dal.ml +++ b/tezt/tests/cloud/dal.ml @@ -2234,6 +2234,7 @@ let init_public_network cloud (configuration : configuration) ~event_level:`Notice ~disable_shard_validation ~ppx_profiling:configuration.ppx_profiling + ~ppx_profiler_backends:configuration.ppx_profiler_backends dal_node in Lwt.return_some dal_node @@ -2542,6 +2543,7 @@ let init_sandbox_and_activate_protocol cloud (configuration : configuration) ~event_level:`Notice ~disable_shard_validation:configuration.disable_shard_validation ~ppx_profiling:configuration.ppx_profiling + ~ppx_profiler_backends:configuration.ppx_profiler_backends dal_bootstrap_node in let* () = @@ -2635,6 +2637,7 @@ let init_baker ?stake cloud (configuration : configuration) ~bootstrap teztale ~event_level:`Notice 
~disable_shard_validation:configuration.disable_shard_validation ~ppx_profiling:configuration.ppx_profiling + ~ppx_profiler_backends:configuration.ppx_profiler_backends dal_node in Lwt.return_some dal_node @@ -2786,6 +2789,7 @@ let init_producer cloud configuration ~bootstrap teztale account i slot_index ~disable_shard_validation:configuration.disable_shard_validation ?ignore_pkhs ~ppx_profiling:configuration.ppx_profiling + ~ppx_profiler_backends:configuration.ppx_profiler_backends dal_node in let () = toplog "Init producer %s: DAL node is ready" name in @@ -2859,6 +2863,7 @@ let init_observer cloud configuration ~bootstrap teztale ~topic i agent = ~event_level:`Notice ~disable_shard_validation:configuration.disable_shard_validation ~ppx_profiling:configuration.ppx_profiling + ~ppx_profiler_backends:configuration.ppx_profiler_backends dal_node in let* () = @@ -2914,7 +2919,9 @@ let init_etherlink_dal_node ~peers:(Option.to_list bootstrap.dal_node_p2p_endpoint) dal_node in - let* () = Dal_node.Agent.run ?otel ~ppx_profiling dal_node in + let* () = + Dal_node.Agent.run ?otel ~ppx_profiling ~ppx_profiler_backends dal_node + in some dal_node | _ :: _ :: _ -> (* On several slot indices, we launch one observer DAL node per @@ -2949,7 +2956,13 @@ let init_etherlink_dal_node ~peers:(Option.to_list bootstrap.dal_node_p2p_endpoint) default_dal_node in - let* () = Dal_node.Agent.run ?otel ~memtrace default_dal_node in + let* () = + Dal_node.Agent.run + ?otel + ~memtrace + ~ppx_profiler_backends:[] + default_dal_node + in let default_endpoint = Dal_node.rpc_endpoint default_dal_node in let* dal_slots_and_nodes = @@ -2979,7 +2992,13 @@ let init_etherlink_dal_node ~peers:(Option.to_list bootstrap.dal_node_p2p_endpoint) dal_node in - let* () = Dal_node.Agent.run ?otel ~memtrace dal_node in + let* () = + Dal_node.Agent.run + ?otel + ~memtrace + ~ppx_profiler_backends:[] + dal_node + in return (slot_index, Dal_node.rpc_endpoint dal_node)) in let* reverse_proxy_dal_node = @@ 
-3641,6 +3660,7 @@ let on_new_level t level ~metadata = ?otel:t.otel ~memtrace:t.configuration.memtrace ~ppx_profiling:t.configuration.ppx_profiling + ~ppx_profiler_backends:t.configuration.ppx_profiler_backends dal_node) in Lwt.return {t with disconnection_state = Some disconnection_state} diff --git a/tezt/tests/cloud/tezos.ml b/tezt/tests/cloud/tezos.ml index 4302a7d01bb9..3b9a852aa1b2 100644 --- a/tezt/tests/cloud/tezos.ml +++ b/tezt/tests/cloud/tezos.ml @@ -38,38 +38,67 @@ module Env = struct let default_profiling_verbosity = "Debug" - let default_profiling_backends = ["txt"; "json"] + let txt_profiler_backend = "txt" - let ppx_profiler_env ?prometheus ?otel enable env = + let json_profiler_backed = "json" + + let prometheus_profiler_backend = "prometheus" + + let opentelemetry_profiler_backend = "opentelemetry" + + let default_profiling_backends = [txt_profiler_backend; json_profiler_backed] + + let ppx_profiler_backends ?prometheus ?otel backends = let cons_if b v l = if b then v :: l else l in + let is_selected backend = backends = [] || List.mem backend backends in + let profiling_backends = + [] + |> cons_if + (Option.is_some otel && is_selected opentelemetry_profiler_backend) + opentelemetry_profiler_backend + |> cons_if + (Option.is_some prometheus && is_selected prometheus_profiler_backend) + prometheus_profiler_backend + |> cons_if (is_selected txt_profiler_backend) txt_profiler_backend + |> cons_if (is_selected json_profiler_backed) json_profiler_backed + in + "\"" ^ String.concat ";" profiling_backends ^ "\"" + + let ppx_profiler_env ?prometheus ?otel enable selected_backends env = let profiling_backends = - default_profiling_backends - |> cons_if (Option.is_some otel) "opentelemetry" - |> cons_if (Option.is_some prometheus) "prometheus" + ppx_profiler_backends ?prometheus ?otel selected_backends in env |> may_add enable "PROFILING" default_profiling_verbosity - |> may_add - enable - "PROFILING_BACKENDS" - (String.concat ";" profiling_backends) + |> 
may_add enable "PROFILING_BACKENDS" profiling_backends let initialize_env ~memtrace ~memtrace_output_filename ~disable_shard_validation ~prometheus ~otel_endpoint ~service_name - ~ignore_pkhs ~ppx_profiling = + ~ignore_pkhs ~ppx_profiling ~ppx_profiler_backends = empty |> memtrace_env memtrace memtrace_output_filename |> otel_env otel_endpoint service_name |> disable_shard_validation_env disable_shard_validation |> ignore_topics_env ignore_pkhs - |> ppx_profiler_env ?prometheus ?otel:otel_endpoint ppx_profiling + |> ppx_profiler_env + ?prometheus + ?otel:otel_endpoint + ppx_profiling + ppx_profiler_backends end let may_add_profiling_to_env ~ppx_profiling = function | None -> - if ppx_profiling then Some (Env.ppx_profiler_env ppx_profiling Env.empty) + if ppx_profiling then + Some + (Env.ppx_profiler_env + ppx_profiling + Env.default_profiling_backends + Env.empty) else None - | Some env -> Some (Env.ppx_profiler_env ppx_profiling env) + | Some env -> + Some + (Env.ppx_profiler_env ppx_profiling Env.default_profiling_backends env) let create_dir ?runner dir = let* () = Process.spawn ?runner "rm" ["-rf"; dir] |> Process.check in @@ -298,7 +327,7 @@ module Dal_node = struct let run ?prometheus ?otel ?(memtrace = false) ?event_level ?(disable_shard_validation = false) ?ignore_pkhs - ?(ppx_profiling = false) dal_node = + ?(ppx_profiling = false) ~ppx_profiler_backends dal_node = let service_name = name dal_node in let memtrace_output_filename = Format.asprintf "%s/%s-trace.ctf" Path.tmp_dir service_name @@ -313,6 +342,7 @@ module Dal_node = struct ~service_name ~ignore_pkhs ~ppx_profiling + ~ppx_profiler_backends in let* () = run ~env ?event_level dal_node in (* Update the state in the service manager *) -- GitLab From 96e5dc509112a7c1eaa0341eb8016c762d1355bc Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Thu, 19 Jun 2025 15:05:59 +0200 Subject: [PATCH 4/9] Tezt/Cloud: fix opentelemetry agent - the health service is actually at the root URI - the prometheus 
metrics are enabled by default, and this syntax has been deprecated. --- tezt/lib_cloud/otel.ml | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/tezt/lib_cloud/otel.ml b/tezt/lib_cloud/otel.ml index e714e4fedf0f..0417fb20a0aa 100644 --- a/tezt/lib_cloud/otel.ml +++ b/tezt/lib_cloud/otel.ml @@ -26,9 +26,7 @@ receivers: http: endpoint: "0.0.0.0:55681" -exporters: - -%s +exporters:%s processors: batch: # Batch processor to optimize telemetry processing @@ -41,16 +39,12 @@ service: processors: [batch] exporters: [otlp/jaeger] - telemetry: - metrics: - address: "0.0.0.0:8888" # Optional: Expose metrics for the collector itself (Prometheus scrapeable) - extensions: - health_check extensions: health_check: - endpoint: "localhost:13133" + endpoint: "0.0.0.0:13133" |} jaeger @@ -87,14 +81,7 @@ let run ~jaeger = let run () = Process.spawn "curl" - [ - "-s"; - "-o"; - "/dev/null"; - "-w"; - "%{http_code}"; - "http://localhost:13133/healthz"; - ] + ["-s"; "-o"; "/dev/null"; "-w"; "%{http_code}"; "http://localhost:13133"] in let* _ = Env.wait_process ~is_ready ~run () in Lwt.return () -- GitLab From 71ce79f0be4218a93b7fb6495e99843af51f4244 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 10:37:45 +0200 Subject: [PATCH 5/9] Tezt/Cloud: add Jaeger to the dashboard if opentelemetry is enabled --- tezt/lib_cloud/web.ml | 19 ++++++++++++++++++- tezt/lib_cloud/website/index.html.jingoo | 6 ++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tezt/lib_cloud/web.ml b/tezt/lib_cloud/web.ml index 7698e173b1ff..12fbaea06dbb 100644 --- a/tezt/lib_cloud/web.ml +++ b/tezt/lib_cloud/web.ml @@ -15,6 +15,7 @@ type t = { dir : string; monitoring : bool; prometheus : bool; + opentelemetry : bool; mutable services : service list; } @@ -146,6 +147,12 @@ let jingoo_template t agents = (if Env.monitoring then List.map (monitoring_jingo_template agents) agents else []) ); + ( "opentelemetry", + Tobj + [ + ("activated", Tbool 
Env.open_telemetry); + ("uri", Tstr (Format.asprintf "http://%s:16686" (domain agents))); + ] ); ("agents", Tlist (List.map agent_jingo_template agents)); ("services", Tlist (List.map service_jingo_template t.services)); ] @@ -180,6 +187,7 @@ let run () = let port = Env.website_port in let prometheus = Env.prometheus in let monitoring = Env.monitoring in + let opentelemetry = Env.open_telemetry in let stop, to_stop = Lwt.task () in let logger next_handler request = let meth = Dream.method_to_string (Dream.method_ request) in @@ -230,7 +238,16 @@ let run () = Dream.html content); ] in - Lwt.return {process; to_stop; dir; monitoring; prometheus; services = []} + Lwt.return + { + process; + to_stop; + dir; + monitoring; + prometheus; + opentelemetry; + services = []; + } let start ~agents = let* t = run () in diff --git a/tezt/lib_cloud/website/index.html.jingoo b/tezt/lib_cloud/website/index.html.jingoo index f8e781a459d3..458a6be5c8a4 100644 --- a/tezt/lib_cloud/website/index.html.jingoo +++ b/tezt/lib_cloud/website/index.html.jingoo @@ -83,6 +83,12 @@
  • NetData monitoring disabled. Use --monitoring to activate it.
  • {%- endif %} + {%- if opentelemetry.activated %} +
  • Jaeger (Opentelemetry trace observer)
  • + {%- else %} +
  • Opentelemetry disabled. Use --open-telemetry to activate it.
  • + {%- endif %} + {%- for service in services %}
  • {{ service.title }}
  • {%- endfor %} -- GitLab From 282484f6de0327983c6ced4742075e4e3ef04c86 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 11:15:46 +0200 Subject: [PATCH 6/9] Manifest: add profiler to lib_gossipsub --- manifest/product_octez.ml | 1 + src/lib_gossipsub/dune | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index 4874bda8f941..ae6366788c05 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -2893,6 +2893,7 @@ let octez_gossipsub = octez_stdlib |> open_; octez_version; octez_profiler |> open_; + octez_profiler_complex_backends |> open_; ] ~preprocess ~preprocessor_deps diff --git a/src/lib_gossipsub/dune b/src/lib_gossipsub/dune index 11fea8053909..357d2198cb08 100644 --- a/src/lib_gossipsub/dune +++ b/src/lib_gossipsub/dune @@ -15,7 +15,8 @@ octez-libs.stdlib-unix octez-libs.stdlib octez-libs.version - octez-libs.octez-profiler) + octez-libs.octez-profiler + octez-libs.octez-profiler.complex_backends) (preprocess (pps octez-libs.ppx_profiler)) (preprocessor_deps (env_var TEZOS_PPX_PROFILER)) (flags @@ -26,4 +27,5 @@ -open Tezos_base_unix -open Tezos_stdlib_unix -open Tezos_stdlib - -open Tezos_profiler)) + -open Tezos_profiler + -open Tezos_profiler_complex_backends)) -- GitLab From e062a39ddcd48dd7d6f312e0b3a2952b88739cba Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 12:00:15 +0200 Subject: [PATCH 7/9] Gossipsub: add metadata to the message stream Such metadata will be used to propagate the Opentelemetry scope, as done in lib_workers --- src/lib_dal_node/gossipsub/gs_interface.ml | 10 ++++++---- src/lib_dal_node/gossipsub/gs_transport_connection.ml | 4 ++-- src/lib_gossipsub/gossipsub_intf.ml | 8 +++++--- src/lib_gossipsub/gossipsub_worker.ml | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/lib_dal_node/gossipsub/gs_interface.ml b/src/lib_dal_node/gossipsub/gs_interface.ml index 
c6ce6d3a18ee..484a0cca64d8 100644 --- a/src/lib_dal_node/gossipsub/gs_interface.ml +++ b/src/lib_dal_node/gossipsub/gs_interface.ml @@ -109,10 +109,12 @@ module Worker_config : maybe_reachable_point module Stream = struct + type metadata = {scope : Opentelemetry.Scope.t option} + type 'a t = { (* In principle [Seq_s] could also be used instead. *) - stream : 'a Lwt_stream.t; - pusher : 'a option -> unit; + stream : ('a * metadata) Lwt_stream.t; + pusher : ('a * metadata) option -> unit; mutable length : int; (* The [length] field counts the number of elements in the stream. It is incremented on calls to {!push}, decremented on succesful calls to @@ -123,8 +125,8 @@ module Worker_config : let stream, pusher = Lwt_stream.create () in {stream; pusher; length = 0} - let push e t = - t.pusher (Some e) ; + let push ?(metadata = {scope = None}) e t = + t.pusher (Some (e, metadata)) ; t.length <- t.length + 1 let pop t = diff --git a/src/lib_dal_node/gossipsub/gs_transport_connection.ml b/src/lib_dal_node/gossipsub/gs_transport_connection.ml index feac0889b9f3..b27f30918525 100644 --- a/src/lib_dal_node/gossipsub/gs_transport_connection.ml +++ b/src/lib_dal_node/gossipsub/gs_transport_connection.ml @@ -216,7 +216,7 @@ let gs_worker_p2p_output_handler gs_worker p2p_layer = | _ -> true in let rec loop output_stream = - let* p2p_output = Worker.Stream.pop output_stream in + let* p2p_output, _metadata = Worker.Stream.pop output_stream in let* () = match p2p_output with | Worker.Out_message {to_peer; p2p_message} -> ( @@ -281,7 +281,7 @@ let transport_layer_inputs_handler gs_worker p2p_layer = let app_messages_handler gs_worker ~app_messages_callback ~verbose = let open Lwt_syntax in let rec loop app_output_stream = - let* Worker.{message; message_id; topic = _} = + let* Worker.{message; message_id; topic = _}, _metadata = Worker.Stream.pop app_output_stream in let* res = app_messages_callback message message_id in diff --git a/src/lib_gossipsub/gossipsub_intf.ml 
b/src/lib_gossipsub/gossipsub_intf.ml index 1c010c0ff7a2..6b71f3595cc9 100644 --- a/src/lib_gossipsub/gossipsub_intf.ml +++ b/src/lib_gossipsub/gossipsub_intf.ml @@ -1081,16 +1081,18 @@ module type WORKER_CONFIGURATION = sig (** Create a new empty stream. *) val empty : unit -> 'a t + type metadata = {scope : Opentelemetry.Scope.t option} + (** Push the given value into the stream. *) - val push : 'a -> 'a t -> unit + val push : ?metadata:metadata -> 'a -> 'a t -> unit (** Pops the oldest value that has been pushed to the stream. In case the stream is empty, the function will wait until some value is pushed. *) - val pop : 'a t -> 'a Monad.t + val pop : 'a t -> ('a * metadata) Monad.t (** Returns and removes all available elements of the stream l without blocking. *) - val get_available : 'a t -> 'a list + val get_available : 'a t -> ('a * metadata) list (** Returns the number of elements in the stream. *) val length : 'a t -> int diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 30e181cf5d03..e5e69bff50a7 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -941,7 +941,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : let events_stream = t.state.events_stream in let events_logging = t.state.events_logging in let rec loop t = - let* event = Stream.pop events_stream in + let* event, _metadata = Stream.pop events_stream in if !shutdown then return () else let* () = events_logging event in -- GitLab From 75daf52ce37ba56c86c80aa0c4e68c811d6a32b2 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 13:38:31 +0200 Subject: [PATCH 8/9] Gossipsub_worker: push metadata from ambient scope alongside messages --- src/lib_gossipsub/gossipsub_worker.ml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index e5e69bff50a7..2147698f2a63 100644 --- 
a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -900,8 +900,28 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : [@profiler.span_f {verbosity = Notice} ["apply_event"; "Check_unknown_messages"]] + (* See lib_workers/worker.ml for the rationale. *) + let make_metadata () = + (* This pattern is equivalent to: + ``` + let scope = + if "ppx is enabled" then + Opentelemetry.Scope.get_ambient_scope () + else + None + ``` + *) + let scope = + (None + [@profiler.overwrite + {driver_ids = [Opentelemetry]} + (Opentelemetry.Scope.get_ambient_scope ())]) + in + Stream.{scope} + (** A helper function that pushes events in the state *) - let push e {status = _; state; self = _} = Stream.push e state.events_stream + let push e {status = _; state; self = _} = + Stream.push ~metadata:(make_metadata ()) e state.events_stream let app_input t input = push (App_input input) t -- GitLab From 2d7ca03d425b0655120c0689819d6676afd013b1 Mon Sep 17 00:00:00 2001 From: Pierrick Couderc Date: Mon, 23 Jun 2025 14:47:29 +0200 Subject: [PATCH 9/9] Gossipsub/Opentelemetry: trace app message publishment --- src/lib_gossipsub/gossipsub_worker.ml | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/lib_gossipsub/gossipsub_worker.ml b/src/lib_gossipsub/gossipsub_worker.ml index 2147698f2a63..03f794f9f66a 100644 --- a/src/lib_gossipsub/gossipsub_worker.ml +++ b/src/lib_gossipsub/gossipsub_worker.ml @@ -871,7 +871,7 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : (** This is the main function of the worker. It interacts with the Gossipsub automaton given an event. The function possibly sends messages to the P2P and application layers and returns the new worker's state. 
*) - let apply_event ~self ({gossip_state; _} as state) = function + let apply_event ~self ({gossip_state; _} as state) _metadata = function (* FIXME: https://gitlab.com/tezos/tezos/-/issues/5326 Notify the GS worker about the status of messages sent to peers. *) @@ -894,6 +894,11 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : state event [@profiler.span_f {verbosity = Notice} ["apply_event"; "App_input"]] + [@profiler.wrap_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.trace + ?scope:_metadata.Stream.scope + "apply_event.app_input")] | Check_unknown_messages -> check_unknown_messages_id state @@ -961,16 +966,22 @@ module Make (C : Gossipsub_intf.WORKER_CONFIGURATION) : let events_stream = t.state.events_stream in let events_logging = t.state.events_logging in let rec loop t = + (* metadata needs to be prefixed by `_`, to avoid triggering the unused + variable warning when compiling without the ppx. *) let* event, _metadata = Stream.pop events_stream in - if !shutdown then return () - else - let* () = events_logging event in - t.state <- - (apply_event - ~self:t.self - t.state - event [@profiler.span_f {verbosity = Notice} ["apply_event"]]) ; - loop t + (if !shutdown then return () + else + let* () = events_logging event in + t.state <- + (apply_event + ~self:t.self + t.state + _metadata + event [@profiler.span_f {verbosity = Notice} ["apply_event"]]) ; + loop t) + [@profiler.wrap_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.update_scope _metadata.scope)] in let promise = loop t in let schedule_cancellation () = -- GitLab