diff --git a/manifest/externals.ml b/manifest/externals.ml index 823596e743ea669c5ac0d729bd702fa1480f8ae4..3177e34c71a0fd155e03754407b36a33ad25b1a5 100644 --- a/manifest/externals.ml +++ b/manifest/externals.ml @@ -36,6 +36,8 @@ let alcotest = external_lib "alcotest" V.(at_least "1.5.0") let alcotest_lwt = external_lib "alcotest-lwt" V.(at_least "1.5.0") +let ambient_context_lwt = external_lib "ambient-context-lwt" V.(exactly "0.1.0") + let asetmap = external_lib "asetmap" V.(at_least "0.8.1") let astring = external_lib "astring" V.True diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml index 4c32597c54614e2758ee0df79fd16e3f8cad427d..fc32fd9b2b48783e7000115066cac71007882b5c 100644 --- a/manifest/product_octez.ml +++ b/manifest/product_octez.ml @@ -3789,6 +3789,23 @@ let octez_requester_tests = qcheck_tezt; ] +let octez_profiler_backend = + octez_lib + "octez-profiler-backend" + ~internal_name:"tezos_profiler_backend" + ~path:"src/lib_profiler_backend" + ~synopsis:"Backends for the Octez Profiler" + ~deps: + [ + octez_base |> open_ ~m:"TzPervasives" |> open_; + octez_base_unix |> open_; + octez_stdlib |> open_; + octez_stdlib_unix; + opentelemetry; + ambient_context_lwt; + opentelemetry_client_cohttp_lwt; + ] + let octez_shell = let (PPX {preprocess; preprocessor_deps}) = ppx_profiler in octez_shell_lib @@ -3811,6 +3828,7 @@ let octez_shell = lwt_watcher; lwt_canceler; prometheus; + octez_profiler_backend |> open_; octez_base |> open_ ~m:"TzPervasives" |> open_; octez_base_unix |> open_; octez_rpc; @@ -7907,6 +7925,7 @@ let _octez_node = octez_rpc_http_server |> open_; octez_rpc_process |> open_; octez_p2p |> open_; + octez_profiler_backend |> open_; octez_shell |> open_; octez_store |> open_; octez_store_unix_reconstruction |> open_; diff --git a/opam/octez-libs.opam b/opam/octez-libs.opam index 2da03cfecd2e22c95b6c8eb66301db3f3d823709..bd277c709bcdb47c04218f158cdf352edda19eca 100644 --- a/opam/octez-libs.opam +++ b/opam/octez-libs.opam @@ -82,6 +82,7 @@ depends: [ "integers" "ctypes" { >= "0.18.0" } "tezos-sapling-parameters" { >= "1.1.0" } + "ambient-context-lwt" { = "0.1.0" } "octez-rustzcash-deps" { with-test & = version } "bigstring" {with-test} "bam-ppx" { with-test & >= "0.3" } diff --git a/opam/virtual/octez-deps.opam b/opam/virtual/octez-deps.opam index 1af898d3ef20080b2358f792d0b2731c15bd41e3..be741d8b3f79d88bb9fdb26ebafe0c67c66df055 100644 --- a/opam/virtual/octez-deps.opam +++ b/opam/virtual/octez-deps.opam @@ -13,6 +13,7 @@ depends: [ "aches-lwt" { >= "1.1.0" } "alcotest" { >= "1.5.0" } "alcotest-lwt" { >= "1.5.0" } + "ambient-context-lwt" { = "0.1.0" } "asetmap" { >= "0.8.1" } "astring" "bam-ppx" { >= "0.3" } diff --git a/opam/virtual/octez-deps.opam.locked b/opam/virtual/octez-deps.opam.locked index 4d14fbd195937fa52a2edf3e1ce8ee8a1b36f536..5ecbcb879bf3e8cf93b61d2b9cb0cf7cf74ba777 100644 --- a/opam/virtual/octez-deps.opam.locked +++ b/opam/virtual/octez-deps.opam.locked @@ -17,6 +17,7 @@ depends: [ "alcotest" {= "1.8.0"} "alcotest-lwt" {= "1.8.0"} "ambient-context" {= "0.1.0"} + "ambient-context-lwt" {= "0.1.0"} "angstrom" {= "0.16.1"} "asetmap" {= "0.8.1"} "asn1-combinators" {= "0.3.2"} diff --git a/src/bin_node/dune b/src/bin_node/dune index f805af767977b47b450bcf4a4aa10dc0e81f9ee3..8b3501c1cdf950523b08da0d1489436c17344009 100644 --- a/src/bin_node/dune +++ b/src/bin_node/dune @@ -20,6 +20,7 @@ octez-libs.rpc-http-server octez-rpc-process octez-shell-libs.p2p + octez-libs.octez-profiler-backend octez-shell-libs.shell octez-shell-libs.store octez-shell-libs.store.unix-reconstruction @@ -176,6 +177,7 @@ -open Tezos_rpc_http_server -open Octez_rpc_process -open Tezos_p2p + -open Tezos_profiler_backend -open Tezos_shell -open Tezos_store -open Tezos_store_unix_reconstruction diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index 111fb9cda8ba6a75b76de4a2bfe437dffa39a2f3..fc478691af843e8904ddcb7207b79f9296f479bc 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -892,6 +892,10 @@ let process sandbox verbosity target singleprocess force_history_mode_switch "Failed to parse the provided target. A ',' \ value was expected.") in + () + [@profiler.custom + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.initialize "octez-node")] ; Lwt_lock_file.with_lock ~when_locked: (`Fail (Exn (Failure "Data directory is locked by another process"))) diff --git a/src/lib_base/index.mld b/src/lib_base/index.mld index a13a065443ef3ec6a2d935c5987b80f15ebed1be..eb70251bcaf0820f003042ebd1af107a9511363b 100644 --- a/src/lib_base/index.mld +++ b/src/lib_base/index.mld @@ -41,6 +41,7 @@ It contains the following libraries: - {{!page-tezos_lwt_result_stdlib}Tezos_lwt_result_stdlib}: error-aware stdlib replacement - {{!module-Tezos_micheline}Tezos_micheline}: Internal AST and parser for the Michelson language - {{!module-Tezos_p2p_services}Tezos_p2p_services}: Descriptions of RPCs exported by [tezos-p2p] +- {{!module-Tezos_profiler_backend}Tezos_profiler_backend}: Backends for the Octez Profiler - {{!module-Tezos_rpc}Tezos_rpc}: Library of auto-documented RPCs (service and hierarchy descriptions) - {{!module-Tezos_rpc_http}Tezos_rpc_http}: Library of auto-documented RPCs (http server and client) - {{!module-Tezos_rpc_http_client}Tezos_rpc_http_client}: Library of auto-documented RPCs (http client) diff --git a/src/lib_profiler_backend/dune b/src/lib_profiler_backend/dune new file mode 100644 index 0000000000000000000000000000000000000000..df433a48d250b2e970ee443ac6b2e894e07e2c8a --- /dev/null +++ b/src/lib_profiler_backend/dune @@ -0,0 +1,21 @@ +; This file was automatically generated, do not edit. +; Edit file manifest/main.ml instead. + +(library + (name tezos_profiler_backend) + (public_name octez-libs.octez-profiler-backend) + (instrumentation (backend bisect_ppx)) + (libraries + octez-libs.base + octez-libs.base.unix + octez-libs.stdlib + octez-libs.stdlib-unix + opentelemetry + ambient-context-lwt + octez-libs.opentelemetry-client-cohttp-lwt) + (flags + (:standard) + -open Tezos_base.TzPervasives + -open Tezos_base + -open Tezos_base_unix + -open Tezos_stdlib)) diff --git a/src/lib_profiler_backend/opentelemetry_profiler.ml b/src/lib_profiler_backend/opentelemetry_profiler.ml new file mode 100644 index 0000000000000000000000000000000000000000..186cb27a60d7d6574dc7102fa350f38063fd4c61 --- /dev/null +++ b/src/lib_profiler_backend/opentelemetry_profiler.ml @@ -0,0 +1,95 @@ +(*****************************************************************************) +(* *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2024 Nomadic Labs *) +(* *) +(*****************************************************************************) + +let initialize service_name = + Opentelemetry.Globals.service_name := service_name ; + Opentelemetry.GC_metrics.basic_setup () ; + Ambient_context.set_storage_provider (Ambient_context_lwt.storage ()) ; + Opentelemetry_client_cohttp_lwt.setup () + +let op_hash_to_trace_id op_hash = + (* A trace id must be of 16 bytes. We use the first 16 bytes of an operation + hash to make it replicable and unique enough. *) + Bytes.sub (Tezos_crypto.Hashed.Operation_hash.to_bytes op_hash) 0 16 + |> Opentelemetry.Trace_id.of_bytes + +let add_event ?attrs name = + Opentelemetry.( + Scope.get_ambient_scope () + |> Option.iter @@ fun scope -> + Trace.add_event scope @@ fun () -> Event.make ?attrs name) + +let trace ?force_new_trace_id ?trace_state ?service_name ?attrs ?kind ?trace_id + ?parent ?scope ?links name k = + Opentelemetry.Trace.with_ + ?force_new_trace_id + ?trace_state + ?service_name + ?attrs + ?kind + ?trace_id + ?parent + ?scope + ?links + name + @@ fun _scope -> k () + +let trace_operation op ?attrs = + let op_hash = + match op with `Operation op -> Operation.hash op | `Hash oph -> oph + in + let metadata = + ( "operation_hash", + `String (Tezos_crypto.Hashed.Operation_hash.to_b58check op_hash) ) + in + let attrs = + match attrs with None -> [metadata] | Some attrs -> metadata :: attrs + in + trace ~trace_id:(op_hash_to_trace_id op_hash) ~attrs + +type (_, _) Profiler.kind += Opentelemetry_profiler : ('a, 'b) Profiler.kind + +type config = {service_name : string; verbosity : Profiler.verbosity} + +module Driver = struct + type nonrec config = config + + type state = State + + let kind = Opentelemetry_profiler + + let create _ = State + + let time _ = Simple_profiler.time () + + let record _ _ _ = () + + let aggregate _ _ _ = () + + let stop _ = () + + let stamp _ _ _ = () + + let mark _ _ _ = () + + let span _ _ _ _ = () + + let inc _ _ = () + + let report _ = None + + let close _ = () +end + +let opentelemetry : config Profiler.driver = + (module Driver : Profiler.DRIVER with type config = config) + +let () = + Profiler_instance.register_backend + ["opentelemetry"] + (fun ~verbosity ~directory:_ ~name -> + Profiler.instance opentelemetry {verbosity; service_name = name}) diff --git a/src/lib_profiler_backend/opentelemetry_profiler.mli b/src/lib_profiler_backend/opentelemetry_profiler.mli new file mode 100644 index 0000000000000000000000000000000000000000..413326418392cc87625e707e70760ac150516b73 --- /dev/null +++ b/src/lib_profiler_backend/opentelemetry_profiler.mli @@ -0,0 +1,63 @@ +(*****************************************************************************) +(* *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2024 Nomadic Labs *) +(* *) +(*****************************************************************************) + +(** [initialize service_name] initializes the Opentelemetry client to output the + traces. *) +val initialize : string -> unit + +(** [op_hash_to_trace_id op_hash] returns a valid [Opentelemetry.Trace_id.t] that can be + used when tracing. When tracing an operation application, using the + operation hash as trace id can serve to produce a consistent trace amongst + multiple nodes to view the operation propagation. *) +val op_hash_to_trace_id : + Tezos_crypto.Hashed.Operation_hash.t -> Opentelemetry.Trace_id.t + +(** [add_event ?attrs name] adds an event to the currently opened scope. If no + scope has been opened yet (i.e. not called in the continuation of + [Opentelemetry.Trace.with_]), the function does nothing. *) +val add_event : ?attrs:Opentelemetry.key_value list -> string -> unit + +(** [trace id k] is [Opentelemetry.Trace.with_ id (fun _ -> k ())]. It doesn't + give the scope to the callback, as it is meant to be called by custom + functions. *) +val trace : + ?force_new_trace_id:bool -> + ?trace_state:string -> + ?service_name:string -> + ?attrs:(string * Opentelemetry.value) trace -> + ?kind:Opentelemetry_proto.Trace.span_span_kind -> + ?trace_id:Opentelemetry.Trace_id.t -> + ?parent:Opentelemetry.Span_id.t -> + ?scope:Opentelemetry.Scope.t -> + ?links:Opentelemetry_proto.Trace.span_link list -> + string -> + (unit -> 'a) -> + 'a + +(** [trace_operation op id k] uses the operation hash as a [trace_id] and calls + [trace ~trace_id id k]. *) +val trace_operation : + [< `Hash of Operation_hash.t | `Operation of Operation.t] -> + ?attrs:(string * Opentelemetry.value) trace -> + ?force_new_trace_id:bool -> + ?trace_state:string -> + ?service_name:string -> + ?kind:Opentelemetry_proto.Trace.span_span_kind -> + ?parent:Opentelemetry.Span_id.t -> + ?scope:Opentelemetry.Scope.t -> + ?links:Opentelemetry_proto.Trace.span_link list -> + string -> + (unit -> 'a) -> + 'a + +type config = {service_name : string; verbosity : Profiler.verbosity} + +(** Mocked driver, that serves only to register the opentelemetry as a valid + profiler and enable relevant functions with the ppx. *) +module Driver : Profiler.DRIVER with type config = config + +val opentelemetry : config Profiler.driver diff --git a/src/lib_shell/dune b/src/lib_shell/dune index 649976b8743a5f690e4366127155f68c4b9ac9ce..1bdf98c124a5b262d97a61cc51ef0a416c02613b 100644 --- a/src/lib_shell/dune +++ b/src/lib_shell/dune @@ -9,6 +9,7 @@ lwt-watcher lwt-canceler octez-libs.prometheus + octez-libs.octez-profiler-backend octez-libs.base octez-libs.base.unix octez-libs.rpc @@ -38,6 +39,7 @@ (preprocessor_deps (env_var TEZOS_PPX_PROFILER)) (flags (:standard) + -open Tezos_profiler_backend -open Tezos_base.TzPervasives -open Tezos_base -open Tezos_base_unix diff --git a/src/lib_shell/injection_directory.ml b/src/lib_shell/injection_directory.ml index eae255426ae5569bb6d7eaa89639740505580f26..79ad6e3f4618e8fe13d9650649e74be954d4bdc6 100644 --- a/src/lib_shell/injection_directory.ml +++ b/src/lib_shell/injection_directory.ml @@ -51,7 +51,19 @@ let inject_operation validator ~force ?chain bytes = match Data_encoding.Binary.of_bytes_opt Operation.encoding bytes with | None -> failwith "Can't parse the operation" | Some op -> - let t = Validator.inject_operation validator ~force ?chain_id op in + let t = + (Validator.inject_operation + validator + ~force + ?chain_id + op + [@profiler.custom_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.trace_operation + (`Operation op) + "inject_operation")]) + in + let hash = Operation.hash op in return (hash, t) diff --git a/src/lib_shell/prevalidator.ml b/src/lib_shell/prevalidator.ml index 4435c871f052faff62eb4f9e209614b9a0d24087..0a11c6ece8fbc059538a937ba4cfc4dd810c1532 100644 --- a/src/lib_shell/prevalidator.ml +++ b/src/lib_shell/prevalidator.ml @@ -589,7 +589,10 @@ module Make_s advertise pv_shell advertisable_mempool - [@profiler.aggregate_f {verbosity = Debug} "advertise mempool"] ; + [@profiler.aggregate_f {verbosity = Debug} "advertise mempool"] + [@profiler.custom_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.trace "advertise mempool")] ; if Mempool.is_empty validated_mempool then Lwt.return_unit else let our_mempool = @@ -765,6 +768,10 @@ module Make_s else match Parser.parse oph op with | Error err -> + () + [@profiler.custom + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.add_event "parse_operation failed")] ; failwith "Invalid operation %a: %a." Operation_hash.pp @@ -772,6 +779,10 @@ module Make_s Error_monad.pp_print_trace err | Ok parsed_op -> ( + () + [@profiler.custom + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.add_event "parse_operation succeeded")] ; if force then ( let*! () = pv.shell.parameters.tools.chain_tools.inject_operation oph op @@ -819,6 +830,10 @@ module Make_s in match op_status with | Some (_h, `Validated) -> + () + [@profiler.custom + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.add_event "operation validated")] ; (* TODO: https://gitlab.com/tezos/tezos/-/issues/2294 We may want to only do the injection/replacement if a flag `replace` is set to true in the injection query. *) @@ -852,6 +867,10 @@ module Make_s | `Branch_refused e | `Refused e | `Outdated e ) ) -> + () + [@profiler.custom + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.add_event "operation rejected")] ; Lwt.return @@ error_with "Error while validating injected operation %a:@ %a" @@ -1501,6 +1520,11 @@ module Make op [@profiler.aggregate_s {verbosity = Notice; metadata = [("prometheus", "")]} "on_inject"] + [@profiler.custom_f + {driver_ids = [Opentelemetry]} + (Opentelemetry_profiler.trace_operation + (`Operation op) + "on_inject")] | Request.Arrived (oph, op) -> Requests.on_arrived pv diff --git a/src/lib_shell/validator.ml b/src/lib_shell/validator.ml index 753558b4333ceeb9f79f40aa7ec957dfbca3f59e..7cff95e4ced2fb01dd0a2611189c959ea4ab5524 100644 --- a/src/lib_shell/validator.ml +++ b/src/lib_shell/validator.ml @@ -178,7 +178,11 @@ let chains_watcher {chains_input; _} = Lwt_watcher.create_stream chains_input let inject_operation v ?chain_id ~force op = let open Lwt_result_syntax in let inject_operation_on nv ~handle_missing_prevalidator = - match Chain_validator.prevalidator nv with + match[@profiler.custom_s + {driver_ids = [OpenTelemetry]} + (Opentelemetry_profiler.trace "Validator.inject_operation")] + Chain_validator.prevalidator nv + with | Some pv -> Prevalidator.inject_operation pv ~force op | None -> handle_missing_prevalidator in