From 900a7ad2fc72412c18605089101563e0b513e536 Mon Sep 17 00:00:00 2001 From: Julien Sagot Date: Thu, 3 Oct 2024 11:13:26 +0200 Subject: [PATCH 1/5] prometheus: pass increment to Summary.observe (opt param) --- prometheus/src/prometheus.ml | 4 ++-- prometheus/src/prometheus.mli | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/prometheus/src/prometheus.ml b/prometheus/src/prometheus.ml index f0db19380d95..3ad76b78244c 100644 --- a/prometheus/src/prometheus.ml +++ b/prometheus/src/prometheus.ml @@ -350,9 +350,9 @@ module Summary = struct include Metric (Child) - let observe t v = + let observe ?(n = 1.0) t v = let open Child in - t.count <- t.count +. 1.0 ; + t.count <- t.count +. n ; t.sum <- t.sum +. v let time t gettimeofday fn = diff --git a/prometheus/src/prometheus.mli b/prometheus/src/prometheus.mli index ce3af64eb6a6..f8bae5a1c20c 100644 --- a/prometheus/src/prometheus.mli +++ b/prometheus/src/prometheus.mli @@ -201,8 +201,8 @@ end module Summary : sig include METRIC - (** [observe t v] increases the total by [v] and the count by one. *) - val observe : t -> float -> unit + (** [observe ?n t v] increases the total by [v] and the count by [n] (default: by one). *) + val observe : ?n:float -> t -> float -> unit (** [time t gettime f] calls [gettime ()] before and after executing [f ()] and observes the difference. *) -- GitLab From 10e21bfb02512a0cd53efd804243de523f9ebce9 Mon Sep 17 00:00:00 2001 From: Julien Sagot Date: Thu, 3 Oct 2024 11:14:16 +0200 Subject: [PATCH 2/5] profiler: add a prometheus backend --- src/bin_node/main.ml | 3 + src/bin_node/prometheus_profiler.ml | 127 ++++++++++++++++++++++++++ src/bin_node/prometheus_profiler.mli | 21 +++++ src/lib_base/unix/simple_profiler.ml | 10 -- src/lib_base/unix/simple_profiler.mli | 44 +++++++++ 5 files changed, 195 insertions(+), 10 deletions(-) create mode 100644 src/bin_node/prometheus_profiler.ml create mode 100644 src/bin_node/prometheus_profiler.mli diff --git a/src/bin_node/main.ml b/src/bin_node/main.ml index 4e97b095a52d..21acf7673825 100644 --- a/src/bin_node/main.ml +++ b/src/bin_node/main.ml @@ -24,6 +24,9 @@ (* *) (*****************************************************************************) +(** Ensure that the prometheus profiler is linked. *) +let () = (() [@profiler.custom ignore Prometheus_profiler.prometheus]) + (* FIXME: https://gitlab.com/tezos/tezos/-/issues/4025 Remove backwards compatible Tezos symlinks. *) let warn_if_argv0_name_not_octez () = diff --git a/src/bin_node/prometheus_profiler.ml b/src/bin_node/prometheus_profiler.ml new file mode 100644 index 000000000000..08dd49f197d6 --- /dev/null +++ b/src/bin_node/prometheus_profiler.ml @@ -0,0 +1,127 @@ +(*****************************************************************************) +(* *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2024 Nomadic Labs *) +(* *) +(*****************************************************************************) + +open Profiler + +type prometheus_state = {name : string; mutable state : Simple_profiler.state} + +type prometheus_config = string * verbosity + +type (_, _) kind += Prometheus : (prometheus_config, prometheus_state) kind + +module Prometheus = struct + type nonrec state = prometheus_state + + type config = prometheus_config + + let kind = Prometheus + + let create (name, verbosity) : state = + {state = Simple_profiler.empty verbosity; name} + + include Simple_profiler.Base (struct + type t = prometheus_state + + let get_state t = t.state + + let set_state t s = t.state <- s + + (* prometheus backend will throw an error if you register the same name twice, + hence the need for reusing metrics already defined *) + let metric + (maker : + namespace:string -> + subsystem:string -> + label_name:string -> + help:string -> + string -> + 'a) help suffix = + let ht = Stdlib.Hashtbl.create 0 in + fun ~namespace ~subsystem -> + let k = (namespace, subsystem) in + match Stdlib.Hashtbl.find_opt ht k with + | Some metric -> metric + | None -> + let metric = + maker ~namespace ~subsystem ~label_name:"id" ~help suffix + in + Stdlib.Hashtbl.add ht k metric ; + metric + + let counter = + let maker ~namespace ~subsystem ~label_name ~help name = + Prometheus.Counter.v_label ~namespace ~subsystem ~label_name ~help name + in + metric maker "Number of times a function is called" "count" + + let summary = + let maker ~namespace ~subsystem ~label_name ~help name = + Prometheus.Summary.v_label ~namespace ~subsystem ~label_name ~help name + in + metric + maker + "Total time passed in a function and number of times it has been called" + "time" + + let output_entry subsystem (id, metadata) n d = + let namespace = "profiling" in + (* because prometheus can't handle high cardinality in metrics names, + we treat only those who are specifically marked for the prometheus backend *) + match Stdlib.List.assoc_opt "prometheus" metadata with + | None -> () + | Some id' -> + let id = if id' = "" then id else id' in + if d.wall = 0. then + Prometheus.Counter.inc + (counter ~namespace ~subsystem id) + (Float.of_int n) + else + Prometheus.Summary.observe + (summary ~namespace ~subsystem id) + ~n:(Float.of_int n) + d.wall + + let output_report = + let output t r = + let rec output ?t0 {recorded; aggregated} = + let t0 = + match t0 with + | Some t0 -> t0 + | None -> ( + match recorded with + | (_, {start; _}) :: _ -> start + | [] -> {wall = 0.; cpu = 0.}) + in + IdMap.iter + (fun id {count = n; total = Span d; node_verbosity = _; children} -> + output_entry t.name id n d ; + output ~t0 {recorded = []; aggregated = children}) + aggregated ; + List.iter + (fun ( id, + {start = t0; duration = Span d; contents; item_verbosity = _} + ) -> + output_entry t.name id 1 d ; + output ~t0 contents) + recorded + in + output r + in + Some output + end) + + let close _ = () +end + +let prometheus = + (module Prometheus : DRIVER with type config = prometheus_config) + +let () = + Profiler_instance.register_backend + ["prometheus"] + (fun max_verbosity ~directory:_ ~name -> + instance prometheus (name, max_verbosity)) diff --git a/src/bin_node/prometheus_profiler.mli b/src/bin_node/prometheus_profiler.mli new file mode 100644 index 000000000000..1be9b4703bad --- /dev/null +++ b/src/bin_node/prometheus_profiler.mli @@ -0,0 +1,21 @@ +(*****************************************************************************) +(* *) +(* SPDX-License-Identifier: MIT *) +(* SPDX-FileCopyrightText: 2024 Nomadic Labs *) +(* *) +(*****************************************************************************) + +(** Driver registering its report in a promotheus collector whenever a + toplevel section ends. + Enable it using [PROFILING_BACKEND=prometheus] environment variable. + + This backend relies on the presence or not of a "prometheus" attribute + in the metadata field of profiler calls. If this attribute is set to + the empty string, the id passed to the call will be used as metric + label. If the attribute is a non-empty string, that value will be + used instead of the one passed to the profiling function. + + You MUST avoid metric names that are too unique such as block or + operation hashes because prometheus will quickly fail at handling + a too high metrics cardinality otherwise. *) +val prometheus : (string * Profiler.verbosity) Profiler.driver diff --git a/src/lib_base/unix/simple_profiler.ml b/src/lib_base/unix/simple_profiler.ml index 8bbc565b465f..10e581a7c01f 100644 --- a/src/lib_base/unix/simple_profiler.ml +++ b/src/lib_base/unix/simple_profiler.ml @@ -312,16 +312,6 @@ let pp_report ?t0 ppf report = in Format.fprintf ppf "@[%a@]" (pp_report t0 0) report -(** The [Base] functor helps to define other backend - without having to write the same functions again and again. - - Given a way to get and set a [state] and an output function, - the functor will produce a module implementing the [DRIVER] - interface. - - Note that the produced module will try to [output] report every - time [stamp], [inc], [mark], [span] or [stop] is used. - *) module Base (P : sig type t diff --git a/src/lib_base/unix/simple_profiler.mli b/src/lib_base/unix/simple_profiler.mli index 043bbcb3cc91..cd6f29299d67 100644 --- a/src/lib_base/unix/simple_profiler.mli +++ b/src/lib_base/unix/simple_profiler.mli @@ -24,6 +24,50 @@ (* *) (*****************************************************************************) +val time : unit -> Profiler.time + +type state + +val empty : Profiler.verbosity -> state + +(** The [Base] functor helps to define other backend + without having to write the same functions again and again. + + Given a way to get and set a [state] and an output function, + the functor will produce a module implementing the [DRIVER] + interface. + + Note that the produced module will try to [output] report every + time [stamp], [inc], [mark], [span] or [stop] is used. + *) +module Base (P : sig + type t + + val get_state : t -> state + + val set_state : t -> state -> unit + + val output_report : (t -> Profiler.report -> unit) option +end) : sig + val time : P.t -> Profiler.time + + val record : P.t -> Profiler.verbosity -> Profiler.id -> unit + + val aggregate : P.t -> Profiler.verbosity -> Profiler.id -> unit + + val report : P.t -> Profiler.report option + + val stamp : P.t -> Profiler.verbosity -> Profiler.id -> unit + + val inc : P.t -> Profiler.report -> unit + + val mark : P.t -> Profiler.verbosity -> Profiler.ids -> unit + + val span : P.t -> Profiler.verbosity -> Profiler.span -> Profiler.ids -> unit + + val stop : P.t -> unit +end + (** Memory driver allowing building intermediate reports with purpose of being included in external reports or manipulated directly. *) val headless : Profiler.verbosity Profiler.driver -- GitLab From 9a0c2d0c85ca4b140f75f16cdb9ade3c20a0ee8b Mon Sep 17 00:00:00 2001 From: Julien Sagot Date: Wed, 6 Nov 2024 10:22:38 +0100 Subject: [PATCH 3/5] Profiler: prometheus backend: only register children if parent is registered --- src/bin_node/prometheus_profiler.ml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/bin_node/prometheus_profiler.ml b/src/bin_node/prometheus_profiler.ml index 08dd49f197d6..bfb13b28dc98 100644 --- a/src/bin_node/prometheus_profiler.ml +++ b/src/bin_node/prometheus_profiler.ml @@ -67,12 +67,13 @@ module Prometheus = struct "Total time passed in a function and number of times it has been called" "time" + (** Because prometheus can't handle high cardinality in metrics names, + we treat only those who are specifically marked for the prometheus backend. + Returns a boolean that indicate if the entry has been registered or not. *) let output_entry subsystem (id, metadata) n d = let namespace = "profiling" in - (* because prometheus can't handle high cardinality in metrics names, - we treat only those who are specifically marked for the prometheus backend *) match Stdlib.List.assoc_opt "prometheus" metadata with - | None -> () + | None -> false | Some id' -> let id = if id' = "" then id else id' in if d.wall = 0. then @@ -83,7 +84,8 @@ module Prometheus = struct Prometheus.Summary.observe (summary ~namespace ~subsystem id) ~n:(Float.of_int n) - d.wall + d.wall ; + true let output_report = let output t r = @@ -96,17 +98,19 @@ module Prometheus = struct | (_, {start; _}) :: _ -> start | [] -> {wall = 0.; cpu = 0.}) in + (* For each node in both [aggregated] and [recorded] lists, + we only output the children nodes if the parent has been marked for + promotheus backend. This is similar to verbosity filtering where a child is + only recorded if the parent is. *) IdMap.iter (fun id {count = n; total = Span d; node_verbosity = _; children} -> - output_entry t.name id n d ; - output ~t0 {recorded = []; aggregated = children}) + if output_entry t.name id n d then + output ~t0 {recorded = []; aggregated = children}) aggregated ; List.iter (fun ( id, {start = t0; duration = Span d; contents; item_verbosity = _} - ) -> - output_entry t.name id 1 d ; - output ~t0 contents) + ) -> if output_entry t.name id 1 d then output ~t0 contents) recorded in output r -- GitLab From a53757c43c9321073a3539e5ca3e94bdc2ca1bc6 Mon Sep 17 00:00:00 2001 From: Julien Sagot Date: Wed, 6 Nov 2024 10:24:47 +0100 Subject: [PATCH 4/5] Profiler: prometheus backend: remove unused argument in output function --- src/bin_node/prometheus_profiler.ml | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/bin_node/prometheus_profiler.ml b/src/bin_node/prometheus_profiler.ml index bfb13b28dc98..78dad62ec590 100644 --- a/src/bin_node/prometheus_profiler.ml +++ b/src/bin_node/prometheus_profiler.ml @@ -89,15 +89,7 @@ module Prometheus = struct let output_report = let output t r = - let rec output ?t0 {recorded; aggregated} = - let t0 = - match t0 with - | Some t0 -> t0 - | None -> ( - match recorded with - | (_, {start; _}) :: _ -> start - | [] -> {wall = 0.; cpu = 0.}) - in + let rec output {recorded; aggregated} = (* For each node in both [aggregated] and [recorded] lists, we only output the children nodes if the parent has been marked for promotheus backend. This is similar to verbosity filtering where a child is @@ -105,12 +97,12 @@ module Prometheus = struct IdMap.iter (fun id {count = n; total = Span d; node_verbosity = _; children} -> if output_entry t.name id n d then - output ~t0 {recorded = []; aggregated = children}) + output {recorded = []; aggregated = children}) aggregated ; List.iter (fun ( id, - {start = t0; duration = Span d; contents; item_verbosity = _} - ) -> if output_entry t.name id 1 d then output ~t0 contents) + {start = _; duration = Span d; contents; item_verbosity = _} + ) -> if output_entry t.name id 1 d then output contents) recorded in output r -- GitLab From c3f380254ea89d51a0ea280e2010f66e9d2fdf0a Mon Sep 17 00:00:00 2001 From: Julien SAGOT Date: Wed, 6 Nov 2024 10:56:11 +0000 Subject: [PATCH 5/5] Profiler: fix documentation typos Co-authored-by: Gabriel Moise Co-authored-by: Mathias Bourgoin --- src/bin_node/prometheus_profiler.mli | 2 +- src/lib_base/unix/simple_profiler.mli | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bin_node/prometheus_profiler.mli b/src/bin_node/prometheus_profiler.mli index 1be9b4703bad..f126b5fb6d5b 100644 --- a/src/bin_node/prometheus_profiler.mli +++ b/src/bin_node/prometheus_profiler.mli @@ -9,7 +9,7 @@ toplevel section ends. Enable it using [PROFILING_BACKEND=prometheus] environment variable. - This backend relies on the presence or not of a "prometheus" attribute + This backend relies on the presence of a "prometheus" attribute in the metadata field of profiler calls. If this attribute is set to the empty string, the id passed to the call will be used as metric label. If the attribute is a non-empty string, that value will be diff --git a/src/lib_base/unix/simple_profiler.mli b/src/lib_base/unix/simple_profiler.mli index cd6f29299d67..b48b93b1e0ab 100644 --- a/src/lib_base/unix/simple_profiler.mli +++ b/src/lib_base/unix/simple_profiler.mli @@ -30,7 +30,7 @@ type state val empty : Profiler.verbosity -> state -(** The [Base] functor helps to define other backend +(** The [Base] functor helps to define other backends without having to write the same functions again and again. Given a way to get and set a [state] and an output function, -- GitLab