From 598735c070aedd5c45168196bca9e2a98820298c Mon Sep 17 00:00:00 2001 From: Brahima Dibassi Date: Mon, 3 Feb 2025 15:15:30 +0100 Subject: [PATCH] EVM/Rollup Node: Add elapsed time metric Co-authored-by: Thomas Letan Co-authored-by: Pierre-Emmanuel CORNILLEAU --- CHANGES.rst | 3 +++ etherlink/CHANGES_NODE.md | 2 ++ .../EVM node- list metrics regression.out | 2 ++ prometheus/src/prometheus.ml | 4 ++++ prometheus/src/prometheus.mli | 3 +++ .../octez_performance_metrics.ml | 16 ++++++++++++++++ ...ics regression (with performance metrics).out | 2 ++ 7 files changed, 32 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index cdedab46306c..522047db6f21 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -178,6 +178,9 @@ Smart Rollup node enabled with the flag ``--enable-performance-metrics`` (requires ``lsof``). (MR :gl:`!12290`) +- Addition of ``elapsed_time`` to performance metrics, + which exposes in seconds the time since the node started. (MR :gl:`!16551`) + - Rotate multiple batcher keys in injector so that they are used evenly. (MR :gl:`!14194`) diff --git a/etherlink/CHANGES_NODE.md b/etherlink/CHANGES_NODE.md index f4512cf93526..9ee3e37983f6 100644 --- a/etherlink/CHANGES_NODE.md +++ b/etherlink/CHANGES_NODE.md @@ -18,6 +18,8 @@ "history": { "mode": "rolling", "retention": 14 } ``` (!16465) +- Addition of `elapsed_time` to performance metrics, which exposes in seconds the time since the node + started. 
(!16551) #### UX diff --git a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out index e354a207217d..439a2f691f50 100644 --- a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out +++ b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out @@ -32,6 +32,8 @@ #TYPE octez_evm_node_performance_data gauge #HELP octez_evm_node_performance_disk_percentage Disk Usage Percentage #TYPE octez_evm_node_performance_disk_percentage gauge +#HELP octez_evm_node_performance_elapsed_time Number of seconds since the node is running +#TYPE octez_evm_node_performance_elapsed_time counter #HELP octez_evm_node_performance_file_descriptors Open file descriptors #TYPE octez_evm_node_performance_file_descriptors gauge #HELP octez_evm_node_performance_logs Disk usage: daily_logs diff --git a/prometheus/src/prometheus.ml b/prometheus/src/prometheus.ml index 3ad76b78244c..c25cd306dbdd 100644 --- a/prometheus/src/prometheus.ml +++ b/prometheus/src/prometheus.ml @@ -288,6 +288,10 @@ module Counter = struct let inc t v = assert (v >= 0.0) ; t := !t +. v + + let set t v = + assert (v >= !t) ; + t := v end module Gauge = struct diff --git a/prometheus/src/prometheus.mli b/prometheus/src/prometheus.mli index f8bae5a1c20c..d146eb0fc048 100644 --- a/prometheus/src/prometheus.mli +++ b/prometheus/src/prometheus.mli @@ -166,6 +166,9 @@ module Counter : sig (** [inc t v] increases [t] by [v], which must be non-negative. *) val inc : t -> float -> unit + + (** [set t v] sets the current value of the counter to [v], which must not be lower than the current value. *) + val set : t -> float -> unit end (** A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. 
*) diff --git a/src/lib_performance_metrics/octez_performance_metrics.ml b/src/lib_performance_metrics/octez_performance_metrics.ml index aabebec72221..0a9bc2053693 100644 --- a/src/lib_performance_metrics/octez_performance_metrics.ml +++ b/src/lib_performance_metrics/octez_performance_metrics.ml @@ -53,6 +53,10 @@ let supports_performance_metrics () = module Make (R : REGISTRY) = struct include R + let start_time = Time.System.now () + + let v_counter = Counter.v ~registry ~namespace ~subsystem + let v_gauge = Gauge.v ~registry ~namespace ~subsystem let virtual_ = v_gauge ~help:"Size Memory Stats" "performance_virtual" @@ -63,6 +67,11 @@ module Make (R : REGISTRY) = struct let cpu = v_gauge ~help:"CPU Percentage" "performance_cpu_percentage" + let elapsed_time = + v_counter + ~help:"Number of seconds since the node is running" + "performance_elapsed_time" + let get_ps pid = Lwt.catch (fun () -> @@ -224,8 +233,15 @@ module Make (R : REGISTRY) = struct Gauge.set connections @@ Float.of_int conn) r + let set_elapsed_time () = + let new_elapsed_time = + Ptime.Span.to_float_s @@ Ptime.diff (Time.System.now ()) start_time + in + Counter.set elapsed_time new_elapsed_time + let set_stats ~data_dir = let open Lwt_syntax in + set_elapsed_time () ; let* () = set_memory_cpu_stats () and* () = set_disk_usage_stats ~data_dir and* () = set_file_descriptors () in diff --git a/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out b/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out index 6211dc7269cb..b45c7b7b9f11 100644 --- a/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out +++ b/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out @@ -76,6 +76,8 @@ #TYPE octez_sc_rollup_node_performance_data gauge #HELP octez_sc_rollup_node_performance_disk_percentage Disk Usage 
Percentage #TYPE octez_sc_rollup_node_performance_disk_percentage gauge +#HELP octez_sc_rollup_node_performance_elapsed_time Number of seconds since the node is running +#TYPE octez_sc_rollup_node_performance_elapsed_time counter #HELP octez_sc_rollup_node_performance_file_descriptors Open file descriptors #TYPE octez_sc_rollup_node_performance_file_descriptors gauge #HELP octez_sc_rollup_node_performance_logs Disk usage: daily_logs -- GitLab