diff --git a/CHANGES.rst b/CHANGES.rst index cdedab46306ca16728781617b449936fea899ee5..522047db6f216a50f343c08d0d346329a51131a8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -178,6 +178,9 @@ Smart Rollup node enabled with the flag ``--enable-performance-metrics`` (requires ``lsof``). (MR :gl:`!12290`) +- Addition of ``elapsed_time`` to performance metrics, + which exposes in seconds the time since the node started. (MR :gl:`!16551`) + - Rotate multiple batcher keys in injector so that they are used evenly. (MR :gl:`!14194`) diff --git a/etherlink/CHANGES_NODE.md b/etherlink/CHANGES_NODE.md index f4512cf93526b23848c8f89afc68f83af47c3105..9ee3e37983f6add55f906cb56422f6bc5450f2b8 100644 --- a/etherlink/CHANGES_NODE.md +++ b/etherlink/CHANGES_NODE.md @@ -18,6 +18,8 @@ "history": { "mode": "rolling", "retention": 14 } ``` (!16465) +- Addition of `elapsed_time` to performance metrics, which exposes in seconds the time since the node + started. (!16551) #### UX diff --git a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out index e354a207217dd6ab4e54105b999b7f8c33178bb6..439a2f691f50e26b122137c5698fc152b5b5f5b4 100644 --- a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out +++ b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out @@ -32,6 +32,8 @@ #TYPE octez_evm_node_performance_data gauge #HELP octez_evm_node_performance_disk_percentage Disk Usage Percentage #TYPE octez_evm_node_performance_disk_percentage gauge +#HELP octez_evm_node_performance_elapsed_time Number of seconds since the node is running +#TYPE octez_evm_node_performance_elapsed_time counter #HELP octez_evm_node_performance_file_descriptors Open file descriptors #TYPE octez_evm_node_performance_file_descriptors gauge #HELP octez_evm_node_performance_logs Disk usage: daily_logs diff --git a/prometheus/src/prometheus.ml b/prometheus/src/prometheus.ml index 3ad76b78244c4b80592b63d815f8fbd4da212f77..c25cd306dbdde57bfce7f1259cd2d947d2baa7a9 100644 --- a/prometheus/src/prometheus.ml +++ b/prometheus/src/prometheus.ml @@ -288,6 +288,10 @@ module Counter = struct let inc t v = assert (v >= 0.0) ; t := !t +. v + + let set t v = + assert (v >= !t) ; + t := v end module Gauge = struct diff --git a/prometheus/src/prometheus.mli b/prometheus/src/prometheus.mli index f8bae5a1c20cfc8498699d7a33affd6bd4ef02a3..d146eb0fc048aa9105a0fcb615b53467dac67211 100644 --- a/prometheus/src/prometheus.mli +++ b/prometheus/src/prometheus.mli @@ -166,6 +166,9 @@ module Counter : sig (** [inc t v] increases [t] by [v], which must be non-negative. *) val inc : t -> float -> unit + + (** [set t v] sets the current value of the counter to [v]. *) + val set : t -> float -> unit end (** A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. *) diff --git a/src/lib_performance_metrics/octez_performance_metrics.ml b/src/lib_performance_metrics/octez_performance_metrics.ml index aabebec72221c4f579b474095b2d40d3bad3934f..0a9bc2053693e7fa14cc06a3233764c82fb2e329 100644 --- a/src/lib_performance_metrics/octez_performance_metrics.ml +++ b/src/lib_performance_metrics/octez_performance_metrics.ml @@ -53,6 +53,10 @@ let supports_performance_metrics () = module Make (R : REGISTRY) = struct include R + let start_time = Time.System.now () + + let v_counter = Counter.v ~registry ~namespace ~subsystem + let v_gauge = Gauge.v ~registry ~namespace ~subsystem let virtual_ = v_gauge ~help:"Size Memory Stats" "performance_virtual" @@ -63,6 +67,11 @@ module Make (R : REGISTRY) = struct let cpu = v_gauge ~help:"CPU Percentage" "performance_cpu_percentage" + let elapsed_time = + v_counter + ~help:"Number of seconds since the node is running" + "performance_elapsed_time" + let get_ps pid = Lwt.catch (fun () -> @@ -224,8 +233,15 @@ module Make (R : REGISTRY) = struct Gauge.set connections @@ Float.of_int conn) r + let set_elapsed_time () = + let new_elapsed_time = + Ptime.Span.to_float_s @@ Ptime.diff (Time.System.now ()) start_time + in + Counter.set elapsed_time new_elapsed_time + let set_stats ~data_dir = let open Lwt_syntax in + set_elapsed_time () ; let* () = set_memory_cpu_stats () and* () = set_disk_usage_stats ~data_dir and* () = set_file_descriptors () in diff --git a/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out b/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out index 6211dc7269cb1f349aed45b394871d67e3b0f513..b45c7b7b9f117fe10676c2ff4c3f541022d1ad83 100644 --- a/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out +++ b/tezt/tests/expected/sc_rollup.ml/Smart rollup node- list metrics regression (with performance metrics).out @@ -76,6 +76,8 @@ #TYPE octez_sc_rollup_node_performance_data gauge #HELP octez_sc_rollup_node_performance_disk_percentage Disk Usage Percentage #TYPE octez_sc_rollup_node_performance_disk_percentage gauge +#HELP octez_sc_rollup_node_performance_elapsed_time Number of seconds since the node is running +#TYPE octez_sc_rollup_node_performance_elapsed_time counter #HELP octez_sc_rollup_node_performance_file_descriptors Open file descriptors #TYPE octez_sc_rollup_node_performance_file_descriptors gauge #HELP octez_sc_rollup_node_performance_logs Disk usage: daily_logs