From 91ee5f4c05137320d30dfaac15c504a1b3cd1d1a Mon Sep 17 00:00:00 2001 From: Thomas Letan Date: Thu, 23 Jan 2025 13:49:14 +0100 Subject: [PATCH] EVM Node: Add performance metrics These metrics can be used to monitor how the node behaves in the long run, similarly to what was done in the Rollup Node. We rely on the function introduced in e9f3ad0 to decide whether or not we enable these metrics. --- etherlink/CHANGES_NODE.md | 14 +++++++++ etherlink/bin_node/lib_dev/dune | 3 +- etherlink/bin_node/lib_dev/metrics.ml | 29 +++++++++++++++++++ etherlink/bin_node/lib_dev/metrics.mli | 6 ++++ etherlink/bin_node/lib_dev/observer.ml | 1 + etherlink/bin_node/lib_dev/rpc.ml | 1 + etherlink/bin_node/lib_dev/rpc_server.ml | 18 +++++++++++- etherlink/bin_node/lib_dev/rpc_server.mli | 6 +++- etherlink/bin_node/lib_dev/sequencer.ml | 1 + .../EVM node- list metrics regression.out | 24 +++++++++++++++ manifest/product_etherlink.ml | 1 + opam/octez-evm-node-libs.opam | 1 + 12 files changed, 102 insertions(+), 3 deletions(-) diff --git a/etherlink/CHANGES_NODE.md b/etherlink/CHANGES_NODE.md index 243b18512507..c8b51325d313 100644 --- a/etherlink/CHANGES_NODE.md +++ b/etherlink/CHANGES_NODE.md @@ -4,9 +4,23 @@ ### Features +#### RPCs + - Implements the RPC endpoint `debug_traceBlockByNumber`, with only the `callTracer` at the moment. (!16164) +#### Metrics + +- When the host running the node provides `ps`, `du` and `lsof`, the node will + now export performance metrics. (!16367) + +#### Experimental + +*No guarantees are provided regarding backward compatibility of experimental +features. They can be modified or removed without any deprecation notices. If +you start using them, you probably want to use `octez-evm-node check config +--config-file PATH` to assert your configuration file is still valid.* + - Websocket connections with clients are monitored by default by sending regular heartbeats. 
This feature can be disabled or tweaked by changing `experimental_features.monitor_websocket_heartbeat` in the diff --git a/etherlink/bin_node/lib_dev/dune b/etherlink/bin_node/lib_dev/dune index 804eb224ee8c..fbda79166806 100644 --- a/etherlink/bin_node/lib_dev/dune +++ b/etherlink/bin_node/lib_dev/dune @@ -35,7 +35,8 @@ octez-libs.prometheus-app tezos-dal-node-services octez-evm-node-libs.evm_node_supported_installers - octez-evm-node-libs.evm_node_wasm_runtime) + octez-evm-node-libs.evm_node_wasm_runtime + octez-performance-metrics) (flags (:standard) -open Tezos_base.TzPervasives diff --git a/etherlink/bin_node/lib_dev/metrics.ml b/etherlink/bin_node/lib_dev/metrics.ml index d68c0eeeaeec..f59d3914bb41 100644 --- a/etherlink/bin_node/lib_dev/metrics.ml +++ b/etherlink/bin_node/lib_dev/metrics.ml @@ -378,8 +378,37 @@ let record_blueprint_chunks_sent_on_inbox chunks = let inc_rpc_method ~name = Prometheus.Counter.inc_one (Rpc.method_ name) +module Performance_metrics_config = struct + open Octez_performance_metrics + + let registry = registry + + let subsystem = "evm_node" + + let directories = + [ + data_dir_element ~metrics_suffix:"store_sqlite" "store.sqlite"; + data_dir_element ~metrics_suffix:"store_irmin" "store"; + data_dir_element ~metrics_suffix:"wasm" "wasm_2_0_0"; + data_dir_element ~metrics_suffix:"logs" "daily_logs"; + ] +end + +module type PERFORMANCE = sig + val set_stats : data_dir:string -> unit Lwt.t +end + +let performance_metrics : (module PERFORMANCE) Lazy.t = + lazy + (let module M = Octez_performance_metrics.Make (Performance_metrics_config) in + (module M : PERFORMANCE)) + let listing () = let open Lwt_syntax in + let* support = Octez_performance_metrics.supports_performance_metrics () in + (* If the host provides the necessary utils, we get performance metrics. + To list them, we need to force the evaluation of the Performance module. 
*) + if support then ignore (Lazy.force performance_metrics) ; let+ data = CollectorRegistry.(collect registry) in let body = Fmt.to_to_string Prometheus_app.TextFormat_0_0_4.output data in let metrics = diff --git a/etherlink/bin_node/lib_dev/metrics.mli b/etherlink/bin_node/lib_dev/metrics.mli index 1fe6a681d7f3..a30dd1b54d05 100644 --- a/etherlink/bin_node/lib_dev/metrics.mli +++ b/etherlink/bin_node/lib_dev/metrics.mli @@ -77,3 +77,9 @@ module Rpc : sig (unit -> Cohttp_lwt_unix.Server.response_action Lwt.t) -> Cohttp_lwt_unix.Server.response_action Lwt.t end + +module type PERFORMANCE = sig + val set_stats : data_dir:string -> unit Lwt.t +end + +val performance_metrics : (module PERFORMANCE) Lazy.t diff --git a/etherlink/bin_node/lib_dev/observer.ml b/etherlink/bin_node/lib_dev/observer.ml index 9ee790add5b7..1bb2121fd804 100644 --- a/etherlink/bin_node/lib_dev/observer.ml +++ b/etherlink/bin_node/lib_dev/observer.ml @@ -170,6 +170,7 @@ let main ?network ?kernel_path ~data_dir ~(config : Configuration.t) ~no_sync Rpc_server.start_public_server ~evm_services: Evm_ro_context.(evm_services_methods ro_ctxt time_between_blocks) + ~data_dir config (observer_backend, smart_rollup_address) in diff --git a/etherlink/bin_node/lib_dev/rpc.ml b/etherlink/bin_node/lib_dev/rpc.ml index 01124ec40abf..fbf6c9e49c82 100644 --- a/etherlink/bin_node/lib_dev/rpc.ml +++ b/etherlink/bin_node/lib_dev/rpc.ml @@ -69,6 +69,7 @@ let main ~data_dir ~evm_node_endpoint ~(config : Configuration.t) = ~delegate_health_check_to:evm_node_endpoint ~evm_services: Evm_ro_context.(evm_services_methods ctxt time_between_blocks) + ~data_dir config (rpc_backend, ctxt.smart_rollup_address) in diff --git a/etherlink/bin_node/lib_dev/rpc_server.ml b/etherlink/bin_node/lib_dev/rpc_server.ml index 3f6ab50cc652..32d576762c2f 100644 --- a/etherlink/bin_node/lib_dev/rpc_server.ml +++ b/etherlink/bin_node/lib_dev/rpc_server.ml @@ -114,9 +114,25 @@ let start_server rpc = function | Evm_directory.Resto dir -> 
Resto.start_server rpc dir | Evm_directory.Dream routes -> Dream.start_server rpc routes -let start_public_server ?delegate_health_check_to ?evm_services +let monitor_performances ~data_dir = + let (module Performance) = Lazy.force Metrics.performance_metrics in + let rec aux () = + let open Lwt_syntax in + let* () = Performance.set_stats ~data_dir in + let* () = Lwt_unix.sleep 10.0 in + aux () + in + Lwt.dont_wait aux (Fun.const ()) + +let start_public_server ?delegate_health_check_to ?evm_services ?data_dir (config : Configuration.t) ctxt = let open Lwt_result_syntax in + let*! can_start_performance_metrics = + Octez_performance_metrics.supports_performance_metrics () + in + if can_start_performance_metrics && Option.is_some data_dir then + monitor_performances + ~data_dir:WithExceptions.Option.(get ~loc:__LOC__ data_dir) ; let register_evm_services = match evm_services with | None -> Fun.id diff --git a/etherlink/bin_node/lib_dev/rpc_server.mli b/etherlink/bin_node/lib_dev/rpc_server.mli index 4032f962516d..35a2cfa49487 100644 --- a/etherlink/bin_node/lib_dev/rpc_server.mli +++ b/etherlink/bin_node/lib_dev/rpc_server.mli @@ -38,10 +38,14 @@ val start_private_server : in [config]. The optional argument [evm_services_methods] can be used to install - the EVM services. *) + the EVM services. + + If [data_dir] is provided and the host provides the necessary binaries, + performance metrics are enabled. 
*) val start_public_server : ?delegate_health_check_to:Uri.t -> ?evm_services:evm_services_methods -> + ?data_dir:string -> Configuration.t -> (module Services_backend_sig.S) * 'a -> finalizer tzresult Lwt.t diff --git a/etherlink/bin_node/lib_dev/sequencer.ml b/etherlink/bin_node/lib_dev/sequencer.ml index bc8c79951038..83ce93efc1a1 100644 --- a/etherlink/bin_node/lib_dev/sequencer.ml +++ b/etherlink/bin_node/lib_dev/sequencer.ml @@ -192,6 +192,7 @@ let main ~data_dir ?(genesis_timestamp = Misc.now ()) ~cctxt ~evm_services: Evm_ro_context.( evm_services_methods ro_ctxt sequencer_config.time_between_blocks) + ~data_dir configuration (backend, smart_rollup_address_typed) in diff --git a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out index 01c6ea7b444d..e354a207217d 100644 --- a/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out +++ b/etherlink/tezt/tests/expected/evm_rollup.ml/EVM node- list metrics regression.out @@ -24,6 +24,30 @@ #TYPE octez_evm_node_inconsistent_da_fees counter #HELP octez_evm_node_l1_level Last processed L1 block level #TYPE octez_evm_node_l1_level gauge +#HELP octez_evm_node_performance_connections Open connections +#TYPE octez_evm_node_performance_connections gauge +#HELP octez_evm_node_performance_cpu_percentage CPU Percentage +#TYPE octez_evm_node_performance_cpu_percentage gauge +#HELP octez_evm_node_performance_data Disk Usage +#TYPE octez_evm_node_performance_data gauge +#HELP octez_evm_node_performance_disk_percentage Disk Usage Percentage +#TYPE octez_evm_node_performance_disk_percentage gauge +#HELP octez_evm_node_performance_file_descriptors Open file descriptors +#TYPE octez_evm_node_performance_file_descriptors gauge +#HELP octez_evm_node_performance_logs Disk usage: daily_logs +#TYPE octez_evm_node_performance_logs gauge +#HELP octez_evm_node_performance_mem_percentage Memory 
Percentage +#TYPE octez_evm_node_performance_mem_percentage gauge +#HELP octez_evm_node_performance_resident Resident Memory Stats +#TYPE octez_evm_node_performance_resident gauge +#HELP octez_evm_node_performance_store_irmin Disk usage: store +#TYPE octez_evm_node_performance_store_irmin gauge +#HELP octez_evm_node_performance_store_sqlite Disk usage: store.sqlite +#TYPE octez_evm_node_performance_store_sqlite gauge +#HELP octez_evm_node_performance_virtual Size Memory Stats +#TYPE octez_evm_node_performance_virtual gauge +#HELP octez_evm_node_performance_wasm Disk usage: wasm_2_0_0 +#TYPE octez_evm_node_performance_wasm gauge #HELP octez_evm_node_queue_size Size of the execution queue of simulations #TYPE octez_evm_node_queue_size gauge #HELP octez_evm_node_signals_sent Number of DAL import signals sent on the inbox diff --git a/manifest/product_etherlink.ml b/manifest/product_etherlink.ml index 2816bf82e9b8..c817781acd96 100644 --- a/manifest/product_etherlink.ml +++ b/manifest/product_etherlink.ml @@ -293,6 +293,7 @@ let evm_node_lib_dev = octez_dal_node_services; supported_installers; wasm_runtime; + performance_metrics; ] let _octez_evm_node_tests = diff --git a/opam/octez-evm-node-libs.opam b/opam/octez-evm-node-libs.opam index f43ee54ea8bb..3a57e37da042 100644 --- a/opam/octez-evm-node-libs.opam +++ b/opam/octez-evm-node-libs.opam @@ -30,6 +30,7 @@ depends: [ "lwt-exit" "octez-smart-rollup-wasm-debugger-lib" "tezos-dal-node-services" + "octez-performance-metrics" ] conflicts: [ "websocket" -- GitLab