From 17e8ba3ab71a575701416554d6cea250080203ad Mon Sep 17 00:00:00 2001 From: Etienne MARAIS Date: Wed, 22 Jun 2022 10:37:22 +0200 Subject: [PATCH 1/6] Manifest: add prometheus to lib_context.disk --- manifest/main.ml | 1 + opam/tezos-context.opam | 1 + src/lib_context/disk/dune | 1 + 3 files changed, 3 insertions(+) diff --git a/manifest/main.ml b/manifest/main.ml index a76913efbe32..48ab391499cb 100644 --- a/manifest/main.ml +++ b/manifest/main.ml @@ -1583,6 +1583,7 @@ let octez_context_disk = irmin; irmin_pack; irmin_pack_unix; + prometheus; octez_stdlib_unix |> open_; octez_stdlib |> open_; octez_context_sigs; diff --git a/opam/tezos-context.opam b/opam/tezos-context.opam index c37e208c8a22..850249574ec4 100644 --- a/opam/tezos-context.opam +++ b/opam/tezos-context.opam @@ -19,6 +19,7 @@ depends: [ "bigstringaf" { >= "0.2.0" } "logs" "digestif" { >= "0.7.3" } + "prometheus" { >= "1.2" } "tezos-test-helpers" {with-test} "tezos-test-helpers-extra" {with-test} "alcotest-lwt" { with-test & >= "1.5.0" } diff --git a/src/lib_context/disk/dune b/src/lib_context/disk/dune index 8f25b62f89cd..450221ec5149 100644 --- a/src/lib_context/disk/dune +++ b/src/lib_context/disk/dune @@ -15,6 +15,7 @@ irmin irmin-pack irmin-pack.unix + prometheus tezos-stdlib-unix tezos-stdlib tezos-context.sigs -- GitLab From c40c73e50cb92cf977adcba498b68b3d390e8175 Mon Sep 17 00:00:00 2001 From: Etienne MARAIS Date: Tue, 24 May 2022 17:21:55 +0200 Subject: [PATCH 2/6] lib_context: introduce Prometheus Introduce the `context_metrics` module to keep track of metrics in Irmin. 
--- src/lib_context/disk/context.ml | 1 + src/lib_context/disk/context_metrics.ml | 102 +++++++++++++++++++++++ src/lib_context/disk/context_metrics.mli | 26 ++++++ 3 files changed, 129 insertions(+) create mode 100644 src/lib_context/disk/context_metrics.ml create mode 100644 src/lib_context/disk/context_metrics.mli diff --git a/src/lib_context/disk/context.ml b/src/lib_context/disk/context.ml index a13ffce34063..d1b1ce449c84 100644 --- a/src/lib_context/disk/context.ml +++ b/src/lib_context/disk/context.ml @@ -731,6 +731,7 @@ module Make (Encoding : module type of Tezos_context_encoding.Context) = struct ~lru_size:!lru_size root) in + if not readonly then Context_metrics.init () ; {path = root; repo; patch_context; readonly} let close index = Store.Repo.close index.repo diff --git a/src/lib_context/disk/context_metrics.ml b/src/lib_context/disk/context_metrics.ml new file mode 100644 index 000000000000..dee9957e1031 --- /dev/null +++ b/src/lib_context/disk/context_metrics.ml @@ -0,0 +1,102 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Tarides *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be included *) +(* in all copies or substantial portions of the Software. 
*) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. *) +(* *) +(*****************************************************************************) + +module Irmin_stats = Irmin_pack.Stats + +let already_init = Atomic.make false + +let namespace = Tezos_version.Node_version.namespace + +let subsystem = "context" + +let metric ~help ~component name = + let name = String.concat "_" [component; name] in + Prometheus.Gauge.v ~help ~namespace ~subsystem name + +module Pack_store = struct + let component = "pack_store" + + module Stats = struct + type t = { + total : Prometheus.Gauge.t; + cache_misses : Prometheus.Gauge.t; + bytes_read : Prometheus.Gauge.t; + nb_reads : Prometheus.Gauge.t; + bytes_written : Prometheus.Gauge.t; + nb_writes : Prometheus.Gauge.t; + } + + let v = + let total = + let help = "Number of times a key is looked up in pack_store." in + metric ~component ~help "total" + in + + let cache_misses = + let help = "Looked up key is nowhere to be found." in + metric ~component ~help "cache_misses" + in + + let bytes_read = + let help = "Number of bytes read." in + metric ~component ~help "bytes_read" + in + + let nb_reads = + let help = "Number of reads executed." in + metric ~component ~help "nb_reads" + in + let bytes_written = + let help = "Number of bytes written" in + metric ~component ~help "bytes_written" + in + + let nb_writes = + let help = "Number of writes executed." 
in + metric ~component ~help "nb_writes" + in + {total; cache_misses; bytes_read; nb_reads; bytes_written; nb_writes} + end + + (* TODO: https://github.com/tarides/tezos/issues/20 + When moving to the new Irmin version, the stats from Irmin_pack_unix.Stats.Index should be replaced below. *) + let collect () = + let stats = Irmin_stats.get () in + let cache_stats = Irmin_stats.get_cache_stats () in + Prometheus.Gauge.set Stats.v.total (Int.to_float stats.finds.total) ; + Prometheus.Gauge.set Stats.v.cache_misses cache_stats.cache_misses ; + let stats = Index.Stats.get () in + Prometheus.Gauge.set Stats.v.bytes_read (Int.to_float stats.bytes_read) ; + Prometheus.Gauge.set Stats.v.nb_reads (Int.to_float stats.nb_reads) ; + Prometheus.Gauge.set + Stats.v.bytes_written + (Int.to_float stats.bytes_written) ; + Prometheus.Gauge.set Stats.v.nb_writes (Int.to_float stats.nb_writes) +end + +let init () = + if Atomic.compare_and_set already_init false true then + Prometheus.CollectorRegistry.(register_pre_collect default) (fun () -> + Pack_store.collect ()) diff --git a/src/lib_context/disk/context_metrics.mli b/src/lib_context/disk/context_metrics.mli new file mode 100644 index 000000000000..6cae1c4fcfba --- /dev/null +++ b/src/lib_context/disk/context_metrics.mli @@ -0,0 +1,26 @@ +(*****************************************************************************) +(* *) +(* Open Source License *) +(* Copyright (c) 2022 Tarides *) +(* *) +(* Permission is hereby granted, free of charge, to any person obtaining a *) +(* copy of this software and associated documentation files (the "Software"),*) +(* to deal in the Software without restriction, including without limitation *) +(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *) +(* and/or sell copies of the Software, and to permit persons to whom the *) +(* Software is furnished to do so, subject to the following conditions: *) +(* *) +(* The above copyright notice and this permission notice shall be 
included *) +(* in all copies or substantial portions of the Software. *) +(* *) +(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*) +(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *) +(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *) +(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*) +(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *) +(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *) +(* DEALINGS IN THE SOFTWARE. *) +(* *) +(*****************************************************************************) + +val init : unit -> unit -- GitLab From 8dcfc749c78755b96ee688140185af4153a07431 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Tue, 21 Jun 2022 17:52:04 +0200 Subject: [PATCH 3/6] WIP:Manifest: add prometheus app to bin_validation --- manifest/main.ml | 1 + opam/tezos-validator.opam | 1 + src/bin_validation/dune | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/manifest/main.ml b/manifest/main.ml index 48ab391499cb..de2a374e41ff 100644 --- a/manifest/main.ml +++ b/manifest/main.ml @@ -2262,6 +2262,7 @@ let octez_validator_lib = octez_validation |> open_; octez_protocol_updater |> open_; octez_shell_context |> open_; + prometheus_app_unix; ] let octez_client_base = diff --git a/opam/tezos-validator.opam b/opam/tezos-validator.opam index 9ade7b5276bb..ba9b2b46af38 100644 --- a/opam/tezos-validator.opam +++ b/opam/tezos-validator.opam @@ -19,6 +19,7 @@ depends: [ "tezos-validation" "tezos-protocol-updater" "tezos-shell-context" + "prometheus-app" { >= "1.2" } ] build: [ ["rm" "-r" "vendors"] diff --git a/src/bin_validation/dune b/src/bin_validation/dune index 63b88d450bd4..dd8be49fa226 100644 --- a/src/bin_validation/dune +++ b/src/bin_validation/dune @@ -16,7 +16,8 @@ tezos-shell-services tezos-validation tezos-protocol-updater - tezos-shell-context) + tezos-shell-context + 
prometheus-app.unix) (flags (:standard) -open Tezos_base.TzPervasives -- GitLab From 489ee11b8566f1127e6e5a40edb0e1696b0aafd9 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Tue, 21 Jun 2022 17:52:32 +0200 Subject: [PATCH 4/6] WIP:Bin_validation: allow external validator to run metrics endpoint --- src/bin_validation/command_line.ml | 13 ++++++++++--- src/bin_validation/validator.ml | 28 +++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/bin_validation/command_line.ml b/src/bin_validation/command_line.ml index ba387cec778e..0c2ebf65055b 100644 --- a/src/bin_validation/command_line.ml +++ b/src/bin_validation/command_line.ml @@ -25,6 +25,7 @@ let socket_dir () = let socket_dir = ref None in + let metrics_port = ref None in let args = Arg. [ @@ -42,6 +43,12 @@ let socket_dir () = tezos-validator's process identifier. By default, the validator will communicate through its standard input and output.|} ); + ( "--metrics-addr", + Int (fun i -> metrics_port := Some i), + {| + When provided, the validator will spawn a prometheus endpoint to expose + some metrics.|} + ); ( "--version", Unit (fun () -> @@ -57,11 +64,11 @@ let socket_dir () = args (fun s -> raise (Arg.Bad (Format.sprintf "Unexpected argument: %s" s))) usage_msg ; - !socket_dir + (!socket_dir, !metrics_port) let run () = - let socket_dir = socket_dir () in - let main_promise = Validator.main ?socket_dir () in + let socket_dir, metrics_port = socket_dir () in + let main_promise = Validator.main ?socket_dir ?metrics_port () in Stdlib.exit (Lwt_main.run (let open Lwt_syntax in diff --git a/src/bin_validation/validator.ml b/src/bin_validation/validator.ml index e924464ed10a..fbe097069751 100644 --- a/src/bin_validation/validator.ml +++ b/src/bin_validation/validator.ml @@ -471,7 +471,25 @@ let run input output = let*! 
() = loop None None in return_unit -let main ?socket_dir () = +module Metrics_server = Prometheus_app.Cohttp (Cohttp_lwt_unix.Server) + +let metrics_serve port = + let open Lwt_result_syntax in + let*! server = + let host = Ipaddr.V6.(to_string localhost) in + (* let*! () = Event.(emit starting_metrics_server) (host, port) in *) + let*! ctx = Conduit_lwt_unix.init ~src:host () in + let ctx = Cohttp_lwt_unix.Net.init ~ctx () in + let mode = `TCP (`Port port) in + let callback = Metrics_server.callback in + Cohttp_lwt_unix.Server.create + ~ctx + ~mode + (Cohttp_lwt_unix.Server.make ~callback ()) + in + return server + +let main ?socket_dir ?metrics_port () = let open Lwt_result_syntax in let canceler = Lwt_canceler.create () in let*! in_channel, out_channel = @@ -488,6 +506,14 @@ let main ?socket_dir () = Lwt.return (socket_in, socket_out) | None -> Lwt.return (Lwt_io.stdin, Lwt_io.stdout) in + (match metrics_port with + | Some port -> + Lwt.dont_wait + (fun () -> + let*! r = metrics_serve port in + match r with Ok _ -> Lwt.return_unit | Error _err -> assert false) + (fun _exn -> assert false) + | None -> ()) ; let*! () = Events.(emit initialized ()) in let*! r = Error_monad.catch_es (fun () -> -- GitLab From 14c65df3b1e3906454fa74ba7d4348246229c4c1 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Tue, 21 Jun 2022 17:58:02 +0200 Subject: [PATCH 5/6] WIP:Node: clean metrics serve --- src/bin_node/node_run_command.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index a26fc15a3c56..197bfdbca06b 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -485,8 +485,8 @@ let metrics_serve metrics_addrs = let* addrs = List.map_ep Node_config_file.resolve_metrics_addrs metrics_addrs in - let*! servers = - List.map_p + let*! () = + List.iter_p (fun (addr, port) -> let host = Ipaddr.V6.to_string addr in let*! 
() = Event.(emit starting_metrics_server) (host, port) in @@ -500,7 +500,7 @@ let metrics_serve metrics_addrs = (Cohttp_lwt_unix.Server.make ~callback ())) (List.flatten addrs) in - return servers + return_unit (* This call is not strictly necessary as the parameters are initialized lazily the first time a Sapling operation (validation or forging) is -- GitLab From 0feabb24399cc9de335e9a0790dc9bb1ce5d0d08 Mon Sep 17 00:00:00 2001 From: Victor Allombert Date: Tue, 21 Jun 2022 18:12:13 +0200 Subject: [PATCH 6/6] WIP:Bin_node: plug node with external validator --- src/bin_node/node_replay_command.ml | 1 + src/bin_node/node_run_command.ml | 9 +++++++++ src/lib_shell/block_validator_process.ml | 17 ++++++++++++++--- src/lib_shell/block_validator_process.mli | 1 + src/lib_shell/node.ml | 4 +++- src/lib_shell/node.mli | 1 + 6 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/bin_node/node_replay_command.ml b/src/bin_node/node_replay_command.ml index 3ad28e8a6c5e..89039b4d4329 100644 --- a/src/bin_node/node_replay_command.ml +++ b/src/bin_node/node_replay_command.ml @@ -191,6 +191,7 @@ let replay ~singleprocess (config : Node_config_file.t) blocks = context_root; protocol_root; process_path = Sys.executable_name; + metrics_endpoint = None; sandbox_parameters = None; }) in diff --git a/src/bin_node/node_run_command.ml b/src/bin_node/node_run_command.ml index 197bfdbca06b..30c7a62f50a6 100644 --- a/src/bin_node/node_run_command.ml +++ b/src/bin_node/node_run_command.ml @@ -348,9 +348,18 @@ let init_node ?sandbox ?target ~identity ~singleprocess ~new_history_mode:history_mode | _ -> return_unit in + let* external_validator_metrics_port = + let* addrs = + List.map_ep Node_config_file.resolve_metrics_addrs config.metrics_addr + in + match List.(hd (flatten addrs)) with + | Some (_, p) -> return (p + 1) + | None -> assert false + in Node.create ~sandboxed:(sandbox <> None) ?sandbox_parameters:(Option.map snd sandbox_param) + ~external_validator_metrics_port 
~singleprocess node_config config.shell.peer_validator_limits diff --git a/src/lib_shell/block_validator_process.ml b/src/lib_shell/block_validator_process.ml index e6f41333302e..a29759554f97 100644 --- a/src/lib_shell/block_validator_process.ml +++ b/src/lib_shell/block_validator_process.ml @@ -39,6 +39,7 @@ type validator_kind = context_root : string; protocol_root : string; process_path : string; + metrics_endpoint : int option; sandbox_parameters : Data_encoding.json option; } -> validator_kind @@ -513,6 +514,7 @@ module External_validator_process = struct process_path : string; mutable validator_process : process_status; lock : Lwt_mutex.t; + metrics_endpoint : int option; sandbox_parameters : Data_encoding.json option; } @@ -536,9 +538,15 @@ module External_validator_process = struct let canceler = Lwt_canceler.create () in (* We assume that there is only one validation process per socket *) let socket_dir = get_temporary_socket_dir () in + let args = + ["tezos-validator"; "--socket-dir"; socket_dir] + @ + match vp.metrics_endpoint with + | Some port -> ["--metrics-addr"; string_of_int port] + | None -> [] + in let process = - Lwt_process.open_process_none - (vp.process_path, [|"tezos-validator"; "--socket-dir"; socket_dir|]) + Lwt_process.open_process_none (vp.process_path, Array.of_list args) in let socket_path = External_validation.socket_path ~socket_dir ~pid:process#pid @@ -714,7 +722,7 @@ module External_validator_process = struct _; } : validator_environment) ~genesis ~data_dir ~context_root ~protocol_root - ~process_path ~sandbox_parameters = + ~process_path ~sandbox_parameters ~metrics_endpoint = let open Lwt_result_syntax in let*! 
() = Events.(emit init ()) in let validator = @@ -729,6 +737,7 @@ module External_validator_process = struct process_path; validator_process = Uninitialized; lock = Lwt_mutex.create (); + metrics_endpoint; sandbox_parameters; } in @@ -887,6 +896,7 @@ let init validator_environment validator_kind = context_root; protocol_root; process_path; + metrics_endpoint; sandbox_parameters; } -> let* (validator : 'b) = @@ -898,6 +908,7 @@ let init validator_environment validator_kind = ~protocol_root ~process_path ~sandbox_parameters + ~metrics_endpoint in let validator_process : (module S with type t = 'b) = (module External_validator_process) diff --git a/src/lib_shell/block_validator_process.mli b/src/lib_shell/block_validator_process.mli index a726e5d705ef..49105ce7b4ca 100644 --- a/src/lib_shell/block_validator_process.mli +++ b/src/lib_shell/block_validator_process.mli @@ -51,6 +51,7 @@ type validator_kind = context_root : string; protocol_root : string; process_path : string; + metrics_endpoint : int option; sandbox_parameters : Data_encoding.json option; } -> validator_kind diff --git a/src/lib_shell/node.ml b/src/lib_shell/node.ml index 791bdb48deeb..32208f20ad14 100644 --- a/src/lib_shell/node.ml +++ b/src/lib_shell/node.ml @@ -215,7 +215,8 @@ let check_context_consistency store = let*! 
() = Node_event.(emit storage_corrupted_context_detected ()) in tzfail Non_recoverable_context -let create ?(sandboxed = false) ?sandbox_parameters ~singleprocess +let create ?(sandboxed = false) ?sandbox_parameters + ?external_validator_metrics_port ~singleprocess { genesis; chain_name; @@ -283,6 +284,7 @@ let create ?(sandboxed = false) ?sandbox_parameters ~singleprocess protocol_root; process_path = Sys.executable_name; sandbox_parameters; + metrics_endpoint = external_validator_metrics_port; }) in let commit_genesis ~chain_id = diff --git a/src/lib_shell/node.mli b/src/lib_shell/node.mli index f695249ffa67..0de152ffa5a9 100644 --- a/src/lib_shell/node.mli +++ b/src/lib_shell/node.mli @@ -57,6 +57,7 @@ val default_chain_validator_limits : Chain_validator.limits val create : ?sandboxed:bool -> ?sandbox_parameters:Data_encoding.json -> + ?external_validator_metrics_port:int -> singleprocess:bool -> config -> Peer_validator.limits -> -- GitLab