From 517c5f7b9e37edc4dd4f01a9d706b98179c6aaa1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Thir=C3=A9?=
Date: Fri, 21 Feb 2025 13:25:51 +0100
Subject: [PATCH 1/2] Octez/P2P: Decrease the time to detect a connection is dead

---
 manifest/product_octez.ml    |  7 ++++++
 src/lib_base/unix/dune       |  3 ++-
 src/lib_base/unix/socket.c   | 28 ++++++++++++++++++++++++
 src/lib_base/unix/socket.ml  |  8 +++++++
 src/lib_base/unix/socket.mli | 16 ++++++++++++++
 src/lib_p2p/p2p_events.ml    |  9 ++++++++
 src/lib_p2p/p2p_fd.ml        | 42 +++++++++++++++++++++++++++++++++---
 7 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 src/lib_base/unix/socket.c

diff --git a/manifest/product_octez.ml b/manifest/product_octez.ml
index 1aa574c58216..238513cf02e9 100644
--- a/manifest/product_octez.ml
+++ b/manifest/product_octez.ml
@@ -1846,6 +1846,13 @@ let octez_base_unix =
     "base.unix"
     ~internal_name:"tezos_base_unix"
     ~path:"src/lib_base/unix"
+    ~foreign_stubs:
+      {
+        language = C;
+        flags = [S ":standard"];
+        include_dirs = [];
+        names = ["socket"];
+      }
     ~deps:
       [
         octez_error_monad |> open_;
diff --git a/src/lib_base/unix/dune b/src/lib_base/unix/dune
index b959563fca23..c9a002b6a947 100644
--- a/src/lib_base/unix/dune
+++ b/src/lib_base/unix/dune
@@ -33,4 +33,5 @@
   -open Tezos_stdlib_unix
   -open Tezos_profiler
   -open Data_encoding
-  -open Tezos_event_logging))
+  -open Tezos_event_logging)
+ (foreign_stubs (language c) (flags (:standard)) (names socket)))
diff --git a/src/lib_base/unix/socket.c b/src/lib_base/unix/socket.c
new file mode 100644
index 000000000000..65b3ebd6d4e1
--- /dev/null
+++ b/src/lib_base/unix/socket.c
@@ -0,0 +1,28 @@
+/* C stub behind [Socket.set_tcp_user_timeout] (see socket.ml). */
+#include <caml/mlvalues.h>
+#include <caml/memory.h>
+#include <caml/alloc.h>
+#include <caml/fail.h>
+#include <caml/unixsupport.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+CAMLprim value ocaml_set_tcp_user_timeout(value fd_val, value timeout_val) {
+  CAMLparam2(fd_val, timeout_val);
+// TCP_USER_TIMEOUT is not defined on every platform (e.g. macOS).
+#ifdef TCP_USER_TIMEOUT
+  int fd = Int_val(fd_val);
+  // The option value is an unsigned int (milliseconds), not a socklen_t.
+  unsigned int timeout = Int_val(timeout_val);
+
+  if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, sizeof(timeout)) < 0) {
+    uerror("setsockopt(TCP_USER_TIMEOUT)", Nothing);
+  }
+  CAMLreturn(Val_unit);
+#else
+  caml_failwith("TCP_USER_TIMEOUT not supported on this platform");
+#endif
+}
diff --git a/src/lib_base/unix/socket.ml b/src/lib_base/unix/socket.ml
index d4274f6088ce..f2d9a2f95e97 100644
--- a/src/lib_base/unix/socket.ml
+++ b/src/lib_base/unix/socket.ml
@@ -232,3 +232,11 @@ let get_temporary_socket_dir () =
   match Sys.getenv_opt "XDG_RUNTIME_DIR" with
   | Some xdg_runtime_dir when xdg_runtime_dir <> "" -> xdg_runtime_dir
   | Some _ | None -> Filename.get_temp_dir_name ()
+
+external set_tcp_user_timeout : Unix.file_descr -> int -> unit
+  = "ocaml_set_tcp_user_timeout"
+
+let set_tcp_user_timeout fd ~ms =
+  try Ok (set_tcp_user_timeout fd ms) with
+  | Unix.Unix_error _ as exn -> Error (`Unix_error exn)
+  | Failure _ -> Error `Unsupported
diff --git a/src/lib_base/unix/socket.mli b/src/lib_base/unix/socket.mli
index 4fff6f542f7b..98a2648b96fa 100644
--- a/src/lib_base/unix/socket.mli
+++ b/src/lib_base/unix/socket.mli
@@ -72,3 +72,19 @@ val handshake : Lwt_unix.file_descr -> bytes -> unit tzresult Lwt.t
     environment variable is defined. Otherwise, the default temporary
     directory is used. *)
 val get_temporary_socket_dir : unit -> string
+
+(** [set_tcp_user_timeout fd ~ms] sets the [TCP_USER_TIMEOUT] option on the
+    socket [fd]: if data sent on this socket is not acknowledged within [ms]
+    milliseconds, the connection is considered dead.
+
+    @param fd the file descriptor of the socket.
+    @param ms the timeout value in milliseconds.
+    @return [Ok ()] if the option was successfully set, or [Error
+    (`Unix_error exn)] if a Unix error occurred, or [Error
+    `Unsupported] if the TCP_USER_TIMEOUT option is not supported on
+    this platform.
+*)
+val set_tcp_user_timeout :
+  Unix.file_descr ->
+  ms:int ->
+  (unit, [`Unix_error of exn | `Unsupported]) result
diff --git a/src/lib_p2p/p2p_events.ml b/src/lib_p2p/p2p_events.ml
index a4a097c93931..4f2d1e879869 100644
--- a/src/lib_p2p/p2p_events.ml
+++ b/src/lib_p2p/p2p_events.ml
@@ -550,6 +550,15 @@ module P2p_fd = struct
       ~level:Debug
       ("connection_id", Data_encoding.int31)
       ("socket", Data_encoding.string)
+
+  let set_socket_option_tcp_user_timeout_failed =
+    declare_1
+      ~section
+      ~name:"set_socket_option_tcp_user_timeout_failed"
+      ~msg:"Could not set the TCP_USER_TIMEOUT socket option: {error}"
+      ~level:Info
+      ~pp1:Error_monad.pp_print_trace
+      ("error", Error_monad.trace_encoding)
 end
 
 module P2p_maintainance = struct
diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml
index 2339af69377b..9e1b1d2d0cb8 100644
--- a/src/lib_p2p/p2p_fd.ml
+++ b/src/lib_p2p/p2p_fd.ml
@@ -164,6 +164,7 @@ let string_of_sockaddr addr =
 let id t = t.id
 
 let raw_socket () =
+  let open Lwt_syntax in
   let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in
   (* By setting [SO_KEEPALIVE] to [true], the socket is configured to send
      periodic keep-alive probes to verify that the connection is still
@@ -172,16 +173,51 @@ let raw_socket () =
 
      It reset (send TCP RST message and close) if the peer is unresponsive. *)
   Lwt_unix.(setsockopt sock SO_KEEPALIVE true) ;
-  sock
+  (* By setting [TCP_USER_TIMEOUT], we ensure that a dead connection is
+     reported after at most [ms] milliseconds. This allows the connection
+     timeout to be much shorter than the default behavior, which can last
+     5 to 15 minutes because of TCP retransmission timeouts (RTO).
+
+     Below, the value defaults to 15 seconds and can be overridden through
+     the [OCTEZ_P2P_TCP_USER_TIMEOUT] environment variable (in milliseconds,
+     [0] to opt out). It should not be too low, otherwise we may drop valid
+     connections that were temporarily busy; the higher it is, the longer it
+     takes to detect a dead connection. 15 seconds seems reasonable since the
+     acknowledgement is done by the OS, independently of the Lwt scheduler. *)
+  let ms_opt =
+    let default = 15000 (* 15s *) in
+    match Sys.getenv_opt "OCTEZ_P2P_TCP_USER_TIMEOUT" with
+    | None -> Some default
+    | Some "0" -> None
+    | Some value ->
+        (* A value that is not an integer falls back to the default. *)
+        Some (Option.value (int_of_string_opt value) ~default)
+  in
+  match ms_opt with
+  | None -> (* The user opted out of the socket option. *) Lwt.return sock
+  | Some ms -> (
+      match Socket.set_tcp_user_timeout (Lwt_unix.unix_file_descr sock) ~ms with
+      | Ok () | Error `Unsupported -> Lwt.return sock
+      | Error (`Unix_error exn) ->
+          (* Socket option [TCP_USER_TIMEOUT] is not mandatory, this is why we
+             only emit an event at [Info] level. *)
+          let* () =
+            Events.(emit set_socket_option_tcp_user_timeout_failed)
+              [Error_monad.error_of_exn exn]
+          in
+          Lwt.return sock)
 
-let socket () = create (raw_socket ())
+let socket () =
+  let open Lwt_syntax in
+  let* socket = raw_socket () in
+  create socket
 
 let create_listening_socket ?(reuse_port = false) ~backlog
     ?(addr = Ipaddr.V6.unspecified) port =
   let open Lwt_result_syntax in
   Lwt.catch
     (fun () ->
-      let sock = raw_socket () in
+      let*! sock = raw_socket () in
       (if reuse_port then Lwt_unix.(setsockopt sock SO_REUSEPORT true)) ;
       Lwt_unix.(setsockopt sock SO_REUSEADDR true) ;
       let*! () =
-- 
GitLab

From c2d5db422fa468b91913cd4482225c249a4568e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Thir=C3=A9?=
Date: Mon, 24 Feb 2025 17:12:50 +0100
Subject: [PATCH 2/2] Changelog

---
 CHANGES.rst | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index 6a6e4d3e99b2..6ebf7c9da342 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -49,6 +49,14 @@ Node
   ``/chain/{chain_id}/protocols/{protocol_hash}``) to retrieve protocol
   activation levels of the chain. (MR :gl:`!15447`)
 
+- The node will detect stalled connections more quickly (on
+  Linux-based distributions). This behavior can be controlled via the
+  environment variable ``OCTEZ_P2P_TCP_USER_TIMEOUT``, expressed in
+  milliseconds. Its default value is ``15000``, meaning that it will
+  now take ``15s`` to detect a stalled connection (compared to up to
+  ``15`` minutes by default on Linux). Users can opt out by setting
+  the value to ``0``. (MR :gl:`!16907`)
+
 Client
 ------
 
@@ -256,6 +264,15 @@ Data Availability Layer (DAL)
 DAL node
 ~~~~~~~~
 
+- **Feature** The node will detect stalled connections more quickly (on
+  Linux-based distributions). This behavior can be controlled via the
+  environment variable ``OCTEZ_P2P_TCP_USER_TIMEOUT``, expressed in
+  milliseconds. Its default value is ``15000``, meaning that it will
+  now take ``15s`` to detect a stalled connection (compared to up to
+  ``15`` minutes by default on Linux). Users can opt out by setting
+  the value to ``0``. (MR :gl:`!16907`)
+
+
 - **Feature** A new RPC ``/p2p/gossipsub/reconnection_delays`` which
   provides for each unreachable point, the time remaining until the next
   reconnection attempt. (MR :gl:`!16767`)
-- 
GitLab