From 9e7b3c07351af52136ba2bccba5d37c24651b8e7 Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Thu, 6 Mar 2025 10:25:24 +0100 Subject: [PATCH 1/4] Octez/P2P: adds keepalive and set values --- src/lib_base/unix/socket.c | 65 ++++++++++++++++++++++++++++++------ src/lib_base/unix/socket.ml | 8 +++++ src/lib_base/unix/socket.mli | 22 ++++++++++++ src/lib_p2p/p2p_events.ml | 9 +++++ src/lib_p2p/p2p_fd.ml | 63 ++++++++++++++++++++++++++-------- 5 files changed, 143 insertions(+), 24 deletions(-) diff --git a/src/lib_base/unix/socket.c b/src/lib_base/unix/socket.c index 65b3ebd6d4e1..5c7a44f4f15e 100644 --- a/src/lib_base/unix/socket.c +++ b/src/lib_base/unix/socket.c @@ -1,23 +1,24 @@ /* tcp_user_timeout_stubs.c */ -#include -#include -#include -#include -#include -#include -#include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include CAMLprim value ocaml_set_tcp_user_timeout(value fd_val, value timeout_val) { CAMLparam2(fd_val, timeout_val); int fd = Int_val(fd_val); - socklen_t timeout = Int_val(timeout_val); + int timeout = Int_val(timeout_val); +#define timeout_sz sizeof(timeout) // TCP_USER_TIMEOUT is not always defined such as in Mac OS/X #ifdef TCP_USER_TIMEOUT - if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, sizeof(timeout)) < 0) { + if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, timeout_sz) < 0) { uerror("setsockopt(TCP_USER_TIMEOUT)", Nothing); } CAMLreturn(Val_unit); @@ -26,3 +27,47 @@ CAMLprim value ocaml_set_tcp_user_timeout(value fd_val, value timeout_val) { #endif } +CAMLprim value ocaml_set_tcp_keepalive(value fd_val, value duration_val, + value intv_val) { + CAMLparam3(fd_val, duration_val, intv_val); + + int fd = Int_val(fd_val); + + int enabled = 1; + int duration = (Int_val(duration_val) + 999) / 1000; /* ms -> s, rounded up: the options below take seconds */ + int intv = (Int_val(intv_val) + 999) / 1000; /* ms -> s, rounded up */ +#define enabled_sz sizeof(enabled) +#define duration_sz sizeof(duration) +#define intv_sz sizeof(intv) + +/* On linux, the way to set keepalive per 
socket is + * - enable so_keepalive + * - set tcp_keepidle to the idle delay, in seconds + * - set tcp_keepintvl to the delay between two probes, in seconds + * (do not set the default number of retransmissions if not acknowledged. + * Should be 9 by default). + * On mac, the equivalent setup is + * - enable so_keepalive + * - set tcp_keepalive to the idle delay, in seconds */ +#ifdef __linux__ + if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, enabled_sz) < 0) { + uerror("setsockopt(SO_KEEPALIVE)", Nothing); + } + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &duration, duration_sz) < 0) { + uerror("setsockopt(TCP_KEEPIDLE)", Nothing); + } + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intv, intv_sz) < 0) { + uerror("setsockopt(TCP_KEEPINTVL)", Nothing); + } +#elif defined __APPLE__ + if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, enabled_sz) < 0) { + uerror("setsockopt(SO_KEEPALIVE)", Nothing); + } + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &duration, duration_sz) < 0) { + uerror("setsockopt(TCP_KEEPALIVE)", Nothing); + } +#else + caml_failwith("TCP KEEPALIVE not supported on this platform"); +#endif + CAMLreturn(Val_unit); +} diff --git a/src/lib_base/unix/socket.ml b/src/lib_base/unix/socket.ml index f2d9a2f95e97..fd056a00a6a8 100644 --- a/src/lib_base/unix/socket.ml +++ b/src/lib_base/unix/socket.ml @@ -240,3 +240,11 @@ let set_tcp_user_timeout fd ~ms = try Ok (set_tcp_user_timeout fd ms) with | Unix.Unix_error _ as exn -> Error (`Unix_error exn) | Failure _ -> Error `Unsupported + +external set_tcp_keepalive : Unix.file_descr -> int -> int -> unit + = "ocaml_set_tcp_keepalive" + +let set_tcp_keepalive fd ~ms ~intv = + try Ok (set_tcp_keepalive fd ms intv) with + | Unix.Unix_error _ as exn -> Error (`Unix_error exn) + | Failure _ -> Error `Unsupported diff --git a/src/lib_base/unix/socket.mli b/src/lib_base/unix/socket.mli index 98a2648b96fa..8ba726a6259a 100644 --- a/src/lib_base/unix/socket.mli +++ b/src/lib_base/unix/socket.mli @@ -88,3 +88,25 @@ val 
set_tcp_user_timeout : Unix.file_descr -> ms:int -> (unit, [`Unix_error of exn | `Unsupported]) result + +(** [set_tcp_keepalive fd ~ms ~intv] enables TCP keepalive probing on + socket [fd]. Once the connection has been idle for [ms] milliseconds, empty + TCP probes are sent, and repeated every [intv] ms while unacknowledged. + + This function uses: + - [SO_KEEPALIVE], [TCP_KEEPIDLE], [TCP_KEEPINTVL] socket options on linux + - [SO_KEEPALIVE], [TCP_KEEPALIVE] on macos. + + @param fd the file descriptor of the socket. + @param ms the idle time, in milliseconds, before the first probe is sent. + @param intv the delay, in milliseconds, between two successive probes. + @return [Ok ()] if the option was successfully set, or [Error + (`Unix_error exn)] if a Unix error occurred, or [Error + `Unsupported] if the KEEP_ALIVE option is not supported on + this platform. +*) +val set_tcp_keepalive : + Unix.file_descr -> + ms:int -> + intv:int -> + (unit, [`Unix_error of exn | `Unsupported]) result diff --git a/src/lib_p2p/p2p_events.ml b/src/lib_p2p/p2p_events.ml index 4f2d1e879869..c22df78b6719 100644 --- a/src/lib_p2p/p2p_events.ml +++ b/src/lib_p2p/p2p_events.ml @@ -551,6 +551,15 @@ module P2p_fd = struct ("connection_id", Data_encoding.int31) ("socket", Data_encoding.string) + let set_socket_option_tcp_keepalive_failed = + declare_1 + ~section + ~name:"set_socket_option_tcp_keepalive_failed" + ~msg:"Could not set the TCP_KEEPALIVE socket option: {error}" + ~level:Info + ~pp1:Error_monad.pp_print_trace + ("error", Error_monad.trace_encoding) + let set_socket_option_tcp_user_timeout_failed = declare_1 ~section diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml index 9e1b1d2d0cb8..87ae5f441acc 100644 --- a/src/lib_p2p/p2p_fd.ml +++ b/src/lib_p2p/p2p_fd.ml @@ -166,13 +166,6 @@ let id t = t.id let raw_socket () = let open Lwt_syntax in let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in - (* By setting [SO_KEEPALIVE] to [true], the socket is configured to send - periodic keep-alive probes 
to verify that the connection is still - active. - - It reset (send TCP RST message and close) if the peer is - unresponsive. *) - Lwt_unix.(setsockopt sock SO_KEEPALIVE true) ; (* By setting [TCP_USER_TIMEOUT], we ensure that a dead connection is reported after at most [ms] milliseconds. This option allows the connection timeout to be much shorter than the default behavior—which can last several minutes @@ -193,16 +186,58 @@ let raw_socket () = | Some value -> Some (int_of_string value) with _ -> Some default in - match ms_opt with - | None -> (* The user opt-out from the socket option *) Lwt.return sock - | Some ms -> ( - match Socket.set_tcp_user_timeout (Lwt_unix.unix_file_descr sock) ~ms with + let* sock = + match ms_opt with + | None -> (* The user opt-out from the socket option *) Lwt.return sock + | Some ms -> ( + match + Socket.set_tcp_user_timeout (Lwt_unix.unix_file_descr sock) ~ms + with + | Ok () | Error `Unsupported -> Lwt.return sock + | Error (`Unix_error exn) -> + (* Socket option [TCP_USER_TIMEOUT] is not mandatory, this is why we only emit an + event at [Info] level. *) + let* () = + Events.(emit set_socket_option_tcp_user_timeout_failed) + [Error_monad.error_of_exn exn] + in + Lwt.return sock) + in + (* By setting [TCP_KEEPALIVE], we ensure that the connection stays alive + for NAT or firewalls between the node and the other peer. If no TCP + packet is sent after [ms] milliseconds, an empty TCP message will be + sent. If not acknowledged before intv, some retries will be made (number + depending on OS, default 9 for linux) after [intv] seconds. The connection + will be dropped if no ACK is received. 
*) + let keepalive_opts = + let default = + (* after 10s of inactivity, and every 5s if no ACK *) + (10000, 5000) + in + match Sys.getenv_opt "OCTEZ_P2P_TCP_KEEPALIVE" with + | Some "0" -> None + | None -> + (* Sends a keepalive starting at 10 seconds of inactivity and every 5 + seconds interval *) + Some default + | Some value -> ( (* malformed numbers fall back to [default], like OCTEZ_P2P_TCP_USER_TIMEOUT, instead of raising *) + match String.split ',' value with + | [ms] -> Some (match int_of_string_opt ms with Some ms -> (ms, snd default) | None -> default) + | [ms; intv] -> Some (match (int_of_string_opt ms, int_of_string_opt intv) with Some ms, Some intv -> (ms, intv) | _ -> default) + | _ -> None) + in + match keepalive_opts with + | None -> Lwt.return sock + | Some (ms, intv) -> ( + match + Socket.set_tcp_keepalive (Lwt_unix.unix_file_descr sock) ~ms ~intv + with | Ok () | Error `Unsupported -> Lwt.return sock | Error (`Unix_error exn) -> - (* Socket option [TCP_USER_TIMEOUT] is not mandatory, this is why we only emit an - event at [Info] level. *) + (* Socket option [TCP_KEEPALIVE] is not mandatory, this is why we + only emit an event at [Info] level. *) let* () = - Events.(emit set_socket_option_tcp_user_timeout_failed) + Events.(emit set_socket_option_tcp_keepalive_failed) [Error_monad.error_of_exn exn] in Lwt.return sock) -- GitLab From f86dc6965580d00390006ba58f549d1c0eb2126c Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Thu, 6 Mar 2025 10:26:28 +0100 Subject: [PATCH 2/4] Octez/P2P: set socket user_timeout to 45s --- src/lib_p2p/p2p_fd.ml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml index 87ae5f441acc..5cebd9e30ca9 100644 --- a/src/lib_p2p/p2p_fd.ml +++ b/src/lib_p2p/p2p_fd.ml @@ -171,14 +171,14 @@ let raw_socket () = to be much shorter than the default behavior—which can last several minutes (typically between 5 and 15 minutes) due to TCP retransmission timeouts (RTO). - Below, we set this value to 15 seconds. This value should not be + Below, we set this value to 45 seconds. This value should not be too low otherwise we may drop valid connection that were temporarily busy. 
The higher it is, the longer it is to detect a - dead connection. We believe 15 seconds is reasonable in practice + dead connection. We believe 45 seconds is reasonable in practice (especially this acknowledgement is done at the OS level and so is quite independent of the Lwt scheduler). *) let ms_opt = - let default = 15000 (* 15s *) in + let default = 45000 (* 45s *) in try match Sys.getenv_opt "OCTEZ_P2P_TCP_USER_TIMEOUT" with | None -> Some default -- GitLab From 53a05850f4fc78d91413c491df0ea4df656350df Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Thu, 6 Mar 2025 10:27:26 +0100 Subject: [PATCH 3/4] Octez/P2P: add functions for setting user_timeout and keepalive options --- src/lib_p2p/p2p_fd.ml | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml index 5cebd9e30ca9..41626b53ed62 100644 --- a/src/lib_p2p/p2p_fd.ml +++ b/src/lib_p2p/p2p_fd.ml @@ -163,9 +163,8 @@ let string_of_sockaddr addr = let id t = t.id -let raw_socket () = +let socket_setopt_user_timeout sock = let open Lwt_syntax in - let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in (* By setting [TCP_USER_TIMEOUT], we ensure that a dead connection is reported after at most [ms] milliseconds. This option allows the connection timeout to be much shorter than the default behavior—which can last several minutes @@ -186,23 +185,22 @@ let raw_socket () = | Some value -> Some (int_of_string value) with _ -> Some default in - let* sock = - match ms_opt with - | None -> (* The user opt-out from the socket option *) Lwt.return sock - | Some ms -> ( - match - Socket.set_tcp_user_timeout (Lwt_unix.unix_file_descr sock) ~ms - with - | Ok () | Error `Unsupported -> Lwt.return sock - | Error (`Unix_error exn) -> - (* Socket option [TCP_USER_TIMEOUT] is not mandatory, this is why we only emit an - event at [Info] level. 
*) - let* () = - Events.(emit set_socket_option_tcp_user_timeout_failed) - [Error_monad.error_of_exn exn] - in - Lwt.return sock) - in + match ms_opt with + | None -> (* The user opt-out from the socket option *) Lwt.return sock + | Some ms -> ( + match Socket.set_tcp_user_timeout (Lwt_unix.unix_file_descr sock) ~ms with + | Ok () | Error `Unsupported -> Lwt.return sock + | Error (`Unix_error exn) -> + (* Socket option [TCP_USER_TIMEOUT] is not mandatory, this is why we only emit an + event at [Info] level. *) + let* () = + Events.(emit set_socket_option_tcp_user_timeout_failed) + [Error_monad.error_of_exn exn] + in + Lwt.return sock) + +let socket_setopt_keepalive sock = + let open Lwt_syntax in (* By setting [TCP_KEEPALIVE], we ensure that the connection stays alive for NAT or firewalls between the node and the other peer. If no TCP packet is sent after [ms] milliseconds, an empty TCP message will be @@ -242,6 +240,12 @@ let raw_socket () = in Lwt.return sock) +let raw_socket () = + let open Lwt_syntax in + let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in + let* sock = socket_setopt_user_timeout sock in + socket_setopt_keepalive sock + let socket () = let open Lwt_syntax in let* socket = raw_socket () in -- GitLab From f57ea7e33cea52254602942a547133360d616f1f Mon Sep 17 00:00:00 2001 From: Guillaume Bau Date: Thu, 6 Mar 2025 17:33:08 +0100 Subject: [PATCH 4/4] Octez/P2P: adds some comments about keepalive/timeout values --- src/lib_p2p/p2p_fd.ml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml index 41626b53ed62..37f8aa57f283 100644 --- a/src/lib_p2p/p2p_fd.ml +++ b/src/lib_p2p/p2p_fd.ml @@ -175,7 +175,13 @@ let socket_setopt_user_timeout sock = temporarily busy. The higher it is, the longer it is to detect a dead connection. 
We believe 45 seconds is reasonable in practice (especially this acknowledgement is done at the OS level and so - is quite independent of the Lwt scheduler). *) + is quite independent of the Lwt scheduler). + + This value is intimately linked to the keepalive value (see below). + Following the recommendation of this Cloudflare article, + https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/ + we set the value to 45s, i.e. "should be set to a value slightly lower than + TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT." *) let ms_opt = let default = 45000 (* 45s *) in try @@ -204,9 +210,15 @@ let socket_setopt_keepalive sock = (* By setting [TCP_KEEPALIVE], we ensure that the connection stays alive for NAT or firewalls between the node and the other peer. If no TCP packet is sent after [ms] milliseconds, an empty TCP message will be - sent. If not acknowledged before intv, some retries will be made (number + sent. + + If not acknowledged before intv, some retries will be made (number depending on OS, default 9 for linux) after [intv] seconds. The connection - will be dropped if no ACK is received. *) + will be dropped if no ACK is received. + + See also the comment in [socket_setopt_user_timeout] about the Cloudflare + article, to understand the rationale for the 10s and 5s interval default + values, in conjunction with the user_timeout value. *) let keepalive_opts = let default = (* after 10s of inactivity, and every 5s if no ACK *) -- GitLab