diff --git a/src/lib_base/unix/socket.c b/src/lib_base/unix/socket.c
index 65b3ebd6d4e1c7a3cfac0b56e53686661cf5b6dc..5c7a44f4f15e68ba1310f42fc889a78f724c7d76 100644
--- a/src/lib_base/unix/socket.c
+++ b/src/lib_base/unix/socket.c
@@ -1,23 +1,24 @@
 /* tcp_user_timeout_stubs.c */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 #include
-#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
 CAMLprim value ocaml_set_tcp_user_timeout(value fd_val, value timeout_val) {
   CAMLparam2(fd_val, timeout_val);
 
   int fd = Int_val(fd_val);
-  socklen_t timeout = Int_val(timeout_val);
+  int timeout = Int_val(timeout_val);
+#define timeout_sz sizeof(timeout)
   // TCP_USER_TIMEOUT is not always defined such as in Mac OS/X
 #ifdef TCP_USER_TIMEOUT
-  if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, sizeof(timeout)) < 0) {
+  if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout, timeout_sz) < 0) {
     uerror("setsockopt(TCP_USER_TIMEOUT)", Nothing);
   }
   CAMLreturn(Val_unit);
@@ -26,3 +27,47 @@ CAMLprim value ocaml_set_tcp_user_timeout(value fd_val, value timeout_val) {
 #endif
 }
 
+CAMLprim value ocaml_set_tcp_keepalive(value fd_val, value duration_val,
+                                       value intv_val) {
+  CAMLparam3(fd_val, duration_val, intv_val);
+
+  int fd = Int_val(fd_val);
+
+  int enabled = 1;
+  int duration = (Int_val(duration_val) + 999) / 1000;
+  int intv = (Int_val(intv_val) + 999) / 1000;
+#define enabled_sz sizeof(enabled)
+#define duration_sz sizeof(duration)
+#define intv_sz sizeof(intv)
+
+/* Both arguments are given in ms, but the socket options below take whole
+ * seconds: the values are rounded up to the next second above.
+ * On linux, the way to set keepalive per socket is
+ *  - enable so_keepalive
+ *  - set tcp_keepidle to the idle delay in seconds
+ *  - set tcp_keepintvl to the probe interval in seconds
+ *    (the number of probes is left untouched. Should be 9 by default).
+ * On mac, the equivalent setup is enabling so_keepalive and setting
+ * tcp_keepalive to the idle delay in seconds (the interval is not set). */
+#ifdef __linux__
+  if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, enabled_sz) < 0) {
+    uerror("setsockopt(SO_KEEPALIVE)", Nothing);
+  }
+  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &duration, duration_sz) < 0) {
+    uerror("setsockopt(TCP_KEEPIDLE)", Nothing);
+  }
+  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intv, intv_sz) < 0) {
+    uerror("setsockopt(TCP_KEEPINTVL)", Nothing);
+  }
+#elif defined __APPLE__
+  if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, enabled_sz) < 0) {
+    uerror("setsockopt(SO_KEEPALIVE)", Nothing);
+  }
+  if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &duration, duration_sz) < 0) {
+    uerror("setsockopt(TCP_KEEPALIVE)", Nothing);
+  }
+#else
+  caml_failwith("TCP KEEPALIVE not supported on this platform");
+#endif
+  CAMLreturn(Val_unit);
+}
diff --git a/src/lib_base/unix/socket.ml b/src/lib_base/unix/socket.ml
index f2d9a2f95e9757d32fd47bc6e75946946f3053b4..fd056a00a6a80c9cf25466c20eae8c8fc1bcadbc 100644
--- a/src/lib_base/unix/socket.ml
+++ b/src/lib_base/unix/socket.ml
@@ -240,3 +240,11 @@ let set_tcp_user_timeout fd ~ms =
   try Ok (set_tcp_user_timeout fd ms) with
   | Unix.Unix_error _ as exn -> Error (`Unix_error exn)
   | Failure _ -> Error `Unsupported
+
+external set_tcp_keepalive : Unix.file_descr -> int -> int -> unit
+  = "ocaml_set_tcp_keepalive"
+
+let set_tcp_keepalive fd ~ms ~intv =
+  try Ok (set_tcp_keepalive fd ms intv) with
+  | Unix.Unix_error _ as exn -> Error (`Unix_error exn)
+  | Failure _ -> Error `Unsupported
diff --git a/src/lib_base/unix/socket.mli b/src/lib_base/unix/socket.mli
index 98a2648b96faeb1ff59585f2e1a58e3b0f631fbd..8ba726a6259a2409fd8070651349c05802ad9377 100644
--- a/src/lib_base/unix/socket.mli
+++ b/src/lib_base/unix/socket.mli
@@ -88,3 +88,25 @@ val set_tcp_user_timeout :
   Unix.file_descr ->
   ms:int ->
   (unit, [`Unix_error of exn | `Unsupported]) result
+
+(** [set_tcp_keepalive fd ~ms ~intv] enables TCP keepalive on socket [fd].
+    Once the connection has been idle for [ms] milliseconds, empty TCP
+    probes are sent every [intv] milliseconds until one is acknowledged.
+
+    This function uses:
+    - [SO_KEEPALIVE], [TCP_KEEPIDLE], [TCP_KEEPINTVL] socket options on linux
+    - [SO_KEEPALIVE], [TCP_KEEPALIVE] on macos.
+
+    @param fd the file descriptor of the socket.
+    @param ms the idle delay in milliseconds, rounded up to whole seconds.
+    @param intv the probe interval in ms, rounded up to seconds (linux only).
+    @return [Ok ()] if the option was successfully set, or [Error
+    (`Unix_error exn)] if a Unix error occurred, or [Error
+    `Unsupported] if the KEEP_ALIVE option is not supported on
+    this platform.
+*)
+val set_tcp_keepalive :
+  Unix.file_descr ->
+  ms:int ->
+  intv:int ->
+  (unit, [`Unix_error of exn | `Unsupported]) result
diff --git a/src/lib_p2p/p2p_events.ml b/src/lib_p2p/p2p_events.ml
index 4f2d1e87986950023414f6e0194d8d572898b2d3..c22df78b671940d23e07848a0e3c099d95a6e45b 100644
--- a/src/lib_p2p/p2p_events.ml
+++ b/src/lib_p2p/p2p_events.ml
@@ -551,6 +551,15 @@ module P2p_fd = struct
       ("connection_id", Data_encoding.int31)
       ("socket", Data_encoding.string)
 
+  let set_socket_option_tcp_keepalive_failed =
+    declare_1
+      ~section
+      ~name:"set_socket_option_tcp_keepalive_failed"
+      ~msg:"Could not set the TCP_KEEPALIVE socket option: {error}"
+      ~level:Info
+      ~pp1:Error_monad.pp_print_trace
+      ("error", Error_monad.trace_encoding)
+
   let set_socket_option_tcp_user_timeout_failed =
     declare_1
       ~section
diff --git a/src/lib_p2p/p2p_fd.ml b/src/lib_p2p/p2p_fd.ml
index e3b28241e2817ef149243a71188ea11084652a47..5ee6d98a88ec8be0537d96e5d6ed4c5b4865a930 100644
--- a/src/lib_p2p/p2p_fd.ml
+++ b/src/lib_p2p/p2p_fd.ml
@@ -163,29 +163,27 @@ let string_of_sockaddr addr =
 
 let id t = t.id
 
-let raw_socket () =
+let socket_setopt_user_timeout sock =
   let open Lwt_syntax in
-  let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in
-  (* By setting [SO_KEEPALIVE] to [true], the socket is configured to send
-     periodic keep-alive probes to verify that the connection is still
-     active.
-
-     It reset (send TCP RST message and close) if the peer is
-     unresponsive. *)
-  Lwt_unix.(setsockopt sock SO_KEEPALIVE true) ;
   (* By setting [TCP_USER_TIMEOUT], we ensure that a dead connection is
      reported after at most [ms] milliseconds. This option allows the
      connection timeout to be much shorter than the default behavior—which
      can last several minutes (typically between 5 and 15 minutes) due to
      TCP retransmission timeouts (RTO).
-     Below, we set this value to 15 seconds. This value should not be
+     Below, we set this value to 45 seconds. This value should not be
      too low otherwise we may drop valid connection that were
      temporarily busy. The higher it is, the longer it is to detect a
-     dead connection. We believe 15 seconds is reasonable in practice
+     dead connection. We believe 45 seconds is reasonable in practice
      (especially this acknowledgement is done at the OS level and so
-     is quite independent of the Lwt scheduler). *)
+     is quite independent of the Lwt scheduler).
+
+     This value is intimately linked to the keepalive value (see below).
+     Following Cloudflare article recommendation,
+     https://blog.cloudflare.com/when-tcp-sockets-refuse-to-die/
+     We set the value to 45s, ie "should be set to a value slightly lower than
+     TCP_KEEPIDLE + TCP_KEEPINTVL * TCP_KEEPCNT." *)
   let ms_opt =
-    let default = 15000 (* 15s *) in
+    let default = 45000 (* 45s *) in
     try
       match Sys.getenv_opt "OCTEZ_P2P_TCP_USER_TIMEOUT" with
       | None -> Some default
@@ -207,6 +205,59 @@ let raw_socket () =
           in
           Lwt.return sock)
 
+let socket_setopt_keepalive sock =
+  let open Lwt_syntax in
+  (* By setting [TCP_KEEPALIVE], we ensure that the connection stays alive
+     for NAT or firewalls between the node and the other peer. If no TCP
+     packet is sent after [ms] milliseconds, an empty TCP message is sent.
+
+     If not acknowledged, retries are made every [intv] milliseconds (number
+     depending on OS, default 9 for linux); the connection is dropped if no
+     ACK is received. See the [socket_setopt_user_timeout] comment about the
+     cloudflare article for the rationale of the 10s and 5s defaults.
+
+     [OCTEZ_P2P_TCP_KEEPALIVE] may be [0] (disabled), [MS] or [MS,INTV];
+     any other value, including non-integer fields, also disables keepalive
+     instead of raising. *)
+  let keepalive_opts =
+    let default =
+      (* after 10s of inactivity, and every 5s if no ACK *)
+      (10000, 5000)
+    in
+    match Sys.getenv_opt "OCTEZ_P2P_TCP_KEEPALIVE" with
+    | Some "0" -> None
+    | None ->
+        (* Sends a keepalive starting at 10 seconds of inactivity and every 5
+           seconds interval *)
+        Some default
+    | Some value -> (
+        match List.map int_of_string_opt (String.split ',' value) with
+        | [Some ms] -> Some (ms, snd default)
+        | [Some ms; Some intv] -> Some (ms, intv)
+        | _ -> None)
+  in
+  match keepalive_opts with
+  | None -> Lwt.return sock
+  | Some (ms, intv) -> (
+      match
+        Socket.set_tcp_keepalive (Lwt_unix.unix_file_descr sock) ~ms ~intv
+      with
+      | Ok () | Error `Unsupported -> Lwt.return sock
+      | Error (`Unix_error exn) ->
+          (* Socket option [TCP_KEEPALIVE] is not mandatory, this is why we
+             only emit an event at [Info] level. *)
+          let* () =
+            Events.(emit set_socket_option_tcp_keepalive_failed)
+              [Error_monad.error_of_exn exn]
+          in
+          Lwt.return sock)
+
+let raw_socket () =
+  let open Lwt_syntax in
+  let sock = Lwt_unix.socket ~cloexec:true PF_INET6 SOCK_STREAM 0 in
+  let* sock = socket_setopt_user_timeout sock in
+  socket_setopt_keepalive sock
+
 let socket () =
   let open Lwt_syntax in
   let* socket = raw_socket () in