From 90433ea08787eb838d63fcb10878eb2bcbdf0914 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 5 Feb 2021 12:22:47 +0100 Subject: [PATCH 1/6] tests/echo_srv: make the main server logic resilient to ECONNABORTED from accept() That should fix a race where the connect() directly followed by close() in test_thread_echo_tcp_connect will cause the echo_srv to terminate early, which results in connect() returning ECONNREFUSED in for other threads. This mainly happens on FreeBSD, but it can also happen on Linux. Signed-off-by: Stefan Metzmacher --- tests/echo_srv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/echo_srv.c b/tests/echo_srv.c index 87c85f7..0aefa9a 100644 --- a/tests/echo_srv.c +++ b/tests/echo_srv.c @@ -538,6 +538,9 @@ static void echo_tcp(int sock) while (1) { s = accept(sock, &addr.sa.s, &addr.sa_socklen); + if (s == -1 && errno == ECONNABORTED) { + continue; + } if (s == -1) { perror("accept"); goto done; -- GitLab From 3b55c41ea34da81018c29836116eb50fec5919db Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 5 Feb 2021 13:13:07 +0100 Subject: [PATCH 2/6] swrap: add better logging to convert_un_in() Signed-off-by: Stefan Metzmacher --- src/socket_wrapper.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/socket_wrapper.c b/src/socket_wrapper.c index 14d0dda..5839a5c 100644 --- a/src/socket_wrapper.c +++ b/src/socket_wrapper.c @@ -1885,31 +1885,40 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock if (p) p++; else p = un->sun_path; if (sscanf(p, SOCKET_FORMAT, &type, &iface, &prt) != 3) { + SWRAP_LOG(SWRAP_LOG_ERROR, "sun_path[%s] p[%s]", + un->sun_path, p); errno = EINVAL; return -1; } - SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u", - type, iface, prt); - if (iface == 0 || iface > MAX_WRAPPED_INTERFACES) { + SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u", + type, iface, prt); errno = EINVAL; return -1; } if (prt > 0xFFFF) { + SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u", + type, iface, prt); errno = EINVAL; return -1; } + SWRAP_LOG(SWRAP_LOG_TRACE, "type %c iface %u port %u", + type, iface, prt); + switch(type) { case SOCKET_TYPE_CHAR_TCP: case SOCKET_TYPE_CHAR_UDP: { struct sockaddr_in *in2 = (struct sockaddr_in *)(void *)in; if ((*len) < sizeof(*in2)) { - errno = EINVAL; - return -1; + SWRAP_LOG(SWRAP_LOG_ERROR, + "V4: *len(%zu) < sizeof(*in2)=%zu", + (size_t)*len, sizeof(*in2)); + errno = EINVAL; + return -1; } memset(in2, 0, sizeof(*in2)); @@ -1926,6 +1935,10 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock struct sockaddr_in6 *in2 = (struct sockaddr_in6 *)(void *)in; if ((*len) < sizeof(*in2)) { + SWRAP_LOG(SWRAP_LOG_ERROR, + "V6: *len(%zu) < sizeof(*in2)=%zu", + (size_t)*len, sizeof(*in2)); + SWRAP_LOG(SWRAP_LOG_ERROR, "LINE:%d", __LINE__); errno = EINVAL; return -1; } @@ -1941,6 +1954,8 @@ static int convert_un_in(const struct sockaddr_un *un, struct sockaddr *in, sock } #endif default: + SWRAP_LOG(SWRAP_LOG_ERROR, "type %c iface %u port %u", + type, iface, prt); errno = EINVAL; return -1; } -- GitLab From e30eb5884449a911e2b7c701c5560972dd8866d1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 5 Feb 2021 12:13:12 +0100 Subject: [PATCH 3/6] swrap: make swrap_accept() more resilient against races related to already disconnected sockets Callers of accept() expect to get ECONNABORTED instead of a disconnected socket. Even on Linux we have a potential race calling libc_getsockname() after accept(), so we map ENOTCONN to ECONNABORTED. We should do all syscalls in order to have peer and sockname, before doing in memory things like calling sockaddr_convert_from_un(). Signed-off-by: Stefan Metzmacher --- src/socket_wrapper.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/socket_wrapper.c b/src/socket_wrapper.c index 5839a5c..d72b31f 100644 --- a/src/socket_wrapper.c +++ b/src/socket_wrapper.c @@ -3642,10 +3642,12 @@ static int swrap_accept(int s, ret = libc_accept(s, &un_addr.sa.s, &un_addr.sa_socklen); #endif if (ret == -1) { - if (errno == ENOTSOCK) { + int saved_errno = errno; + if (saved_errno == ENOTSOCK) { /* Remove stale fds */ swrap_remove_stale(s); } + errno = saved_errno; return ret; } @@ -3654,6 +3656,23 @@ static int swrap_accept(int s, /* Check if we have a stale fd and remove it */ swrap_remove_stale(fd); + ret = libc_getsockname(fd, + &un_my_addr.sa.s, + &un_my_addr.sa_socklen); + if (ret == -1) { + int saved_errno = errno; + libc_close(fd); + if (saved_errno == ENOTCONN) { + /* + * If the connection is already disconnected + * we should return ECONNABORTED. + */ + saved_errno = ECONNABORTED; + } + errno = saved_errno; + return ret; + } + SWRAP_LOCK_SI(parent_si); ret = sockaddr_convert_from_un(parent_si, @@ -3663,8 +3682,10 @@ static int swrap_accept(int s, &in_addr.sa.s, &in_addr.sa_socklen); if (ret == -1) { + int saved_errno = errno; SWRAP_UNLOCK_SI(parent_si); libc_close(fd); + errno = saved_errno; return ret; } @@ -3692,14 +3713,6 @@ static int swrap_accept(int s, *addrlen = in_addr.sa_socklen; } - ret = libc_getsockname(fd, - &un_my_addr.sa.s, - &un_my_addr.sa_socklen); - if (ret == -1) { - libc_close(fd); - return ret; - } - ret = sockaddr_convert_from_un(child_si, &un_my_addr.sa.un, un_my_addr.sa_socklen, @@ -3707,7 +3720,9 @@ static int swrap_accept(int s, &in_my_addr.sa.s, &in_my_addr.sa_socklen); if (ret == -1) { + int saved_errno = errno; libc_close(fd); + errno = saved_errno; return ret; } -- GitLab From c2636c53daca4af01b086c12aec7a62470931b61 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 5 Feb 2021 13:13:44 +0100 Subject: [PATCH 4/6] swrap: fallback to libc_getpeername() when we get an empty sun_path from accept() This hopefully hides the strange behaviour of FreeBSD (at least 12.1) for already disconnected AF_UNIX sockets. The race is triggered when the following detects the usage of 'getpeername': truss -o ./truss.out -f -H -a -e -D -s 160 ctest -V -R test_thread_echo_tcp_connect; grep getpeername truss.out In a simplified log the following is happening: ECHO_SRV(parent): socket(PF_LOCAL,SOCK_STREAM,0) = 4 (0x4) ECHO_SRV(parent): unlink("/tmp/w_E37bkf/T0A0007") ERR#2 'No such file or directory' ECHO_SRV(parent): bind(4,{ AF_UNIX "/tmp/w_E37bkf/T0A0007" },106) = 0 (0x0) ECHO_SRV(parent): listen(4,16) = 0 (0x0) ... ECHO_SRV(parent): write(2,"SWRAP_ERROR[echo_srv (9792)] - swrap_accept: before accept(sa_socklen=106)\n",75) = 75 (0x4b) ECHO_SRV(parent): accept4(0x4,0x7ffffffde158,0x7ffffffde150,0x0) = 5 (0x5) ECHO_SRV(parent): write(2,"SWRAP_ERROR[echo_srv (9792)] - swrap_accept: after accept(sa_socklen=106, family=1)\n",84) = 84 (0x54) ECHO_SRV(parent): getsockname(5,{ AF_UNIX "/tmp/w_E37bkf/T0A0007" },0x7ffffffde0c0) = 0 (0x0) ECHO_SRV(parent): swrap_accept() returned a valid connection and a per connection child (pid=9793) handles it TEST_THREAD: socket(PF_LOCAL,SOCK_STREAM,0) = 7 (0x7) TEST_THREAD: bind(7,{ AF_UNIX "/tmp/w_E37bkf/T014D4F" },106) = 0 (0x0) TEST_THREAD: connect(7,{ AF_UNIX "/tmp/w_E37bkf/T0A0007" },106) = 0 (0x0) TEST_THREAD: close(7) = 0 (0x0) ECHO_SRV(parent): wait4(-1,0x0,0x0,0x0) = 9793 (0x2641) ECHO_SRV(parent): close(5) = 0 (0x0) ECHO_SRV(parent): write(2,"SWRAP_ERROR[echo_srv (9792)] - swrap_accept: before accept(sa_socklen=106)\n",75) = 75 (0x4b) ECHO_SRV(parent): accept4(0x4,0x7ffffffde158,0x7ffffffde150,0x0) = 5 (0x5) TEST_THREAD: unlink("/tmp/w_E37bkf/T014D4F") = 0 (0x0) ECHO_SRV(parent): write(2,"SWRAP_ERROR[echo_srv (9792)] - swrap_accept: after accept(sa_socklen=16, family=1)\n",83) = 83 (0x53) ECHO_SRV(parent): getpeername(5,0x7ffffffde158,0x7ffffffde150) ERR#57 'Socket is not connected' ECHO_SRV(parent): getsockname(5,{ AF_UNIX "/tmp/w_E37bkf/T0A0007" },0x7ffffffde0c0) = 0 (0x0) ECHO_SRV(parent): getpeername(5,0x7ffffffde158,0x7ffffffde150) ERR#57 'Socket is not connected' Signed-off-by: Stefan Metzmacher --- src/socket_wrapper.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/socket_wrapper.c b/src/socket_wrapper.c index d72b31f..0f37605 100644 --- a/src/socket_wrapper.c +++ b/src/socket_wrapper.c @@ -3656,6 +3656,33 @@ static int swrap_accept(int s, /* Check if we have a stale fd and remove it */ swrap_remove_stale(fd); + if (un_addr.sa.un.sun_path[0] == '\0') { + /* + * FreeBSD seems to have a problem where + * accept4() on the unix socket doesn't + * ECONNABORTED for already disconnected connections. + * + * Let's try libc_getpeername() to get the peer address + * as a fallback, but it'll likely return ENOTCONN, + * which we have to map to ECONNABORTED. + */ + un_addr.sa_socklen = sizeof(struct sockaddr_un), + ret = libc_getpeername(fd, &un_addr.sa.s, &un_addr.sa_socklen); + if (ret == -1) { + int saved_errno = errno; + libc_close(fd); + if (saved_errno == ENOTCONN) { + /* + * If the connection is already disconnected + * we should return ECONNABORTED. + */ + saved_errno = ECONNABORTED; + } + errno = saved_errno; + return ret; + } + } + ret = libc_getsockname(fd, &un_my_addr.sa.s, &un_my_addr.sa_socklen); -- GitLab From 0cfe08b225d0c876a4df17b10f47b47a5c05b959 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 5 Feb 2021 11:50:17 +0100 Subject: [PATCH 5/6] swrap: abort on mutex errors There's no way to continue in a reliable way... Signed-off-by: Stefan Metzmacher --- src/socket_wrapper.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/socket_wrapper.c b/src/socket_wrapper.c index 0f37605..00db6f0 100644 --- a/src/socket_wrapper.c +++ b/src/socket_wrapper.c @@ -755,6 +755,7 @@ static void _swrap_mutex_lock(pthread_mutex_t *mutex, const char *name, const ch if (ret != 0) { SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't lock pthread mutex(%s) - %s", getpid(), getppid(), caller, line, name, strerror(ret)); + abort(); } } @@ -767,6 +768,7 @@ static void _swrap_mutex_unlock(pthread_mutex_t *mutex, const char *name, const if (ret != 0) { SWRAP_LOG(SWRAP_LOG_ERROR, "PID(%d):PPID(%d): %s(%u): Couldn't unlock pthread mutex(%s) - %s", getpid(), getppid(), caller, line, name, strerror(ret)); + abort(); } } -- GitLab From a2860cbeee4eb845cf88b0ed91c7245733669b8d Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 4 Feb 2021 05:02:32 +0100 Subject: [PATCH 6/6] swrap: fix copy on write leak of ~38M for every fork. commit 0f8e90dd7e59c473be615dee08d445dca98fdab9 (src/socket_wrapper.c: fix mutex fork handling) let us touch the whole sockets array on every fork, because each element in the array has it's own mutex. max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720 This was designed for the use of robust shared mutexes when moving the sockets array into a shared memory file. Until we really move to shared memory, we can use a single global mutex in order to avoid the copy on write leaking. Signed-off-by: Stefan Metzmacher --- src/socket_wrapper.c | 59 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/socket_wrapper.c b/src/socket_wrapper.c index 00db6f0..43a5892 100644 --- a/src/socket_wrapper.c +++ b/src/socket_wrapper.c @@ -183,7 +183,6 @@ enum swrap_dbglvl_e { /* Add new global locks here please */ # define SWRAP_REINIT_ALL do { \ - size_t __i; \ int ret; \ ret = socket_wrapper_init_mutex(&sockets_mutex); \ if (ret != 0) exit(-1); \ @@ -191,10 +190,8 @@ enum swrap_dbglvl_e { if (ret != 0) exit(-1); \ ret = socket_wrapper_init_mutex(&first_free_mutex); \ if (ret != 0) exit(-1); \ - for (__i = 0; (sockets != NULL) && __i < socket_info_max; __i++) { \ - ret = socket_wrapper_init_mutex(&sockets[__i].meta.mutex); \ - if (ret != 0) exit(-1); \ - } \ + ret = socket_wrapper_init_mutex(&sockets_si_global); \ + if (ret != 0) exit(-1); \ ret = socket_wrapper_init_mutex(&autobind_start_mutex); \ if (ret != 0) exit(-1); \ ret = socket_wrapper_init_mutex(&pcap_dump_mutex); \ @@ -204,27 +201,20 @@ enum swrap_dbglvl_e { } while(0) # define SWRAP_LOCK_ALL do { \ - size_t __i; \ swrap_mutex_lock(&sockets_mutex); \ swrap_mutex_lock(&socket_reset_mutex); \ swrap_mutex_lock(&first_free_mutex); \ - for (__i = 0; (sockets != NULL) && __i < socket_info_max; __i++) { \ - swrap_mutex_lock(&sockets[__i].meta.mutex); \ - } \ + swrap_mutex_lock(&sockets_si_global); \ swrap_mutex_lock(&autobind_start_mutex); \ swrap_mutex_lock(&pcap_dump_mutex); \ swrap_mutex_lock(&mtu_update_mutex); \ } while(0) # define SWRAP_UNLOCK_ALL do { \ - size_t __s; \ swrap_mutex_unlock(&mtu_update_mutex); \ swrap_mutex_unlock(&pcap_dump_mutex); \ swrap_mutex_unlock(&autobind_start_mutex); \ - for (__s = 0; (sockets != NULL) && __s < socket_info_max; __s++) { \ - size_t __i = (socket_info_max - 1) - __s; \ - swrap_mutex_unlock(&sockets[__i].meta.mutex); \ - } \ + swrap_mutex_unlock(&sockets_si_global); \ swrap_mutex_unlock(&first_free_mutex); \ swrap_mutex_unlock(&socket_reset_mutex); \ swrap_mutex_unlock(&sockets_mutex); \ @@ -235,12 +225,20 @@ enum swrap_dbglvl_e { #define SWRAP_LOCK_SI(si) do { \ struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \ - swrap_mutex_lock(&sic->meta.mutex); \ + if (sic != NULL) { \ + swrap_mutex_lock(&sockets_si_global); \ + } else { \ + abort(); \ + } \ } while(0) #define SWRAP_UNLOCK_SI(si) do { \ struct socket_info_container *sic = SOCKET_INFO_CONTAINER(si); \ - swrap_mutex_unlock(&sic->meta.mutex); \ + if (sic != NULL) { \ + swrap_mutex_unlock(&sockets_si_global); \ + } else { \ + abort(); \ + } \ } while(0) #if defined(HAVE_GETTIMEOFDAY_TZ) || defined(HAVE_GETTIMEOFDAY_TZ_VOID) @@ -337,7 +335,13 @@ struct socket_info_meta { unsigned int refcount; int next_free; - pthread_mutex_t mutex; + /* + * As long as we don't use shared memory + * for the sockets array, we use + * sockets_si_global as a single mutex. + * + * pthread_mutex_t mutex; + */ }; struct socket_info_container @@ -372,6 +376,14 @@ static pthread_mutex_t socket_reset_mutex = PTHREAD_MUTEX_INITIALIZER; /* Mutex to synchronize access to first free index in socket_info array */ static pthread_mutex_t first_free_mutex = PTHREAD_MUTEX_INITIALIZER; +/* + * Mutex to synchronize access to to socket_info structures + * We use a single global mutex in order to avoid leaking + * ~ 38M copy on write memory per fork. + * max_sockets=65535 * sizeof(struct socket_info_container)=592 = 38796720 + */ +static pthread_mutex_t sockets_si_global = PTHREAD_MUTEX_INITIALIZER; + /* Mutex to synchronize access to packet capture dump file */ static pthread_mutex_t pcap_dump_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -1707,27 +1719,18 @@ static void socket_wrapper_init_sockets(void) } swrap_mutex_lock(&first_free_mutex); + swrap_mutex_lock(&sockets_si_global); first_free = 0; for (i = 0; i < max_sockets; i++) { swrap_set_next_free(&sockets[i].info, i+1); - sockets[i].meta.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER; - } - - for (i = 0; i < max_sockets; i++) { - ret = socket_wrapper_init_mutex(&sockets[i].meta.mutex); - if (ret != 0) { - SWRAP_LOG(SWRAP_LOG_ERROR, - "Failed to initialize pthread mutex i=%zu", i); - goto done; - } } /* mark the end of the free list */ swrap_set_next_free(&sockets[max_sockets-1].info, -1); -done: + swrap_mutex_unlock(&sockets_si_global); swrap_mutex_unlock(&first_free_mutex); swrap_mutex_unlock(&sockets_mutex); if (ret != 0) { -- GitLab