diff --git a/include/Makefile.am b/include/Makefile.am index a27857c..73d7c15 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -142,6 +142,7 @@ noinst_HEADERS = \ linux/tc_ematch/tc_em_meta.h \ netlink-private/genl.h \ netlink-private/netlink.h \ + netlink-private/socket.h \ netlink-private/tc.h \ netlink-private/types.h \ netlink-private/cache-api.h \ diff --git a/include/netlink-private/socket.h b/include/netlink-private/socket.h new file mode 100644 index 0000000..86a440c --- /dev/null +++ b/include/netlink-private/socket.h @@ -0,0 +1,31 @@ +/* + * netlink-private/socket.h Private declarations for socket + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation version 2.1 + * of the License. + * + * Copyright (c) 2014 Thomas Graf + */ + +#ifndef NETLINK_SOCKET_PRIV_H_ +#define NETLINK_SOCKET_PRIV_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int _nl_socket_is_local_port_unspecified (struct nl_sock *sk); +uint32_t _nl_socket_generate_local_port_no_release(struct nl_sock *sk); + +void _nl_socket_used_ports_release_all(const uint32_t *used_ports); +void _nl_socket_used_ports_set(uint32_t *used_ports, uint32_t port); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/netlink/utils.h b/include/netlink/utils.h index 8faf917..6b4b787 100644 --- a/include/netlink/utils.h +++ b/include/netlink/utils.h @@ -104,6 +104,15 @@ enum { NL_CAPABILITY_ROUTE_LINK_CLS_ADD_ACT_OWN_REFERENCE = 3, #define NL_CAPABILITY_ROUTE_LINK_CLS_ADD_ACT_OWN_REFERENCE NL_CAPABILITY_ROUTE_LINK_CLS_ADD_ACT_OWN_REFERENCE + /** + * Indicate that the local port is unspecified until the user accesses + * it (via nl_socket_get_local_port()) or until nl_connect(). More importantly, + * if the port is left unspecified, nl_connect() will retry generating another + * port when bind() fails with ADDRINUSE. 
+ */ + NL_CAPABILITY_NL_CONNECT_RETRY_GENERATE_PORT_ON_ADDRINUSE = 4, +#define NL_CAPABILITY_NL_CONNECT_RETRY_GENERATE_PORT_ON_ADDRINUSE NL_CAPABILITY_NL_CONNECT_RETRY_GENERATE_PORT_ON_ADDRINUSE + __NL_CAPABILITY_MAX #define NL_CAPABILITY_MAX (__NL_CAPABILITY_MAX - 1) }; diff --git a/lib/nl.c b/lib/nl.c index 4692490..25fd59c 100644 --- a/lib/nl.c +++ b/lib/nl.c @@ -26,6 +26,7 @@ */ #include +#include #include #include #include @@ -75,6 +76,16 @@ * be closed automatically if any of the `exec` family functions succeed. * This is essential for multi threaded programs. * + * @note The local port (`nl_socket_get_local_port()`) is unspecified after + * creating a new socket. It only gets determined when accessing the + * port the first time or during `nl_connect()`. When nl_connect() + * fails during `bind()` due to `ADDRINUSE`, it will retry with + * different ports if the port is unspecified. Unless you want to enforce + * the use of a specific local port, don't access the local port (or + * reset it to `unspecified` by calling `nl_socket_set_local_port(sk, 0)`). + * This capability is indicated by + * `%NL_CAPABILITY_NL_CONNECT_RETRY_GENERATE_PORT_ON_ADDRINUSE`. 
+ * * @see nl_socket_alloc() * @see nl_close() * @@ -85,6 +96,7 @@ int nl_connect(struct nl_sock *sk, int protocol) { int err, flags = 0; + int errsv; socklen_t addrlen; #ifdef SOCK_CLOEXEC @@ -96,7 +108,9 @@ int nl_connect(struct nl_sock *sk, int protocol) sk->s_fd = socket(AF_NETLINK, SOCK_RAW | flags, protocol); if (sk->s_fd < 0) { - err = -nl_syserr2nlerr(errno); + errsv = errno; + NL_DBG(4, "nl_connect(%p): socket() failed with %d\n", sk, errsv); + err = -nl_syserr2nlerr(errsv); goto errout; } @@ -106,11 +120,45 @@ int nl_connect(struct nl_sock *sk, int protocol) goto errout; } - err = bind(sk->s_fd, (struct sockaddr*) &sk->s_local, - sizeof(sk->s_local)); - if (err < 0) { - err = -nl_syserr2nlerr(errno); - goto errout; + if (_nl_socket_is_local_port_unspecified (sk)) { + uint32_t port; + uint32_t used_ports[32] = { 0 }; + + while (1) { + port = _nl_socket_generate_local_port_no_release(sk); + + if (port == UINT32_MAX) { + NL_DBG(4, "nl_connect(%p): no more unused local ports.\n", sk); + _nl_socket_used_ports_release_all(used_ports); + err = -NLE_EXIST; + goto errout; + } + err = bind(sk->s_fd, (struct sockaddr*) &sk->s_local, + sizeof(sk->s_local)); + if (err == 0) + break; + + errsv = errno; + if (errsv == EADDRINUSE) { + NL_DBG(4, "nl_connect(%p): local port %u already in use. 
Retry.\n", sk, (unsigned) port); + _nl_socket_used_ports_set(used_ports, port); + } else { + NL_DBG(4, "nl_connect(%p): bind() for port %u failed with %d\n", sk, (unsigned) port, errsv); + _nl_socket_used_ports_release_all(used_ports); + err = -nl_syserr2nlerr(errsv); + goto errout; + } + } + _nl_socket_used_ports_release_all(used_ports); + } else { + err = bind(sk->s_fd, (struct sockaddr*) &sk->s_local, + sizeof(sk->s_local)); + if (err != 0) { + errsv = errno; + NL_DBG(4, "nl_connect(%p): bind() failed with %d\n", sk, errsv); + err = -nl_syserr2nlerr(errsv); + goto errout; + } } addrlen = sizeof(sk->s_local); @@ -405,7 +453,7 @@ void nl_complete_msg(struct nl_sock *sk, struct nl_msg *msg) nlh = nlmsg_hdr(msg); if (nlh->nlmsg_pid == NL_AUTO_PORT) - nlh->nlmsg_pid = sk->s_local.nl_pid; + nlh->nlmsg_pid = nl_socket_get_local_port(sk); if (nlh->nlmsg_seq == NL_AUTO_SEQ) nlh->nlmsg_seq = sk->s_seq_next++; diff --git a/lib/socket.c b/lib/socket.c index 00d8d6a..5f61b38 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -30,6 +30,7 @@ #include "defs.h" #include +#include #include #include #include @@ -61,16 +62,39 @@ static NL_RW_LOCK(port_map_lock); static uint32_t generate_local_port(void) { - int i, n; + int i, j, n, m; + static uint16_t idx_state = 0; uint32_t pid = getpid() & 0x3FFFFF; nl_write_lock(&port_map_lock); - for (i = 0; i < 32; i++) { + if (idx_state == 0) { + uint32_t t = time(NULL); + + /* from time to time (on average each 2^15 calls), the idx_state will + * be zero again. No problem, just "seed" anew with time(). */ + idx_state = t ^ (t >> 16) ^ 0x3047; + } else + idx_state = idx_state + 20011; /* add prime number */ + + i = idx_state >> 5; + n = idx_state; + for (j = 0; j < 32; j++) { + /* walk the index somewhat randomized, with always leaving the block + * #0 as last. 
The reason is that libnl-1 will start at block #0, + * so just leave the first 32 ports preferably for libnl-1 owned sockets + * (this is relevant only if the application ends up using both versions + * of the library and doesn't hurt otherwise). */ + if (j == 31) + i = 0; + else + i = (((i-1) + 7) % 31) + 1; + if (used_ports_map[i] == 0xFFFFFFFF) continue; - for (n = 0; n < 32; n++) { + for (m = 0; m < 32; m++) { + n = (n + 13) % 32; if (1UL & (used_ports_map[i] >> n)) continue; @@ -82,7 +106,7 @@ static uint32_t generate_local_port(void) nl_write_unlock(&port_map_lock); - return pid + (n << 22); + return pid + (((uint32_t)n) << 22); } } @@ -90,23 +114,65 @@ static uint32_t generate_local_port(void) /* Out of sockets in our own PID namespace, what to do? FIXME */ NL_DBG(1, "Warning: Ran out of unique local port namespace\n"); - return UINT_MAX; + return UINT32_MAX; } static void release_local_port(uint32_t port) { int nr; + uint32_t mask; - if (port == UINT_MAX) + if (port == UINT32_MAX) return; - + + BUG_ON(port == 0); + nr = port >> 22; + mask = 1UL << (nr % 32); + nr /= 32; nl_write_lock(&port_map_lock); - used_ports_map[nr / 32] &= ~(1 << (nr % 32)); + BUG_ON((used_ports_map[nr] & mask) != mask); + used_ports_map[nr] &= ~mask; nl_write_unlock(&port_map_lock); } +/** \cond skip */ +void _nl_socket_used_ports_release_all(const uint32_t *used_ports) +{ + int i; + + for (i = 0; i < 32; i++) { + if (used_ports[i] != 0) { + nl_write_lock(&port_map_lock); + for (; i < 32; i++) { + BUG_ON((used_ports_map[i] & used_ports[i]) != used_ports[i]); + used_ports_map[i] &= ~(used_ports[i]); + } + nl_write_unlock(&port_map_lock); + return; + } + } +} + +void _nl_socket_used_ports_set(uint32_t *used_ports, uint32_t port) +{ + int nr; + int32_t mask; + + nr = port >> 22; + mask = 1UL << (nr % 32); + nr /= 32; + + /* + BUG_ON(port == UINT32_MAX || port == 0 || (getpid() & 0x3FFFFF) != (port & 0x3FFFFF)); + BUG_ON(used_ports[nr] & mask); + */ + + used_ports[nr] |= mask; +} +/** 
\endcond */ + /** * @name Allocation * @{ @@ -125,11 +191,9 @@ static struct nl_sock *__alloc_socket(struct nl_cb *cb) sk->s_local.nl_family = AF_NETLINK; sk->s_peer.nl_family = AF_NETLINK; sk->s_seq_expect = sk->s_seq_next = time(0); - sk->s_local.nl_pid = generate_local_port(); - if (sk->s_local.nl_pid == UINT_MAX) { - nl_socket_free(sk); - return NULL; - } + + /* the port is 0 (unspecified), meaning NL_OWN_PORT */ + sk->s_flags = NL_OWN_PORT; return sk; } @@ -265,6 +329,26 @@ void nl_socket_enable_auto_ack(struct nl_sock *sk) /** @} */ +/** \cond skip */ +int _nl_socket_is_local_port_unspecified(struct nl_sock *sk) +{ + return (sk->s_local.nl_pid == 0); +} + +uint32_t _nl_socket_generate_local_port_no_release(struct nl_sock *sk) +{ + uint32_t port; + + /* reset the port to generate_local_port(), but do not release + * the previously generated port. */ + + port = generate_local_port(); + sk->s_flags &= ~NL_OWN_PORT; + sk->s_local.nl_pid = port; + return port; +} +/** \endcond */ + /** * @name Source Idenficiation * @{ @@ -272,6 +356,18 @@ void nl_socket_enable_auto_ack(struct nl_sock *sk) uint32_t nl_socket_get_local_port(const struct nl_sock *sk) { + if (sk->s_local.nl_pid == 0) { + /* modify the const argument sk. This is justified, because + * nobody ever saw the local_port from externally. So, we + * initialize it on first use. + * + * Note that this also means that you cannot call this function + * from multiple threads without synchronization. But nl_sock + * is not automatically threadsafe anyway, so the user is not + * allowed to do that. + */ + return _nl_socket_generate_local_port_no_release((struct nl_sock *) sk); + } return sk->s_local.nl_pid; } @@ -280,27 +376,18 @@ uint32_t nl_socket_get_local_port(const struct nl_sock *sk) * @arg sk Netlink socket. * @arg port Local port identifier * - * Assigns a local port identifier to the socket. If port is 0 - * a unique port identifier will be generated automatically. 
+ * Assigns a local port identifier to the socket. + * + * If port is 0, the port is reset to 'unspecified', as it is right after + * calling nl_socket_alloc(). + * Unspecified means that the port will be generated automatically later + * on first use (either on nl_socket_get_local_port() or nl_connect()). */ void nl_socket_set_local_port(struct nl_sock *sk, uint32_t port) { - if (port == 0) { - port = generate_local_port(); - /* - * Release local port after generation of a new one to be - * able to change local port using nl_socket_set_local_port(, 0) - */ - if (!(sk->s_flags & NL_OWN_PORT)) - release_local_port(sk->s_local.nl_pid); - else - sk->s_flags &= ~NL_OWN_PORT; - } else { - if (!(sk->s_flags & NL_OWN_PORT)) - release_local_port(sk->s_local.nl_pid); - sk->s_flags |= NL_OWN_PORT; - } - + if (!(sk->s_flags & NL_OWN_PORT)) + release_local_port(sk->s_local.nl_pid); + sk->s_flags |= NL_OWN_PORT; sk->s_local.nl_pid = port; } diff --git a/lib/utils.c b/lib/utils.c index e2294e6..5cc9e94 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -1147,7 +1147,7 @@ int nl_has_capability (int capability) NL_CAPABILITY_ROUTE_BUILD_MSG_SET_SCOPE, NL_CAPABILITY_ROUTE_LINK_VETH_GET_PEER_OWN_REFERENCE, NL_CAPABILITY_ROUTE_LINK_CLS_ADD_ACT_OWN_REFERENCE, - 0, + NL_CAPABILITY_NL_CONNECT_RETRY_GENERATE_PORT_ON_ADDRINUSE, 0, 0, 0, diff --git a/libnl.sym.in b/libnl.sym.in index e8f6c53..df8888c 100644 --- a/libnl.sym.in +++ b/libnl.sym.in @@ -1,4 +1,9 @@ libnl_@MAJ_VERSION@ { global: *; +local: + _nl_socket_generate_local_port_no_release; + _nl_socket_is_local_port_unspecified; + _nl_socket_used_ports_release_all; + _nl_socket_used_ports_set; };