From 308841d8c99907a364d6876ed9375507153729eb Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Thu, 2 Feb 2017 11:17:17 +0100 Subject: implement SO_BINDTODEVICE for inet protocols bind to device is needed to properly support VRF-Lite under Linux (see [1] for details). [1]: https://www.kernel.org/doc/Documentation/networking/vrf.txt --- erts/emulator/drivers/common/inet_drv.c | 97 ++++++++++++++++++++++++++++++++- erts/preloaded/src/prim_inet.erl | 3 + lib/kernel/doc/src/inet.xml | 26 +++++++++ lib/kernel/src/inet.erl | 8 +-- lib/kernel/src/inet_int.hrl | 1 + lib/kernel/test/inet_SUITE.erl | 70 +++++++++++++++++++++++- 6 files changed, 198 insertions(+), 7 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 0fe5183b42..90114b77f5 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -591,7 +591,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) (((unsigned char*) (s))[1] << 8) | \ (((unsigned char*) (s))[0])) -#ifdef HAVE_SYS_UN_H +#if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE) /* strnlen doesn't exist everywhere */ static size_t my_strnlen(const char *s, size_t maxlen) @@ -602,6 +602,10 @@ static size_t my_strnlen(const char *s, size_t maxlen) return i; } +#endif + +#if defined(HAVE_SYS_UN_H) + /* Check that some character in the buffer != '\0' */ static int is_nonzero(const char *s, size_t n) { @@ -778,6 +782,7 @@ static int is_nonzero(const char *s, size_t n) #define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */ #define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */ #define INET_OPT_TCLASS 41 /* IPv6 transport class */ +#define INET_OPT_BIND_TO_DEVICE 42 /* get/set network device the socket is bound to */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -1334,6 +1339,7 @@ static ErlDrvTermData am_tos; static ErlDrvTermData am_tclass; static ErlDrvTermData am_ipv6_v6only; static ErlDrvTermData am_netns; +static ErlDrvTermData am_bind_to_device; #endif static char str_eafnosupport[] = "eafnosupport"; @@ -3725,6 +3731,7 @@ static void inet_init_sctp(void) { INIT_ATOM(tclass); INIT_ATOM(ipv6_v6only); INIT_ATOM(netns); + INIT_ATOM(bind_to_device); /* Option names */ INIT_ATOM(sctp_rtoinfo); @@ -5946,6 +5953,9 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) int ival; char* arg_ptr; int arg_sz; +#ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; +#endif enum PacketParseType old_htype = desc->htype; int old_active = desc->active; int propagate; /* Set to 1 if failure to set this option @@ -6331,6 +6341,29 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) len -= arg_sz; break; +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + if (ival < 0) return -1; + if (len < ival) return -1; + if (ival > sizeof(ifname)) { + return -1; + } + memcpy(ifname, ptr, ival); + ifname[ival] = '\0'; + ptr += ival; + len -= ival; + + proto = SOL_SOCKET; + type = SO_BINDTODEVICE; + arg_ptr = (char*)&ifname; + arg_sz = sizeof(ifname); + propagate = 1; /* We do want to know if this fails */ + + DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n", + (long)desc->port, desc->s, ifname)); + break; +#endif + default: return -1; } @@ -6462,6 +6495,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) struct sctp_event_subscribe es; # ifdef SCTP_DELAYED_ACK_TIME struct sctp_assoc_value av; /* Not in SOLARIS10 */ +# endif +# ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; # endif } arg; @@ -6702,6 +6738,23 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) continue; /* Option not supported -- ignore it */ # endif +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + arg_sz = get_int32(curr); curr += 4; + CHKLEN(curr, arg_sz); + if (arg_sz >= sizeof(arg.ifname)) + return -1; + memcpy(arg.ifname, curr, arg_sz); + arg.ifname[arg_sz] = '\0'; + curr += arg_sz; + + proto = SOL_SOCKET; + type = SO_BINDTODEVICE; + arg_ptr = (char*) (&arg.ifname); + arg_sz = sizeof ( arg.ifname); + break; +#endif + case SCTP_OPT_AUTOCLOSE: { arg.ival= get_int32 (curr); curr += 4; @@ -6967,6 +7020,9 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, ErlDrvSizeT dest_used = 0; ErlDrvSizeT dest_allocated = destlen; char *orig_dest = *dest; +#ifdef SO_BINDTODEVICE + char ifname[IFNAMSIZ]; +#endif /* Ptr is a name parameter */ #define RETURN_ERROR() \ @@ -7302,6 +7358,26 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, put_int32(arg_sz,ptr); continue; } + +#ifdef SO_BINDTODEVICE + case INET_OPT_BIND_TO_DEVICE: + arg_sz = sizeof(ifname); + TRUNCATE_TO(0,ptr); + PLACE_FOR(5 + arg_sz,ptr); + arg_ptr = ptr + 5; + if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE, + arg_ptr,&arg_sz))) { + TRUNCATE_TO(0,ptr); + continue; + } + arg_sz = my_strnlen(arg_ptr, arg_sz); + TRUNCATE_TO(arg_sz + 5,ptr); + *ptr++ = opt; + put_int32(arg_sz,ptr); + ptr += arg_sz; + continue; +#endif + default: RETURN_ERROR(); } @@ -7583,6 +7659,25 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, i = LOAD_TUPLE (spec, i, 2); break; } + +#ifdef SO_BINDTODEVICE + /* The following option returns a binary: */ + case INET_OPT_BIND_TO_DEVICE: { + char ifname[IFNAMSIZ]; + unsigned int sz = sizeof(ifname); + + if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE, + &ifname, &sz) < 0) continue; + /* Fill in the response: */ + PLACE_FOR(spec, i, + LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_bind_to_device); + i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz)); + i = LOAD_TUPLE (spec, i, 2); + break; + } +#endif + /* The following options just return an integer value: */ case INET_OPT_RCVBUF : case INET_OPT_SNDBUF : diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 61f727e8a4..19f8b0d873 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -1234,6 +1234,7 @@ enc_opt(netns) -> ?INET_LOPT_NETNS; enc_opt(show_econnreset) -> ?INET_LOPT_TCP_SHOW_ECONNRESET; enc_opt(line_delimiter) -> ?INET_LOPT_LINE_DELIM; enc_opt(raw) -> ?INET_OPT_RAW; +enc_opt(bind_to_device) -> ?INET_OPT_BIND_TO_DEVICE; % Names of SCTP opts: enc_opt(sctp_rtoinfo) -> ?SCTP_OPT_RTOINFO; enc_opt(sctp_associnfo) -> ?SCTP_OPT_ASSOCINFO; @@ -1294,6 +1295,7 @@ dec_opt(?INET_LOPT_NETNS) -> netns; dec_opt(?INET_LOPT_TCP_SHOW_ECONNRESET) -> show_econnreset; dec_opt(?INET_LOPT_LINE_DELIM) -> line_delimiter; dec_opt(?INET_OPT_RAW) -> raw; +dec_opt(?INET_OPT_BIND_TO_DEVICE) -> bind_to_device; dec_opt(I) when is_integer(I) -> undefined. @@ -1395,6 +1397,7 @@ type_opt_1(packet_size) -> uint; type_opt_1(read_packets) -> uint; type_opt_1(netns) -> binary; type_opt_1(show_econnreset) -> bool; +type_opt_1(bind_to_device) -> binary; %% %% SCTP options (to be set). If the type is a record type, the corresponding %% record signature is returned, otherwise, an "elementary" type tag diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index 076e50cd10..947e4d4560 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -897,6 +897,32 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp file:native_name_encoding/0.

+ {bind_to_device, Ifname :: binary()} + +

Binds a socket to a specific network interface. This option + must be used in a function call that creates a socket, that is, + gen_tcp:connect/3,4, + gen_tcp:listen/2, + gen_udp:open/1,2, or + gen_sctp:open/0,1,2.

+

Unlike getifaddrs/0, Ifname + is encoded a binary. In the unlikely case that a system is using + non-7-bit-ASCII characters in network device names, special care + has to be taken when encoding this argument.

+

This option uses the Linux-specific socket option + SO_BINDTODEVICE, such as in Linux kernel 2.0.30 or later, + and therefore only exists when the runtime system + is compiled for such an operating system.

+

Before Linux 3.8, this socket option could be set, but could not retrieved + with getopts/2. Since Linux 3.8, + it is readable.

+

The virtual machine also needs elevated privileges, either + running as superuser or (for Linux) having capability + CAP_NET_RAW.

+

The primary use case for this option is to bind sockets into + Linux VRF instances. +

+
list

Received Packet is delivered as a list.

diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index f5c13ecdd7..5be790b7d9 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -702,7 +702,7 @@ connect_options() -> header, active, packet, packet_size, buffer, mode, deliver, line_delimiter, exit_on_close, high_watermark, low_watermark, high_msgq_watermark, low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw, - show_econnreset]. + show_econnreset, bind_to_device]. connect_options(Opts, Mod) -> BaseOpts = @@ -770,7 +770,7 @@ listen_options() -> header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only, exit_on_close, high_watermark, low_watermark, high_msgq_watermark, low_msgq_watermark, send_timeout, send_timeout_close, delay_send, - packet_size, raw, show_econnreset]. + packet_size, raw, show_econnreset, bind_to_device]. listen_options(Opts, Mod) -> BaseOpts = @@ -850,7 +850,7 @@ udp_options() -> deliver, ipv6_v6only, broadcast, dontroute, multicast_if, multicast_ttl, multicast_loop, add_membership, drop_membership, read_packets,raw, - high_msgq_watermark, low_msgq_watermark]. + high_msgq_watermark, low_msgq_watermark, bind_to_device]. udp_options(Opts, Mod) -> @@ -919,6 +919,7 @@ sctp_options() -> [ % The following are generic inet options supported for SCTP sockets: mode, active, buffer, tos, tclass, priority, dontroute, reuseaddr, linger, sndbuf, recbuf, ipv6_v6only, high_msgq_watermark, low_msgq_watermark, + bind_to_device, % Other options are SCTP-specific (though they may be similar to their % TCP and UDP counter-parts): @@ -1055,7 +1056,6 @@ binary2filename(Bin) -> Bin end. - translate_ip(any, inet) -> {0,0,0,0}; translate_ip(loopback, inet) -> {127,0,0,1}; translate_ip(any, inet6) -> {0,0,0,0,0,0,0,0}; diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl index 4e8f59a3b9..e6cd48935a 100644 --- a/lib/kernel/src/inet_int.hrl +++ b/lib/kernel/src/inet_int.hrl @@ -154,6 +154,7 @@ -define(INET_LOPT_TCP_SHOW_ECONNRESET, 39). -define(INET_LOPT_LINE_DELIM, 40). -define(INET_OPT_TCLASS, 41). +-define(INET_OPT_BIND_TO_DEVICE, 42). % Specific SCTP options: separate range: -define(SCTP_OPT_RTOINFO, 100). -define(SCTP_OPT_ASSOCINFO, 101). diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl index f60c13d2e3..86f6b95fb9 100644 --- a/lib/kernel/test/inet_SUITE.erl +++ b/lib/kernel/test/inet_SUITE.erl @@ -40,7 +40,8 @@ lookup_bad_search_option/1, getif/1, getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1, - parse_strict_address/1, simple_netns/1, simple_netns_open/1]). + parse_strict_address/1, simple_netns/1, simple_netns_open/1, + simple_bind_to_device/1, simple_bind_to_device_open/1]). -export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1, kill_gethost/0, parallell_gethost/0, test_netns/0]). @@ -58,7 +59,8 @@ all() -> gethostnative_debug_level, gethostnative_soft_restart, lookup_bad_search_option, getif, getif_ifr_name_overflow, getservbyname_overflow, - getifaddrs, parse_strict_address, simple_netns, simple_netns_open]. + getifaddrs, parse_strict_address, simple_netns, simple_netns_open, + simple_bind_to_device, simple_bind_to_device_open]. groups() -> [{parse, [], [parse_hosts, parse_address]}]. @@ -1247,3 +1249,67 @@ cmd(CmdString) -> io:put_chars(["# ",CmdString,io_lib:nl()]), io:put_chars([os:cmd(CmdString++" ; echo ' =>' $?")]), ok. + +-define(CAP_NET_RAW, 13). %% from /usr/include/linux/capability.h + +can_bind_to_device({unix, linux}, {Major, _, _}) + when Major > 2 -> + Status = os:cmd("cat /proc/self/status | grep CapEff"), + [_, CapEffStr] = string:tokens(Status, [$\n, $\t]), + CapEff = list_to_integer(CapEffStr, 16), + if CapEff band (1 bsl ?CAP_NET_RAW) =/= 0 -> + ok; + true -> + {skip,"insufficient capabilities, CAP_NET_RAW not granted"} + end; +can_bind_to_device(_OS, _Version) -> + {skip,"socket option bind_to_device not supported on this OS or version"}. + +simple_bind_to_device(Config) when is_list(Config) -> + case can_bind_to_device(os:type(), os:version()) of + ok -> + {ok,U} = gen_udp:open(0), + jog_bind_to_device_opt(U), + ok = gen_udp:close(U), + %% + {ok,L} = gen_tcp:listen(0, []), + jog_bind_to_device_opt(L), + ok = gen_tcp:close(L), + %% + case gen_sctp:open() of + {ok,S} -> + jog_bind_to_device_opt(S), + ok = gen_sctp:close(S); + {error,eprotonosupport} -> + ok + end; + Other -> + Other + end. + +%% Smoke test bind_to_device support. +simple_bind_to_device_open(Config) when is_list(Config) -> + case can_bind_to_device(os:type(), os:version()) of + ok -> + {ok,U} = gen_udp:open(0, [binary,{bind_to_device,<<"lo">>},inet]), + ok = gen_udp:close(U), + {ok,T} = gen_tcp:listen(0, [binary,{bind_to_device,<<"lo">>},inet]), + ok = gen_tcp:close(T), + + case gen_sctp:open(0, [binary,{bind_to_device,<<"lo">>},inet]) of + {ok,S} -> + ok = gen_sctp:close(S); + {error,eprotonosupport} -> + ok + end; + Other -> + Other + end. + +jog_bind_to_device_opt(S) -> + %% This is just jogging the option mechanics + ok = inet:setopts(S, [{bind_to_device,<<>>}]), + {ok,[{bind_to_device,<<>>}]} = inet:getopts(S, [bind_to_device]), + ok = inet:setopts(S, [{bind_to_device,<<"lo">>}]), + {ok,[{bind_to_device,<<"lo">>}]} = inet:getopts(S, [bind_to_device]), + ok. -- cgit v1.2.3