aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Schultz <[email protected]>2017-02-02 11:17:17 +0100
committerAndreas Schultz <[email protected]>2017-04-20 16:11:12 +0200
commit308841d8c99907a364d6876ed9375507153729eb (patch)
tree99888ddbcf41f5452da392677b396958d46fa48e
parent8caa54fc05e381a3d321cd48770e48853ddb177a (diff)
downloadotp-308841d8c99907a364d6876ed9375507153729eb.tar.gz
otp-308841d8c99907a364d6876ed9375507153729eb.tar.bz2
otp-308841d8c99907a364d6876ed9375507153729eb.zip
implement SO_BINDTODEVICE for inet protocols
bind to device is needed to properly support VRF-Lite under Linux (see [1] for details). [1]: https://www.kernel.org/doc/Documentation/networking/vrf.txt
-rw-r--r--erts/emulator/drivers/common/inet_drv.c97
-rw-r--r--erts/preloaded/src/prim_inet.erl3
-rw-r--r--lib/kernel/doc/src/inet.xml26
-rw-r--r--lib/kernel/src/inet.erl8
-rw-r--r--lib/kernel/src/inet_int.hrl1
-rw-r--r--lib/kernel/test/inet_SUITE.erl70
6 files changed, 198 insertions, 7 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 0fe5183b42..90114b77f5 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -591,7 +591,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
(((unsigned char*) (s))[1] << 8) | \
(((unsigned char*) (s))[0]))
-#ifdef HAVE_SYS_UN_H
+#if defined(HAVE_SYS_UN_H) || defined(SO_BINDTODEVICE)
/* strnlen doesn't exist everywhere */
static size_t my_strnlen(const char *s, size_t maxlen)
@@ -602,6 +602,10 @@ static size_t my_strnlen(const char *s, size_t maxlen)
return i;
}
+#endif
+
+#if defined(HAVE_SYS_UN_H)
+
/* Check that some character in the buffer != '\0' */
static int is_nonzero(const char *s, size_t n)
{
@@ -778,6 +782,7 @@ static int is_nonzero(const char *s, size_t n)
#define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */
#define INET_LOPT_LINE_DELIM 40 /* Line delimiting char */
#define INET_OPT_TCLASS 41 /* IPv6 transport class */
+#define INET_OPT_BIND_TO_DEVICE 42 /* get/set network device the socket is bound to */
/* SCTP options: a separate range, from 100: */
#define SCTP_OPT_RTOINFO 100
#define SCTP_OPT_ASSOCINFO 101
@@ -1334,6 +1339,7 @@ static ErlDrvTermData am_tos;
static ErlDrvTermData am_tclass;
static ErlDrvTermData am_ipv6_v6only;
static ErlDrvTermData am_netns;
+static ErlDrvTermData am_bind_to_device;
#endif
static char str_eafnosupport[] = "eafnosupport";
@@ -3725,6 +3731,7 @@ static void inet_init_sctp(void) {
INIT_ATOM(tclass);
INIT_ATOM(ipv6_v6only);
INIT_ATOM(netns);
+ INIT_ATOM(bind_to_device);
/* Option names */
INIT_ATOM(sctp_rtoinfo);
@@ -5946,6 +5953,9 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
int ival;
char* arg_ptr;
int arg_sz;
+#ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+#endif
enum PacketParseType old_htype = desc->htype;
int old_active = desc->active;
int propagate; /* Set to 1 if failure to set this option
@@ -6331,6 +6341,29 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
len -= arg_sz;
break;
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ if (ival < 0) return -1;
+ if (len < ival) return -1;
+ if (ival > sizeof(ifname)) {
+ return -1;
+ }
+ memcpy(ifname, ptr, ival);
+ ifname[ival] = '\0';
+ ptr += ival;
+ len -= ival;
+
+ proto = SOL_SOCKET;
+ type = SO_BINDTODEVICE;
+ arg_ptr = (char*)&ifname;
+ arg_sz = sizeof(ifname);
+ propagate = 1; /* We do want to know if this fails */
+
+ DEBUGF(("inet_set_opts(%ld): s=%d, SO_BINDTODEVICE=%s\r\n",
+ (long)desc->port, desc->s, ifname));
+ break;
+#endif
+
default:
return -1;
}
@@ -6463,6 +6496,9 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
# ifdef SCTP_DELAYED_ACK_TIME
struct sctp_assoc_value av; /* Not in SOLARIS10 */
# endif
+# ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+# endif
}
arg;
@@ -6702,6 +6738,23 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
continue; /* Option not supported -- ignore it */
# endif
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ arg_sz = get_int32(curr); curr += 4;
+ CHKLEN(curr, arg_sz);
+ if (arg_sz >= sizeof(arg.ifname))
+ return -1;
+ memcpy(arg.ifname, curr, arg_sz);
+ arg.ifname[arg_sz] = '\0';
+ curr += arg_sz;
+
+ proto = SOL_SOCKET;
+ type = SO_BINDTODEVICE;
+ arg_ptr = (char*) (&arg.ifname);
+ arg_sz = sizeof ( arg.ifname);
+ break;
+#endif
+
case SCTP_OPT_AUTOCLOSE:
{
arg.ival= get_int32 (curr); curr += 4;
@@ -6967,6 +7020,9 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
ErlDrvSizeT dest_used = 0;
ErlDrvSizeT dest_allocated = destlen;
char *orig_dest = *dest;
+#ifdef SO_BINDTODEVICE
+ char ifname[IFNAMSIZ];
+#endif
/* Ptr is a name parameter */
#define RETURN_ERROR() \
@@ -7302,6 +7358,26 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc,
put_int32(arg_sz,ptr);
continue;
}
+
+#ifdef SO_BINDTODEVICE
+ case INET_OPT_BIND_TO_DEVICE:
+ arg_sz = sizeof(ifname);
+ TRUNCATE_TO(0,ptr);
+ PLACE_FOR(5 + arg_sz,ptr);
+ arg_ptr = ptr + 5;
+ if (IS_SOCKET_ERROR(sock_getopt(desc->s,SOL_SOCKET,SO_BINDTODEVICE,
+ arg_ptr,&arg_sz))) {
+ TRUNCATE_TO(0,ptr);
+ continue;
+ }
+ arg_sz = my_strnlen(arg_ptr, arg_sz);
+ TRUNCATE_TO(arg_sz + 5,ptr);
+ *ptr++ = opt;
+ put_int32(arg_sz,ptr);
+ ptr += arg_sz;
+ continue;
+#endif
+
default:
RETURN_ERROR();
}
@@ -7583,6 +7659,25 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc,
i = LOAD_TUPLE (spec, i, 2);
break;
}
+
+#ifdef SO_BINDTODEVICE
+ /* The following option returns a binary: */
+ case INET_OPT_BIND_TO_DEVICE: {
+ char ifname[IFNAMSIZ];
+ unsigned int sz = sizeof(ifname);
+
+ if (sock_getopt(desc->s, SOL_SOCKET, SO_BINDTODEVICE,
+ &ifname, &sz) < 0) continue;
+ /* Fill in the response: */
+ PLACE_FOR(spec, i,
+ LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT);
+ i = LOAD_ATOM (spec, i, am_bind_to_device);
+ i = LOAD_BUF2BINARY(spec, i, ifname, my_strnlen(ifname, sz));
+ i = LOAD_TUPLE (spec, i, 2);
+ break;
+ }
+#endif
+
/* The following options just return an integer value: */
case INET_OPT_RCVBUF :
case INET_OPT_SNDBUF :
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index 61f727e8a4..19f8b0d873 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -1234,6 +1234,7 @@ enc_opt(netns) -> ?INET_LOPT_NETNS;
enc_opt(show_econnreset) -> ?INET_LOPT_TCP_SHOW_ECONNRESET;
enc_opt(line_delimiter) -> ?INET_LOPT_LINE_DELIM;
enc_opt(raw) -> ?INET_OPT_RAW;
+enc_opt(bind_to_device) -> ?INET_OPT_BIND_TO_DEVICE;
% Names of SCTP opts:
enc_opt(sctp_rtoinfo) -> ?SCTP_OPT_RTOINFO;
enc_opt(sctp_associnfo) -> ?SCTP_OPT_ASSOCINFO;
@@ -1294,6 +1295,7 @@ dec_opt(?INET_LOPT_NETNS) -> netns;
dec_opt(?INET_LOPT_TCP_SHOW_ECONNRESET) -> show_econnreset;
dec_opt(?INET_LOPT_LINE_DELIM) -> line_delimiter;
dec_opt(?INET_OPT_RAW) -> raw;
+dec_opt(?INET_OPT_BIND_TO_DEVICE) -> bind_to_device;
dec_opt(I) when is_integer(I) -> undefined.
@@ -1395,6 +1397,7 @@ type_opt_1(packet_size) -> uint;
type_opt_1(read_packets) -> uint;
type_opt_1(netns) -> binary;
type_opt_1(show_econnreset) -> bool;
+type_opt_1(bind_to_device) -> binary;
%%
%% SCTP options (to be set). If the type is a record type, the corresponding
%% record signature is returned, otherwise, an "elementary" type tag
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml
index 076e50cd10..947e4d4560 100644
--- a/lib/kernel/doc/src/inet.xml
+++ b/lib/kernel/doc/src/inet.xml
@@ -897,6 +897,32 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code>
<seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding/0</c></seealso>.</p></item>
</list>
</item>
+ <tag><c>{bind_to_device, Ifname :: binary()}</c></tag>
+ <item>
+ <p>Binds a socket to a specific network interface. This option
+ must be used in a function call that creates a socket, that is,
+ <seealso marker="gen_tcp#connect/3"><c>gen_tcp:connect/3,4</c></seealso>,
+ <seealso marker="gen_tcp#listen/2"><c>gen_tcp:listen/2</c></seealso>,
+ <seealso marker="gen_udp#open/1"><c>gen_udp:open/1,2</c></seealso>, or
+ <seealso marker="gen_sctp#open/0"><c>gen_sctp:open/0,1,2</c></seealso>.</p>
+ <p>Unlike <seealso marker="#getifaddrs/0"><c>getifaddrs/0</c></seealso>, Ifname
+ is encoded a binary. In the unlikely case that a system is using
+ non-7-bit-ASCII characters in network device names, special care
+ has to be taken when encoding this argument.</p>
+ <p>This option uses the Linux-specific socket option
+ <c>SO_BINDTODEVICE</c>, such as in Linux kernel 2.0.30 or later,
+ and therefore only exists when the runtime system
+ is compiled for such an operating system.</p>
+ <p>Before Linux 3.8, this socket option could be set, but could not retrieved
+ with <seealso marker="#getopts/2"><c>getopts/2</c></seealso>. Since Linux 3.8,
+ it is readable.</p>
+ <p>The virtual machine also needs elevated privileges, either
+ running as superuser or (for Linux) having capability
+ <c>CAP_NET_RAW</c>.</p>
+ <p>The primary use case for this option is to bind sockets into
+ <url href="http://www.kernel.org/doc/Documentation/networking/vrf.txt">Linux VRF instances</url>.
+ </p>
+ </item>
<tag><c>list</c></tag>
<item>
<p>Received <c>Packet</c> is delivered as a list.</p>
diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl
index f5c13ecdd7..5be790b7d9 100644
--- a/lib/kernel/src/inet.erl
+++ b/lib/kernel/src/inet.erl
@@ -702,7 +702,7 @@ connect_options() ->
header, active, packet, packet_size, buffer, mode, deliver, line_delimiter,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw,
- show_econnreset].
+ show_econnreset, bind_to_device].
connect_options(Opts, Mod) ->
BaseOpts =
@@ -770,7 +770,7 @@ listen_options() ->
header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send,
- packet_size, raw, show_econnreset].
+ packet_size, raw, show_econnreset, bind_to_device].
listen_options(Opts, Mod) ->
BaseOpts =
@@ -850,7 +850,7 @@ udp_options() ->
deliver, ipv6_v6only,
broadcast, dontroute, multicast_if, multicast_ttl, multicast_loop,
add_membership, drop_membership, read_packets,raw,
- high_msgq_watermark, low_msgq_watermark].
+ high_msgq_watermark, low_msgq_watermark, bind_to_device].
udp_options(Opts, Mod) ->
@@ -919,6 +919,7 @@ sctp_options() ->
[ % The following are generic inet options supported for SCTP sockets:
mode, active, buffer, tos, tclass, priority, dontroute, reuseaddr, linger, sndbuf,
recbuf, ipv6_v6only, high_msgq_watermark, low_msgq_watermark,
+ bind_to_device,
% Other options are SCTP-specific (though they may be similar to their
% TCP and UDP counter-parts):
@@ -1055,7 +1056,6 @@ binary2filename(Bin) ->
Bin
end.
-
translate_ip(any, inet) -> {0,0,0,0};
translate_ip(loopback, inet) -> {127,0,0,1};
translate_ip(any, inet6) -> {0,0,0,0,0,0,0,0};
diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl
index 4e8f59a3b9..e6cd48935a 100644
--- a/lib/kernel/src/inet_int.hrl
+++ b/lib/kernel/src/inet_int.hrl
@@ -154,6 +154,7 @@
-define(INET_LOPT_TCP_SHOW_ECONNRESET, 39).
-define(INET_LOPT_LINE_DELIM, 40).
-define(INET_OPT_TCLASS, 41).
+-define(INET_OPT_BIND_TO_DEVICE, 42).
% Specific SCTP options: separate range:
-define(SCTP_OPT_RTOINFO, 100).
-define(SCTP_OPT_ASSOCINFO, 101).
diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl
index f60c13d2e3..86f6b95fb9 100644
--- a/lib/kernel/test/inet_SUITE.erl
+++ b/lib/kernel/test/inet_SUITE.erl
@@ -40,7 +40,8 @@
lookup_bad_search_option/1,
getif/1,
getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1,
- parse_strict_address/1, simple_netns/1, simple_netns_open/1]).
+ parse_strict_address/1, simple_netns/1, simple_netns_open/1,
+ simple_bind_to_device/1, simple_bind_to_device_open/1]).
-export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1,
kill_gethost/0, parallell_gethost/0, test_netns/0]).
@@ -58,7 +59,8 @@ all() ->
gethostnative_debug_level, gethostnative_soft_restart,
lookup_bad_search_option,
getif, getif_ifr_name_overflow, getservbyname_overflow,
- getifaddrs, parse_strict_address, simple_netns, simple_netns_open].
+ getifaddrs, parse_strict_address, simple_netns, simple_netns_open,
+ simple_bind_to_device, simple_bind_to_device_open].
groups() ->
[{parse, [], [parse_hosts, parse_address]}].
@@ -1247,3 +1249,67 @@ cmd(CmdString) ->
io:put_chars(["# ",CmdString,io_lib:nl()]),
io:put_chars([os:cmd(CmdString++" ; echo ' =>' $?")]),
ok.
+
+-define(CAP_NET_RAW, 13). %% from /usr/include/linux/capability.h
+
+can_bind_to_device({unix, linux}, {Major, _, _})
+ when Major > 2 ->
+ Status = os:cmd("cat /proc/self/status | grep CapEff"),
+ [_, CapEffStr] = string:tokens(Status, [$\n, $\t]),
+ CapEff = list_to_integer(CapEffStr, 16),
+ if CapEff band (1 bsl ?CAP_NET_RAW) =/= 0 ->
+ ok;
+ true ->
+ {skip,"insufficient capabilities, CAP_NET_RAW not granted"}
+ end;
+can_bind_to_device(_OS, _Version) ->
+ {skip,"socket option bind_to_device not supported on this OS or version"}.
+
+simple_bind_to_device(Config) when is_list(Config) ->
+ case can_bind_to_device(os:type(), os:version()) of
+ ok ->
+ {ok,U} = gen_udp:open(0),
+ jog_bind_to_device_opt(U),
+ ok = gen_udp:close(U),
+ %%
+ {ok,L} = gen_tcp:listen(0, []),
+ jog_bind_to_device_opt(L),
+ ok = gen_tcp:close(L),
+ %%
+ case gen_sctp:open() of
+ {ok,S} ->
+ jog_bind_to_device_opt(S),
+ ok = gen_sctp:close(S);
+ {error,eprotonosupport} ->
+ ok
+ end;
+ Other ->
+ Other
+ end.
+
+%% Smoke test bind_to_device support.
+simple_bind_to_device_open(Config) when is_list(Config) ->
+ case can_bind_to_device(os:type(), os:version()) of
+ ok ->
+ {ok,U} = gen_udp:open(0, [binary,{bind_to_device,<<"lo">>},inet]),
+ ok = gen_udp:close(U),
+ {ok,T} = gen_tcp:listen(0, [binary,{bind_to_device,<<"lo">>},inet]),
+ ok = gen_tcp:close(T),
+
+ case gen_sctp:open(0, [binary,{bind_to_device,<<"lo">>},inet]) of
+ {ok,S} ->
+ ok = gen_sctp:close(S);
+ {error,eprotonosupport} ->
+ ok
+ end;
+ Other ->
+ Other
+ end.
+
+jog_bind_to_device_opt(S) ->
+ %% This is just jogging the option mechanics
+ ok = inet:setopts(S, [{bind_to_device,<<>>}]),
+ {ok,[{bind_to_device,<<>>}]} = inet:getopts(S, [bind_to_device]),
+ ok = inet:setopts(S, [{bind_to_device,<<"lo">>}]),
+ {ok,[{bind_to_device,<<"lo">>}]} = inet:getopts(S, [bind_to_device]),
+ ok.