aboutsummaryrefslogtreecommitdiffstats
path: root/lib/kernel
diff options
context:
space:
mode:
authorAndreas Schultz <[email protected]>2017-02-02 11:17:17 +0100
committerAndreas Schultz <[email protected]>2017-04-20 16:11:12 +0200
commit308841d8c99907a364d6876ed9375507153729eb (patch)
tree99888ddbcf41f5452da392677b396958d46fa48e /lib/kernel
parent8caa54fc05e381a3d321cd48770e48853ddb177a (diff)
downloadotp-308841d8c99907a364d6876ed9375507153729eb.tar.gz
otp-308841d8c99907a364d6876ed9375507153729eb.tar.bz2
otp-308841d8c99907a364d6876ed9375507153729eb.zip
implement SO_BINDTODEVICE for inet protocols
bind to device is needed to properly support VRF-Lite under Linux (see [1] for details). [1]: https://www.kernel.org/doc/Documentation/networking/vrf.txt
Diffstat (limited to 'lib/kernel')
-rw-r--r--lib/kernel/doc/src/inet.xml26
-rw-r--r--lib/kernel/src/inet.erl8
-rw-r--r--lib/kernel/src/inet_int.hrl1
-rw-r--r--lib/kernel/test/inet_SUITE.erl70
4 files changed, 99 insertions, 6 deletions
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml
index 076e50cd10..947e4d4560 100644
--- a/lib/kernel/doc/src/inet.xml
+++ b/lib/kernel/doc/src/inet.xml
@@ -897,6 +897,32 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code>
<seealso marker="file#native_name_encoding/0"><c>file:native_name_encoding/0</c></seealso>.</p></item>
</list>
</item>
+ <tag><c>{bind_to_device, Ifname :: binary()}</c></tag>
+ <item>
+ <p>Binds a socket to a specific network interface. This option
+ must be used in a function call that creates a socket, that is,
+ <seealso marker="gen_tcp#connect/3"><c>gen_tcp:connect/3,4</c></seealso>,
+ <seealso marker="gen_tcp#listen/2"><c>gen_tcp:listen/2</c></seealso>,
+ <seealso marker="gen_udp#open/1"><c>gen_udp:open/1,2</c></seealso>, or
+ <seealso marker="gen_sctp#open/0"><c>gen_sctp:open/0,1,2</c></seealso>.</p>
+ <p>Unlike <seealso marker="#getifaddrs/0"><c>getifaddrs/0</c></seealso>, Ifname
+ is encoded a binary. In the unlikely case that a system is using
+ non-7-bit-ASCII characters in network device names, special care
+ has to be taken when encoding this argument.</p>
+ <p>This option uses the Linux-specific socket option
+ <c>SO_BINDTODEVICE</c>, such as in Linux kernel 2.0.30 or later,
+ and therefore only exists when the runtime system
+ is compiled for such an operating system.</p>
+ <p>Before Linux 3.8, this socket option could be set, but could not retrieved
+ with <seealso marker="#getopts/2"><c>getopts/2</c></seealso>. Since Linux 3.8,
+ it is readable.</p>
+ <p>The virtual machine also needs elevated privileges, either
+ running as superuser or (for Linux) having capability
+ <c>CAP_NET_RAW</c>.</p>
+ <p>The primary use case for this option is to bind sockets into
+ <url href="http://www.kernel.org/doc/Documentation/networking/vrf.txt">Linux VRF instances</url>.
+ </p>
+ </item>
<tag><c>list</c></tag>
<item>
<p>Received <c>Packet</c> is delivered as a list.</p>
diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl
index f5c13ecdd7..5be790b7d9 100644
--- a/lib/kernel/src/inet.erl
+++ b/lib/kernel/src/inet.erl
@@ -702,7 +702,7 @@ connect_options() ->
header, active, packet, packet_size, buffer, mode, deliver, line_delimiter,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw,
- show_econnreset].
+ show_econnreset, bind_to_device].
connect_options(Opts, Mod) ->
BaseOpts =
@@ -770,7 +770,7 @@ listen_options() ->
header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only,
exit_on_close, high_watermark, low_watermark, high_msgq_watermark,
low_msgq_watermark, send_timeout, send_timeout_close, delay_send,
- packet_size, raw, show_econnreset].
+ packet_size, raw, show_econnreset, bind_to_device].
listen_options(Opts, Mod) ->
BaseOpts =
@@ -850,7 +850,7 @@ udp_options() ->
deliver, ipv6_v6only,
broadcast, dontroute, multicast_if, multicast_ttl, multicast_loop,
add_membership, drop_membership, read_packets,raw,
- high_msgq_watermark, low_msgq_watermark].
+ high_msgq_watermark, low_msgq_watermark, bind_to_device].
udp_options(Opts, Mod) ->
@@ -919,6 +919,7 @@ sctp_options() ->
[ % The following are generic inet options supported for SCTP sockets:
mode, active, buffer, tos, tclass, priority, dontroute, reuseaddr, linger, sndbuf,
recbuf, ipv6_v6only, high_msgq_watermark, low_msgq_watermark,
+ bind_to_device,
% Other options are SCTP-specific (though they may be similar to their
% TCP and UDP counter-parts):
@@ -1055,7 +1056,6 @@ binary2filename(Bin) ->
Bin
end.
-
translate_ip(any, inet) -> {0,0,0,0};
translate_ip(loopback, inet) -> {127,0,0,1};
translate_ip(any, inet6) -> {0,0,0,0,0,0,0,0};
diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl
index 4e8f59a3b9..e6cd48935a 100644
--- a/lib/kernel/src/inet_int.hrl
+++ b/lib/kernel/src/inet_int.hrl
@@ -154,6 +154,7 @@
-define(INET_LOPT_TCP_SHOW_ECONNRESET, 39).
-define(INET_LOPT_LINE_DELIM, 40).
-define(INET_OPT_TCLASS, 41).
+-define(INET_OPT_BIND_TO_DEVICE, 42).
% Specific SCTP options: separate range:
-define(SCTP_OPT_RTOINFO, 100).
-define(SCTP_OPT_ASSOCINFO, 101).
diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl
index f60c13d2e3..86f6b95fb9 100644
--- a/lib/kernel/test/inet_SUITE.erl
+++ b/lib/kernel/test/inet_SUITE.erl
@@ -40,7 +40,8 @@
lookup_bad_search_option/1,
getif/1,
getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1,
- parse_strict_address/1, simple_netns/1, simple_netns_open/1]).
+ parse_strict_address/1, simple_netns/1, simple_netns_open/1,
+ simple_bind_to_device/1, simple_bind_to_device_open/1]).
-export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1,
kill_gethost/0, parallell_gethost/0, test_netns/0]).
@@ -58,7 +59,8 @@ all() ->
gethostnative_debug_level, gethostnative_soft_restart,
lookup_bad_search_option,
getif, getif_ifr_name_overflow, getservbyname_overflow,
- getifaddrs, parse_strict_address, simple_netns, simple_netns_open].
+ getifaddrs, parse_strict_address, simple_netns, simple_netns_open,
+ simple_bind_to_device, simple_bind_to_device_open].
groups() ->
[{parse, [], [parse_hosts, parse_address]}].
@@ -1247,3 +1249,67 @@ cmd(CmdString) ->
io:put_chars(["# ",CmdString,io_lib:nl()]),
io:put_chars([os:cmd(CmdString++" ; echo ' =>' $?")]),
ok.
+
+-define(CAP_NET_RAW, 13). %% from /usr/include/linux/capability.h
+
+can_bind_to_device({unix, linux}, {Major, _, _})
+ when Major > 2 ->
+ Status = os:cmd("cat /proc/self/status | grep CapEff"),
+ [_, CapEffStr] = string:tokens(Status, [$\n, $\t]),
+ CapEff = list_to_integer(CapEffStr, 16),
+ if CapEff band (1 bsl ?CAP_NET_RAW) =/= 0 ->
+ ok;
+ true ->
+ {skip,"insufficient capabilities, CAP_NET_RAW not granted"}
+ end;
+can_bind_to_device(_OS, _Version) ->
+ {skip,"socket option bind_to_device not supported on this OS or version"}.
+
+simple_bind_to_device(Config) when is_list(Config) ->
+ case can_bind_to_device(os:type(), os:version()) of
+ ok ->
+ {ok,U} = gen_udp:open(0),
+ jog_bind_to_device_opt(U),
+ ok = gen_udp:close(U),
+ %%
+ {ok,L} = gen_tcp:listen(0, []),
+ jog_bind_to_device_opt(L),
+ ok = gen_tcp:close(L),
+ %%
+ case gen_sctp:open() of
+ {ok,S} ->
+ jog_bind_to_device_opt(S),
+ ok = gen_sctp:close(S);
+ {error,eprotonosupport} ->
+ ok
+ end;
+ Other ->
+ Other
+ end.
+
+%% Smoke test bind_to_device support.
+simple_bind_to_device_open(Config) when is_list(Config) ->
+ case can_bind_to_device(os:type(), os:version()) of
+ ok ->
+ {ok,U} = gen_udp:open(0, [binary,{bind_to_device,<<"lo">>},inet]),
+ ok = gen_udp:close(U),
+ {ok,T} = gen_tcp:listen(0, [binary,{bind_to_device,<<"lo">>},inet]),
+ ok = gen_tcp:close(T),
+
+ case gen_sctp:open(0, [binary,{bind_to_device,<<"lo">>},inet]) of
+ {ok,S} ->
+ ok = gen_sctp:close(S);
+ {error,eprotonosupport} ->
+ ok
+ end;
+ Other ->
+ Other
+ end.
+
+jog_bind_to_device_opt(S) ->
+ %% This is just jogging the option mechanics
+ ok = inet:setopts(S, [{bind_to_device,<<>>}]),
+ {ok,[{bind_to_device,<<>>}]} = inet:getopts(S, [bind_to_device]),
+ ok = inet:setopts(S, [{bind_to_device,<<"lo">>}]),
+ {ok,[{bind_to_device,<<"lo">>}]} = inet:getopts(S, [bind_to_device]),
+ ok.