diff options
author | Raimo Niskanen <[email protected]> | 2013-09-04 14:58:35 +0200 |
---|---|---|
committer | Raimo Niskanen <[email protected]> | 2013-09-04 14:58:35 +0200 |
commit | 13713794c52e0273dd312823af7a0427d9295574 (patch) | |
tree | 4a55995dbeb41bee9d3783b76243449fabeecc49 /lib/kernel | |
parent | 11c31f5ba8489b0e179caea46e514b990fbadb95 (diff) | |
parent | af112cb10613d422080785621a274a18d96567c0 (diff) | |
download | otp-13713794c52e0273dd312823af7a0427d9295574.tar.gz otp-13713794c52e0273dd312823af7a0427d9295574.tar.bz2 otp-13713794c52e0273dd312823af7a0427d9295574.zip |
Merge branch 'raimo/linux-network-namespace-sockopt/OTP-11157' into maint
* raimo/linux-network-namespace-sockopt/OTP-11157:
Document socket option 'netns'
Rudimentary test
Make netns option value a string
Implement netns for SCTP + bugfixes
Implement netns option for TCP and UDP
Implement emulator netns support for TCP and UDP
Diffstat (limited to 'lib/kernel')
-rw-r--r-- | lib/kernel/doc/src/inet.xml | 53 | ||||
-rw-r--r-- | lib/kernel/src/inet.erl | 105 | ||||
-rw-r--r-- | lib/kernel/src/inet_int.hrl | 1 | ||||
-rw-r--r-- | lib/kernel/test/inet_SUITE.erl | 99 |
4 files changed, 249 insertions, 9 deletions
diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index 254dfbf034..fd62f778a2 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -722,6 +722,59 @@ fe80::204:acff:fe17:bf38 <p>Received <c>Packet</c> is delivered as defined by Mode.</p> </item> + <tag><c>{netns, Namespace :: file:filename_all()}</c></tag> + <item> + <p>Set a network namespace for the socket. The <c>Namespace</c> + parameter is a filename defining the namespace for example + <c>"/var/run/netns/example"</c> typically created by the command + <c>ip netns add example</c>. This option must be used in a + function call that creates a socket i.e + <seealso marker="gen_tcp#connect/3"> + gen_tcp:connect/3,4</seealso>, + <seealso marker="gen_tcp#listen/2"> + gen_tcp:listen/2</seealso>, + <seealso marker="gen_udp#open/1"> + gen_udp:open/1,2</seealso> or + <seealso marker="gen_sctp#open/0"> + gen_sctp:open/0-2</seealso>. + </p> + <p>This option uses the Linux specific syscall + <c>setns()</c> such as in Linux kernel 3.0 or later + and therefore only exists when the runtime system + has been compiled for such an operating system. + </p> + <p> + The virtual machine also needs elevated privileges either + running as superuser or (for Linux) having the capability + <c>CAP_SYS_ADMIN</c> according to the documentation for setns(2). + However, during testing also <c>CAP_SYS_PTRACE</c> + and <c>CAP_DAC_READ_SEARCH</c> has proven to be necessary. + Example:<code> +setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp +</code> + Note also that the filesystem containing the virtual machine + executable (<c>beam.smp</c> in the example above) has to be local, + mounted without the <c>nosetuid</c> flag, + support extended attributes and that + the kernel has to support file capabilities. + All this runs out of the box on at least Ubuntu 12.04 LTS, + except that SCTP sockets appears to not support + network namespaces. + </p> + <p>The <c>Namespace</c> is a file name and is encoded + and decoded as discussed in + <seealso marker="file">file</seealso> + except that the emulator flag <c>+fnu</c> is ignored and + <seealso marker="#getopts/2">getopts/2</seealso> + for this option will return a binary for the filename + if the stored filename can not be decoded, + which should only happen if you set the option using a binary + that can not be decoded with the emulator's filename encoding: + <seealso marker="file#native_name_encoding/0"> + file:native_name_encoding/0</seealso>. + </p> + </item> + <tag><c>list</c></tag> <item> <p>Received <c>Packet</c> is delivered as a list.</p> diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index 5749027acd..27f085c3aa 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -200,7 +200,14 @@ send(Socket, Packet) -> Options :: [socket_setopt()]. setopts(Socket, Opts) -> - prim_inet:setopts(Socket, Opts). + SocketOpts = + [case Opt of + {netns,NS} -> + {netns,filename2binary(NS)}; + _ -> + Opt + end || Opt <- Opts], + prim_inet:setopts(Socket, SocketOpts). -spec getopts(Socket, Options) -> {'ok', OptionValues} | {'error', posix()} when @@ -209,7 +216,18 @@ setopts(Socket, Opts) -> OptionValues :: [socket_setopt()]. getopts(Socket, Opts) -> - prim_inet:getopts(Socket, Opts). + case prim_inet:getopts(Socket, Opts) of + {ok,OptionValues} -> + {ok, + [case OptionValue of + {netns,Bin} -> + {netns,binary2filename(Bin)}; + _ -> + OptionValue + end || OptionValue <- OptionValues]}; + Other -> + Other + end. -spec getifaddrs(Socket :: socket()) -> {'ok', [string()]} | {'error', posix()}. @@ -641,6 +659,14 @@ con_opt([Opt | Opts], R, As) -> {tcp_module,_} -> con_opt(Opts, R, As); inet -> con_opt(Opts, R, As); inet6 -> con_opt(Opts, R, As); + {netns,NS} -> + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of + true -> + con_opt(Opts, R#connect_opts { fd = [{netns,BinNS}] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> con_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -699,6 +725,14 @@ list_opt([Opt | Opts], R, As) -> {tcp_module,_} -> list_opt(Opts, R, As); inet -> list_opt(Opts, R, As); inet6 -> list_opt(Opts, R, As); + {netns,NS} -> + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of + true -> + list_opt(Opts, R#listen_opts { fd = [{netns,BinNS}] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> list_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -745,6 +779,14 @@ udp_opt([Opt | Opts], R, As) -> {udp_module,_} -> udp_opt(Opts, R, As); inet -> udp_opt(Opts, R, As); inet6 -> udp_opt(Opts, R, As); + {netns,NS} -> + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of + true -> + list_opt(Opts, R#udp_opts { fd = [{netns,BinNS}] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> udp_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -814,6 +856,17 @@ sctp_opt([Opt|Opts], Mod, R, As) -> {sctp_module,_} -> sctp_opt (Opts, Mod, R, As); % Done with inet -> sctp_opt (Opts, Mod, R, As); % Done with inet6 -> sctp_opt (Opts, Mod, R, As); % Done with + {netns,NS} -> + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of + true -> + sctp_opt( + Opts, Mod, + R#sctp_opts { fd = [{netns,BinNS}] }, + As); + false -> + {error, badarg} + end; {Name,Val} -> sctp_opt (Opts, Mod, R, As, Name, Val); _ -> {error,badarg} end; @@ -858,6 +911,39 @@ add_opt(Name, Val, Opts, As) -> end. +%% Passthrough all unknown - catch type errors later +filename2binary(List) when is_list(List) -> + OutEncoding = file:native_name_encoding(), + try unicode:characters_to_binary(List, unicode, OutEncoding) of + Bin when is_binary(Bin) -> + Bin; + _ -> + List + catch + error:badarg -> + List + end; +filename2binary(Bin) -> + Bin. + +binary2filename(Bin) -> + InEncoding = file:native_name_encoding(), + case unicode:characters_to_list(Bin, InEncoding) of + Filename when is_list(Filename) -> + Filename; + _ -> + %% For getopt/setopt of netns this should only happen if + %% a binary with wrong encoding was used when setting the + %% option, hence the user shall eat his/her own medicine. + %% + %% I.e passthrough here too for now. + %% Future usecases will most probably not want this, + %% rather Unicode error or warning + %% depending on emulator flag instead. + Bin + end. + + translate_ip(any, inet) -> {0,0,0,0}; translate_ip(loopback, inet) -> {127,0,0,1}; translate_ip(any, inet6) -> {0,0,0,0,0,0,0,0}; @@ -1070,7 +1156,7 @@ gethostbyaddr_tm_native(Addr, Timer, Opts) -> Result -> Result end. --spec open(Fd :: integer(), +-spec open(Fd_or_OpenOpts :: integer() | list(), Addr :: ip_address(), Port :: port_number(), Opts :: [socket_setopt()], @@ -1080,8 +1166,14 @@ gethostbyaddr_tm_native(Addr, Timer, Opts) -> Module :: atom()) -> {'ok', socket()} | {'error', posix()}. -open(Fd, Addr, Port, Opts, Protocol, Family, Type, Module) when Fd < 0 -> - case prim_inet:open(Protocol, Family, Type) of +open(FdO, Addr, Port, Opts, Protocol, Family, Type, Module) + when is_integer(FdO), FdO < 0; + is_list(FdO) -> + OpenOpts = + if is_list(FdO) -> FdO; + true -> [] + end, + case prim_inet:open(Protocol, Family, Type, OpenOpts) of {ok,S} -> case prim_inet:setopts(S, Opts) of ok -> @@ -1104,7 +1196,8 @@ open(Fd, Addr, Port, Opts, Protocol, Family, Type, Module) when Fd < 0 -> Error -> Error end; -open(Fd, _Addr, _Port, Opts, Protocol, Family, Type, Module) -> +open(Fd, _Addr, _Port, Opts, Protocol, Family, Type, Module) + when is_integer(Fd) -> fdopen(Fd, Opts, Protocol, Family, Type, Module). bindx(S, [Addr], Port0) -> diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl index 67a99913a1..18a4a61b2f 100644 --- a/lib/kernel/src/inet_int.hrl +++ b/lib/kernel/src/inet_int.hrl @@ -143,6 +143,7 @@ -define(INET_LOPT_TCP_SEND_TIMEOUT_CLOSE, 35). -define(INET_LOPT_MSGQ_HIWTRMRK, 36). -define(INET_LOPT_MSGQ_LOWTRMRK, 37). +-define(INET_LOPT_NETNS, 38). % Specific SCTP options: separate range: -define(SCTP_OPT_RTOINFO, 100). -define(SCTP_OPT_ASSOCINFO, 101). diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl index 46c8c0b88b..ed43749cc0 100644 --- a/lib/kernel/test/inet_SUITE.erl +++ b/lib/kernel/test/inet_SUITE.erl @@ -38,10 +38,10 @@ gethostnative_debug_level/0, gethostnative_debug_level/1, getif/1, getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1, - parse_strict_address/1]). + parse_strict_address/1, simple_netns/1]). -export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1, - kill_gethost/0, parallell_gethost/0]). + kill_gethost/0, parallell_gethost/0, test_netns/0]). -export([init_per_testcase/2, end_per_testcase/2]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -53,7 +53,7 @@ all() -> t_gethostnative, gethostnative_parallell, cname_loop, gethostnative_debug_level, gethostnative_soft_restart, getif, getif_ifr_name_overflow, getservbyname_overflow, - getifaddrs, parse_strict_address]. + getifaddrs, parse_strict_address, simple_netns]. groups() -> [{parse, [], [parse_hosts, parse_address]}]. @@ -1099,3 +1099,96 @@ toupper([C|Cs]) when is_integer(C) -> end; toupper([]) -> []. + + +simple_netns(Config) when is_list(Config) -> + {ok,U} = gen_udp:open(0), + case inet:setopts(U, [{netns,""}]) of + ok -> + jog_netns_opt(U), + ok = gen_udp:close(U), + %% + {ok,L} = gen_tcp:listen(0, []), + jog_netns_opt(L), + ok = gen_tcp:close(L), + %% + {ok,S} = gen_sctp:open(), + jog_netns_opt(S), + ok = gen_sctp:close(S); + {error,einval} -> + {skip,"setns() not supported"} + end. + +jog_netns_opt(S) -> + %% This is just jogging the option mechanics + ok = inet:setopts(S, [{netns,""}]), + {ok,[{netns,""}]} = inet:getopts(S, [netns]), + ok = inet:setopts(S, [{netns,"/proc/self/ns/net"}]), + {ok,[{netns,"/proc/self/ns/net"}]} = inet:getopts(S, [netns]), + ok. + + +%% Manual test to be run outside test_server in an emulator +%% started by root, in a machine with setns() support... +test_netns() -> + DefaultIF = v1, + DefaultIP = {192,168,1,17}, + Namespace = "test", + NamespaceIF = v2, + NamespaceIP = {192,168,1,18}, + %% + DefaultIPString = inet_parse:ntoa(DefaultIP), + NamespaceIPString = inet_parse:ntoa(NamespaceIP), + cmd("ip netns add ~s", + [Namespace]), + cmd("ip link add name ~w type veth peer name ~w netns ~s", + [DefaultIF,NamespaceIF,Namespace]), + cmd("ip netns exec ~s ip addr add ~s/30 dev ~w", + [Namespace,NamespaceIPString,NamespaceIF]), + cmd("ip netns exec ~s ip link set ~w up", + [Namespace,NamespaceIF]), + cmd("ip addr add ~s/30 dev ~w", + [DefaultIPString,DefaultIF]), + cmd("ip link set ~w up", + [DefaultIF]), + try test_netns( + {DefaultIF,DefaultIP}, + filename:join("/var/run/netns/", Namespace), + {NamespaceIF,NamespaceIP}) of + Result -> + io:put_chars(["#### Test done",io_lib:nl()]), + Result + after + cmd("ip link delete ~w type veth", + [DefaultIF]), + cmd("ip netns delete ~s", + [Namespace]) + end. + +test_netns({DefaultIF,DefaultIP}, Namespace, {NamespaceIF,NamespaceIP}) -> + {ok,ListenSocket} = gen_tcp:listen(0, [{active,false}]), + {ok,[{addr,DefaultIP}]} = inet:ifget(ListenSocket, DefaultIF, [addr]), + {ok,ListenPort} = inet:port(ListenSocket), + {ok,ConnectSocket} = + gen_tcp:connect( + DefaultIP, ListenPort, [{active,false},{netns,Namespace}], 3000), + {ok,[{addr,NamespaceIP}]} = inet:ifget(ConnectSocket, NamespaceIF, [addr]), + {ok,ConnectPort} = inet:port(ConnectSocket), + {ok,AcceptSocket} = gen_tcp:accept(ListenSocket, 0), + {ok,AcceptPort} = inet:port(AcceptSocket), + {ok,{NamespaceIP,ConnectPort}} = inet:peername(AcceptSocket), + {ok,{DefaultIP,AcceptPort}} = inet:peername(ConnectSocket), + ok = gen_tcp:send(ConnectSocket, "data"), + ok = gen_tcp:close(ConnectSocket), + {ok,"data"} = gen_tcp:recv(AcceptSocket, 4, 1000), + {error,closed} = gen_tcp:recv(AcceptSocket, 1, 1000), + ok = gen_tcp:close(AcceptSocket), + ok = gen_tcp:close(ListenSocket). + +cmd(Cmd, Args) -> + cmd(io_lib:format(Cmd, Args)). +%% +cmd(CmdString) -> + io:put_chars(["# ",CmdString,io_lib:nl()]), + io:put_chars([os:cmd(CmdString++" ; echo ' =>' $?")]), + ok. |