From 41989072fdfa766916489088aa7eae48a7d89961 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 12 Jul 2013 15:42:33 +0200 Subject: Implement emulator netns support for TCP and UDP --- erts/configure.in | 4 ++ erts/emulator/drivers/common/inet_drv.c | 106 ++++++++++++++++++++++++++++++-- erts/preloaded/ebin/prim_inet.beam | Bin 70520 -> 70960 bytes erts/preloaded/src/prim_inet.erl | 37 +++++++---- lib/kernel/src/inet_int.hrl | 1 + 5 files changed, 133 insertions(+), 15 deletions(-) diff --git a/erts/configure.in b/erts/configure.in index b056ba44e2..236c7075f2 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -1663,6 +1663,10 @@ if test x"$ac_cv_header_netinet_sctp_h" = x"yes"; then ]) fi +dnl Check for setns +AC_CHECK_HEADERS(sched.h setns.h) +AC_CHECK_FUNCS([setns]) + HAVE_VALGRIND=no AC_CHECK_HEADER(valgrind/valgrind.h, HAVE_VALGRIND=yes) AC_SUBST(HAVE_VALGRIND) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 301ce2d0e2..9ef2a1cfc0 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -282,7 +282,7 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); static unsigned long zero_value = 0; static unsigned long one_value = 1; -#else +#else /* #ifdef __WIN32__ */ #include #ifdef NETDB_H_NEEDS_IN_H @@ -315,9 +315,17 @@ static unsigned long one_value = 1; #include +#ifdef HAVE_SCHED_H +#include +#endif + +#ifdef HAVE_SETNS_H +#include +#endif + /* SCTP support -- currently for UNIX platforms only: */ #undef HAVE_SCTP -#if (!defined(__WIN32__) && defined(HAVE_SCTP_H)) +#if defined(HAVE_SCTP_H) #include @@ -418,7 +426,7 @@ static int (*p_sctp_bindx)(int sd, struct sockaddr *addrs, static int (*p_sctp_peeloff)(int sd, sctp_assoc_t assoc_id) = NULL; #endif -#endif /* SCTP supported */ +#endif /* #if defined(HAVE_SCTP_H) */ #ifndef WANT_NONBLOCKING #define WANT_NONBLOCKING @@ -512,7 +520,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) } while(0) -#endif /* __WIN32__ */ +#endif /* #ifdef __WIN32__ #else */ #ifdef HAVE_SOCKLEN_T # define SOCKLEN_T socklen_t @@ -680,6 +688,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_LOPT_TCP_SEND_TIMEOUT_CLOSE 35 /* auto-close on send timeout or not */ #define INET_LOPT_MSGQ_HIWTRMRK 36 /* set local msgq high watermark */ #define INET_LOPT_MSGQ_LOWTRMRK 37 /* set local msgq low watermark */ +#define INET_LOPT_NETNS 38 /* Network namespace pathname */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -955,6 +964,10 @@ typedef struct { int is_ignored; /* if a fd is ignored by the inet_drv. This flag should be set to true when the fd is used outside of inet_drv. */ +#ifdef HAVE_SETNS + char *netns; /* Socket network namespace name + as full file path */ +#endif } inet_descriptor; @@ -3908,10 +3921,57 @@ static int erl_inet_close(inet_descriptor* desc) static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, char** rbuf, ErlDrvSizeT rsize) { +#ifdef HAVE_SETNS + int current_ns, new_ns, save_errno; + current_ns = new_ns = save_errno = 0; +#endif if (desc->state != INET_STATE_CLOSED) return ctl_xerror(EXBADSEQ, rbuf, rsize); +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + current_ns = open("/proc/self/ns/net", O_RDONLY); + if (current_ns == INVALID_SOCKET) + return ctl_error(sock_errno(), rbuf, rsize); + new_ns = open(desc->netns, O_RDONLY); + if (new_ns == INVALID_SOCKET) { + save_errno = sock_errno(); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + if (setns(new_ns, CLONE_NEWNET) != 0) { + save_errno = sock_errno(); + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(new_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) return ctl_error(sock_errno(), rbuf, rsize); +#ifdef HAVE_SETNS + if (desc->netns != NULL) { + if (setns(current_ns, CLONE_NEWNET) != 0) { + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + return ctl_error(save_errno, rbuf, rsize); + } + else { + while (close(current_ns) == INVALID_SOCKET && + sock_errno() == EINTR); + } + } +#endif if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) return ctl_error(sock_errno(), rbuf, rsize); SET_NONBLOCKING(desc->s); @@ -5529,6 +5589,20 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) } continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + /* It is annoying that ival and len are both (signed) int */ + if (ival < 0) return -1; + if (len < ival) return -1; + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(((unsigned int) ival) + 1); + memcpy(desc->netns, ptr, ival); + desc->netns[ival] = '\0'; + ptr += ival; + len -= ival; + continue; +#endif + case INET_OPT_REUSEADDR: #ifdef __WIN32__ continue; /* Bjorn says */ @@ -6454,6 +6528,22 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, } continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + size_t netns_len; + netns_len = strlen(desc->netns); + *ptr++ = opt; + put_int32(netns_len, ptr); + PLACE_FOR(netns_len, ptr); + memcpy(ptr, desc->netns, netns_len); + ptr += netns_len; + } else { + TRUNCATE_TO(0,ptr); + } + continue; +#endif + case INET_OPT_PRIORITY: #ifdef SO_PRIORITY type = SO_PRIORITY; @@ -7458,6 +7548,10 @@ static ErlDrvSSizeT inet_subscribe(inet_descriptor* desc, static void inet_stop(inet_descriptor* desc) { erl_inet_close(desc); +#ifdef HAVE_SETNS + if (desc->netns != NULL) + FREE(desc->netns); +#endif FREE(desc); } @@ -7537,6 +7631,10 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->is_ignored = 0; +#ifdef HAVE_SETNS + desc->netns = NULL; +#endif + return (ErlDrvData)desc; } diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam index 8638ef677e..5b38871282 100644 Binary files a/erts/preloaded/ebin/prim_inet.beam and b/erts/preloaded/ebin/prim_inet.beam differ diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index fb1269cf91..fa621681f3 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -25,7 +25,7 @@ %% Primitive inet_drv interface --export([open/3, fdopen/4, close/1]). +-export([open/3, open/4, fdopen/4, close/1]). -export([bind/3, listen/1, listen/2, peeloff/2]). -export([connect/3, connect/4, async_connect/4]). -export([accept/1, accept/2, async_accept/2]). @@ -64,22 +64,31 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% open(Protocol, Family, Type) -> - open(Protocol, Family, Type, ?INET_REQ_OPEN, []). + open(Protocol, Family, Type, [], ?INET_REQ_OPEN, []). + +open(Protocol, Family, Type, Opts) -> + open(Protocol, Family, Type, Opts, ?INET_REQ_OPEN, []). fdopen(Protocol, Family, Type, Fd) when is_integer(Fd) -> - open(Protocol, Family, Type, ?INET_REQ_FDOPEN, ?int32(Fd)). + open(Protocol, Family, Type, [], ?INET_REQ_FDOPEN, ?int32(Fd)). -open(Protocol, Family, Type, Req, Data) -> +open(Protocol, Family, Type, Opts, Req, Data) -> Drv = protocol2drv(Protocol), AF = enc_family(Family), T = enc_type(Type), try erlang:open_port({spawn_driver,Drv}, [binary]) of S -> - case ctl_cmd(S, Req, [AF,T,Data]) of - {ok,_} -> {ok,S}; - {error,_}=Error -> + case setopts(S, Opts) of + ok -> + case ctl_cmd(S, Req, [AF,T,Data]) of + {ok,_} -> {ok,S}; + {error,_}=E1 -> + close(S), + E1 + end; + {error,_}=E2 -> close(S), - Error + E2 end catch %% The only (?) way to get here is to try to open @@ -1108,6 +1117,7 @@ enc_opt(send_timeout_close) -> ?INET_LOPT_TCP_SEND_TIMEOUT_CLOSE; enc_opt(delay_send) -> ?INET_LOPT_TCP_DELAY_SEND; enc_opt(packet_size) -> ?INET_LOPT_PACKET_SIZE; enc_opt(read_packets) -> ?INET_LOPT_READ_PACKETS; +enc_opt(netns) -> ?INET_LOPT_NETNS; enc_opt(raw) -> ?INET_OPT_RAW; % Names of SCTP opts: enc_opt(sctp_rtoinfo) -> ?SCTP_OPT_RTOINFO; @@ -1164,6 +1174,7 @@ dec_opt(?INET_LOPT_TCP_SEND_TIMEOUT_CLOSE) -> send_timeout_close; dec_opt(?INET_LOPT_TCP_DELAY_SEND) -> delay_send; dec_opt(?INET_LOPT_PACKET_SIZE) -> packet_size; dec_opt(?INET_LOPT_READ_PACKETS) -> read_packets; +dec_opt(?INET_LOPT_NETNS) -> netns; dec_opt(?INET_OPT_RAW) -> raw; dec_opt(I) when is_integer(I) -> undefined. @@ -1261,6 +1272,7 @@ type_opt_1(send_timeout_close) -> bool; type_opt_1(delay_send) -> bool; type_opt_1(packet_size) -> uint; type_opt_1(read_packets) -> uint; +type_opt_1(netns) -> binary; %% %% SCTP options (to be set). If the type is a record type, the corresponding %% record signature is returned, otherwise, an "elementary" type tag @@ -1487,9 +1499,12 @@ type_value_2({bitenumlist,List,_}, EnumList) -> Ls when is_list(Ls) -> true; false -> false end; -type_value_2(binary,Bin) when is_binary(Bin) -> true; -type_value_2(binary_or_uint,Bin) when is_binary(Bin) -> true; -type_value_2(binary_or_uint,Int) when is_integer(Int), Int >= 0 -> true; +type_value_2(binary,Bin) + when is_binary(Bin), byte_size(Bin) < (1 bsl 32) -> true; +type_value_2(binary_or_uint,Bin) + when is_binary(Bin), byte_size(Bin) < (1 bsl 32) -> true; +type_value_2(binary_or_uint,Int) + when is_integer(Int), Int >= 0 -> true; %% Type-checking of SCTP options type_value_2(sctp_assoc_id, X) when X band 16#ffffffff =:= X -> true; diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl index 67a99913a1..18a4a61b2f 100644 --- a/lib/kernel/src/inet_int.hrl +++ b/lib/kernel/src/inet_int.hrl @@ -143,6 +143,7 @@ -define(INET_LOPT_TCP_SEND_TIMEOUT_CLOSE, 35). -define(INET_LOPT_MSGQ_HIWTRMRK, 36). -define(INET_LOPT_MSGQ_LOWTRMRK, 37). +-define(INET_LOPT_NETNS, 38). % Specific SCTP options: separate range: -define(SCTP_OPT_RTOINFO, 100). -define(SCTP_OPT_ASSOCINFO, 101). -- cgit v1.2.3 From a9f92f3d024feffe23303af141dc4b13c7c17aa5 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Tue, 16 Jul 2013 09:34:54 +0200 Subject: Implement netns option for TCP and UDP --- lib/kernel/src/inet.erl | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index 3ea530a366..0ee3234b05 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -634,6 +634,13 @@ con_opt([Opt | Opts], R, As) -> {tcp_module,_} -> con_opt(Opts, R, As); inet -> con_opt(Opts, R, As); inet6 -> con_opt(Opts, R, As); + {netns,NS} -> + case prim_inet:is_sockopt_val(netns, NS) of + true -> + con_opt(Opts, R#connect_opts { fd = [Opt] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> con_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -692,6 +699,13 @@ list_opt([Opt | Opts], R, As) -> {tcp_module,_} -> list_opt(Opts, R, As); inet -> list_opt(Opts, R, As); inet6 -> list_opt(Opts, R, As); + {netns,NS} -> + case prim_inet:is_sockopt_val(netns, NS) of + true -> + list_opt(Opts, R#listen_opts { fd = [Opt] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> list_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -738,6 +752,13 @@ udp_opt([Opt | Opts], R, As) -> {udp_module,_} -> udp_opt(Opts, R, As); inet -> udp_opt(Opts, R, As); inet6 -> udp_opt(Opts, R, As); + {netns,NS} -> + case prim_inet:is_sockopt_val(netns, NS) of + true -> + list_opt(Opts, R#udp_opts { fd = [Opt] }, As); + false -> + {error, badarg} + end; {Name,Val} when is_atom(Name) -> udp_add(Name, Val, R, Opts, As); _ -> {error, badarg} end; @@ -1063,7 +1084,7 @@ gethostbyaddr_tm_native(Addr, Timer, Opts) -> Result -> Result end. --spec open(Fd :: integer(), +-spec open(Fd_or_OpenOpts :: integer() | list(), Addr :: ip_address(), Port :: port_number(), Opts :: [socket_setopt()], @@ -1073,8 +1094,14 @@ gethostbyaddr_tm_native(Addr, Timer, Opts) -> Module :: atom()) -> {'ok', socket()} | {'error', posix()}. -open(Fd, Addr, Port, Opts, Protocol, Family, Type, Module) when Fd < 0 -> - case prim_inet:open(Protocol, Family, Type) of +open(FdO, Addr, Port, Opts, Protocol, Family, Type, Module) + when is_integer(FdO), FdO < 0; + is_list(FdO) -> + OpenOpts = + if is_list(FdO) -> FdO; + true -> [] + end, + case prim_inet:open(Protocol, Family, Type, OpenOpts) of {ok,S} -> case prim_inet:setopts(S, Opts) of ok -> @@ -1097,7 +1124,8 @@ open(Fd, Addr, Port, Opts, Protocol, Family, Type, Module) when Fd < 0 -> Error -> Error end; -open(Fd, _Addr, _Port, Opts, Protocol, Family, Type, Module) -> +open(Fd, _Addr, _Port, Opts, Protocol, Family, Type, Module) + when is_integer(Fd) -> fdopen(Fd, Opts, Protocol, Family, Type, Module). bindx(S, [Addr], Port0) -> -- cgit v1.2.3 From 08ff3673e25fdd184ff92d45d4609cd423fd1e34 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Tue, 16 Jul 2013 15:14:50 +0200 Subject: Implement netns for SCTP + bugfixes --- erts/emulator/drivers/common/inet_drv.c | 67 ++++++++++++++++++++++++++++++--- lib/kernel/src/inet.erl | 7 ++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 9ef2a1cfc0..60db50e80a 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1194,6 +1194,7 @@ static ErlDrvTermData am_dontroute; static ErlDrvTermData am_priority; static ErlDrvTermData am_tos; static ErlDrvTermData am_ipv6_v6only; +static ErlDrvTermData am_netns; #endif /* speical errors for bad ports and sequences */ @@ -3511,6 +3512,7 @@ static void inet_init_sctp(void) { INIT_ATOM(priority); INIT_ATOM(tos); INIT_ATOM(ipv6_v6only); + INIT_ATOM(netns); /* Option names */ INIT_ATOM(sctp_rtoinfo); @@ -3921,14 +3923,21 @@ static int erl_inet_close(inet_descriptor* desc) static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, char** rbuf, ErlDrvSizeT rsize) { + int save_errno; #ifdef HAVE_SETNS - int current_ns, new_ns, save_errno; - current_ns = new_ns = save_errno = 0; + int current_ns, new_ns; + current_ns = new_ns = 0; #endif + save_errno = 0; + if (desc->state != INET_STATE_CLOSED) return ctl_xerror(EXBADSEQ, rbuf, rsize); + #ifdef HAVE_SETNS if (desc->netns != NULL) { + /* Temporarily change network namespace for this thread + * while creating the socket + */ current_ns = open("/proc/self/ns/net", O_RDONLY); if (current_ns == INVALID_SOCKET) return ctl_error(sock_errno(), rbuf, rsize); @@ -3954,11 +3963,18 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, } #endif if ((desc->s = sock_open(domain, type, desc->sprotocol)) == INVALID_SOCKET) - return ctl_error(sock_errno(), rbuf, rsize); + save_errno = sock_errno(); #ifdef HAVE_SETNS if (desc->netns != NULL) { + /* Restore network namespace */ if (setns(current_ns, CLONE_NEWNET) != 0) { - save_errno = sock_errno(); + /* XXX Failed to restore network namespace. + * What to do? Tidy up and return an error... + * Note that the thread now might still be in the namespace. + * Can this even happen? Should the emulator be aborted? + */ + if (desc->s != INVALID_SOCKET) + save_errno = sock_errno(); while (close(desc->s) == INVALID_SOCKET && sock_errno() == EINTR); desc->s = INVALID_SOCKET; @@ -3972,8 +3988,16 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, } } #endif - if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) - return ctl_error(sock_errno(), rbuf, rsize); + if (desc->s == INVALID_SOCKET) + return ctl_error(save_errno, rbuf, rsize); + + if ((desc->event = sock_create_event(desc)) == INVALID_EVENT) { + save_errno = sock_errno(); + while (close(desc->s) == INVALID_SOCKET && + sock_errno() == EINTR); + desc->s = INVALID_SOCKET; + return ctl_error(save_errno, rbuf, rsize); + } SET_NONBLOCKING(desc->s); #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); @@ -5932,6 +5956,21 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) res = 0; continue; +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + { + size_t ns_len; + ns_len = get_int32(curr); curr += 4; + CHKLEN(curr, ns_len); + if (desc->netns != NULL) FREE(desc->netns); + desc->netns = ALLOC(ns_len + 1); + memcpy(desc->netns, curr, ns_len); + desc->netns[ns_len] = '\0'; + curr += ns_len; + } + continue; +#endif + /* SCTP options and applicable generic INET options: */ case SCTP_OPT_RTOINFO: @@ -6827,6 +6866,22 @@ static ErlDrvSSizeT sctp_fill_opts(inet_descriptor* desc, break; } +#ifdef HAVE_SETNS + case INET_LOPT_NETNS: + if (desc->netns != NULL) { + PLACE_FOR + (spec, i, + LOAD_ATOM_CNT + LOAD_BUF2BINARY_CNT + LOAD_TUPLE_CNT); + i = LOAD_ATOM (spec, i, am_netns); + i = LOAD_BUF2BINARY + (spec, i, desc->netns, strlen(desc->netns)); + i = LOAD_TUPLE (spec, i, 2); + break; + } + else + continue; /* Ignore */ +#endif + /* SCTP and generic INET options: */ case SCTP_OPT_RTOINFO: diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index 0ee3234b05..e118382bfe 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -828,6 +828,13 @@ sctp_opt([Opt|Opts], Mod, R, As) -> {sctp_module,_} -> sctp_opt (Opts, Mod, R, As); % Done with inet -> sctp_opt (Opts, Mod, R, As); % Done with inet6 -> sctp_opt (Opts, Mod, R, As); % Done with + {netns,NS} -> + case prim_inet:is_sockopt_val(netns, NS) of + true -> + sctp_opt(Opts, Mod, R#sctp_opts { fd = [Opt] }, As); + false -> + {error, badarg} + end; {Name,Val} -> sctp_opt (Opts, Mod, R, As, Name, Val); _ -> {error,badarg} end; -- cgit v1.2.3 From 4b4e9b8edb7c2340dbcdfd18e68981c028ede787 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Wed, 17 Jul 2013 11:14:36 +0200 Subject: Make netns option value a string --- lib/kernel/src/inet.erl | 78 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index e118382bfe..d2137065b6 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -200,7 +200,14 @@ send(Socket, Packet) -> Options :: [socket_setopt()]. setopts(Socket, Opts) -> - prim_inet:setopts(Socket, Opts). + SocketOpts = + [case Opt of + {netns,NS} -> + {netns,filename2binary(NS)}; + _ -> + Opt + end || Opt <- Opts], + prim_inet:setopts(Socket, SocketOpts). -spec getopts(Socket, Options) -> {'ok', OptionValues} | {'error', posix()} when @@ -209,7 +216,18 @@ setopts(Socket, Opts) -> OptionValues :: [socket_setopt()]. getopts(Socket, Opts) -> - prim_inet:getopts(Socket, Opts). + case prim_inet:getopts(Socket, Opts) of + {ok,OptionValues} -> + {ok, + [case OptionValue of + {netns,Bin} -> + {netns,binary2filename(Bin)}; + _ -> + OptionValue + end || OptionValue <- OptionValues]}; + Other -> + Other + end. -spec getifaddrs(Socket :: socket()) -> {'ok', [string()]} | {'error', posix()}. @@ -635,9 +653,10 @@ con_opt([Opt | Opts], R, As) -> inet -> con_opt(Opts, R, As); inet6 -> con_opt(Opts, R, As); {netns,NS} -> - case prim_inet:is_sockopt_val(netns, NS) of + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of true -> - con_opt(Opts, R#connect_opts { fd = [Opt] }, As); + con_opt(Opts, R#connect_opts { fd = [{netns,BinNS}] }, As); false -> {error, badarg} end; @@ -700,9 +719,10 @@ list_opt([Opt | Opts], R, As) -> inet -> list_opt(Opts, R, As); inet6 -> list_opt(Opts, R, As); {netns,NS} -> - case prim_inet:is_sockopt_val(netns, NS) of + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of true -> - list_opt(Opts, R#listen_opts { fd = [Opt] }, As); + list_opt(Opts, R#listen_opts { fd = [{netns,BinNS}] }, As); false -> {error, badarg} end; @@ -753,9 +773,10 @@ udp_opt([Opt | Opts], R, As) -> inet -> udp_opt(Opts, R, As); inet6 -> udp_opt(Opts, R, As); {netns,NS} -> - case prim_inet:is_sockopt_val(netns, NS) of + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of true -> - list_opt(Opts, R#udp_opts { fd = [Opt] }, As); + list_opt(Opts, R#udp_opts { fd = [{netns,BinNS}] }, As); false -> {error, badarg} end; @@ -829,9 +850,13 @@ sctp_opt([Opt|Opts], Mod, R, As) -> inet -> sctp_opt (Opts, Mod, R, As); % Done with inet6 -> sctp_opt (Opts, Mod, R, As); % Done with {netns,NS} -> - case prim_inet:is_sockopt_val(netns, NS) of + BinNS = filename2binary(NS), + case prim_inet:is_sockopt_val(netns, BinNS) of true -> - sctp_opt(Opts, Mod, R#sctp_opts { fd = [Opt] }, As); + sctp_opt( + Opts, Mod, + R#sctp_opts { fd = [{netns,BinNS}] }, + As); false -> {error, badarg} end; @@ -879,6 +904,39 @@ add_opt(Name, Val, Opts, As) -> end. +%% Passthrough all unknown - catch type errors later +filename2binary(List) when is_list(List) -> + OutEncoding = file:native_name_encoding(), + try unicode:characters_to_binary(List, unicode, OutEncoding) of + Bin when is_binary(Bin) -> + Bin; + _ -> + List + catch + error:badarg -> + List + end; +filename2binary(Bin) -> + Bin. + +binary2filename(Bin) -> + InEncoding = file:native_name_encoding(), + case unicode:characters_to_list(Bin, InEncoding) of + Filename when is_list(Filename) -> + Filename; + _ -> + %% For getopt/setopt of netns this should only happen if + %% a binary with wrong encoding was used when setting the + %% option, hence the user shall eat his/her own medicine. + %% + %% I.e passthrough here too for now. + %% Future usecases will most probably not want this, + %% rather Unicode error or warning + %% depending on emulator flag instead. + Bin + end. + + translate_ip(any, inet) -> {0,0,0,0}; translate_ip(loopback, inet) -> {127,0,0,1}; translate_ip(any, inet6) -> {0,0,0,0,0,0,0,0}; -- cgit v1.2.3 From 184e421b08feeae294cd4e629853a45ca640937f Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Wed, 17 Jul 2013 16:54:43 +0200 Subject: Rudimentary test --- lib/kernel/test/inet_SUITE.erl | 99 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/lib/kernel/test/inet_SUITE.erl b/lib/kernel/test/inet_SUITE.erl index 62ba95e1a3..427dbbc129 100644 --- a/lib/kernel/test/inet_SUITE.erl +++ b/lib/kernel/test/inet_SUITE.erl @@ -38,10 +38,10 @@ gethostnative_debug_level/0, gethostnative_debug_level/1, getif/1, getif_ifr_name_overflow/1,getservbyname_overflow/1, getifaddrs/1, - parse_strict_address/1]). + parse_strict_address/1, simple_netns/1]). -export([get_hosts/1, get_ipv6_hosts/1, parse_hosts/1, parse_address/1, - kill_gethost/0, parallell_gethost/0]). + kill_gethost/0, parallell_gethost/0, test_netns/0]). -export([init_per_testcase/2, end_per_testcase/2]). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -53,7 +53,7 @@ all() -> t_gethostnative, gethostnative_parallell, cname_loop, gethostnative_debug_level, gethostnative_soft_restart, getif, getif_ifr_name_overflow, getservbyname_overflow, - getifaddrs, parse_strict_address]. + getifaddrs, parse_strict_address, simple_netns]. groups() -> [{parse, [], [parse_hosts, parse_address]}]. @@ -1099,3 +1099,96 @@ toupper([C|Cs]) when is_integer(C) -> end; toupper([]) -> []. + + +simple_netns(Config) when is_list(Config) -> + {ok,U} = gen_udp:open(0), + case inet:setopts(U, [{netns,""}]) of + ok -> + jog_netns_opt(U), + ok = gen_udp:close(U), + %% + {ok,L} = gen_tcp:listen(0, []), + jog_netns_opt(L), + ok = gen_tcp:close(L), + %% + {ok,S} = gen_sctp:open(), + jog_netns_opt(S), + ok = gen_sctp:close(S); + {error,einval} -> + {skip,"setns() not supported"} + end. + +jog_netns_opt(S) -> + %% This is just jogging the option mechanics + ok = inet:setopts(S, [{netns,""}]), + {ok,[{netns,""}]} = inet:getopts(S, [netns]), + ok = inet:setopts(S, [{netns,"/proc/self/ns/net"}]), + {ok,[{netns,"/proc/self/ns/net"}]} = inet:getopts(S, [netns]), + ok. + + +%% Manual test to be run outside test_server in an emulator +%% started by root, in a machine with setns() support... +test_netns() -> + DefaultIF = v1, + DefaultIP = {192,168,1,17}, + Namespace = "test", + NamespaceIF = v2, + NamespaceIP = {192,168,1,18}, + %% + DefaultIPString = inet_parse:ntoa(DefaultIP), + NamespaceIPString = inet_parse:ntoa(NamespaceIP), + cmd("ip netns add ~s", + [Namespace]), + cmd("ip link add name ~w type veth peer name ~w netns ~s", + [DefaultIF,NamespaceIF,Namespace]), + cmd("ip netns exec ~s ip addr add ~s/30 dev ~w", + [Namespace,NamespaceIPString,NamespaceIF]), + cmd("ip netns exec ~s ip link set ~w up", + [Namespace,NamespaceIF]), + cmd("ip addr add ~s/30 dev ~w", + [DefaultIPString,DefaultIF]), + cmd("ip link set ~w up", + [DefaultIF]), + try test_netns( + {DefaultIF,DefaultIP}, + filename:join("/var/run/netns/", Namespace), + {NamespaceIF,NamespaceIP}) of + Result -> + io:put_chars(["#### Test done",io_lib:nl()]), + Result + after + cmd("ip link delete ~w type veth", + [DefaultIF]), + cmd("ip netns delete ~s", + [Namespace]) + end. + +test_netns({DefaultIF,DefaultIP}, Namespace, {NamespaceIF,NamespaceIP}) -> + {ok,ListenSocket} = gen_tcp:listen(0, [{active,false}]), + {ok,[{addr,DefaultIP}]} = inet:ifget(ListenSocket, DefaultIF, [addr]), + {ok,ListenPort} = inet:port(ListenSocket), + {ok,ConnectSocket} = + gen_tcp:connect( + DefaultIP, ListenPort, [{active,false},{netns,Namespace}], 3000), + {ok,[{addr,NamespaceIP}]} = inet:ifget(ConnectSocket, NamespaceIF, [addr]), + {ok,ConnectPort} = inet:port(ConnectSocket), + {ok,AcceptSocket} = gen_tcp:accept(ListenSocket, 0), + {ok,AcceptPort} = inet:port(AcceptSocket), + {ok,{NamespaceIP,ConnectPort}} = inet:peername(AcceptSocket), + {ok,{DefaultIP,AcceptPort}} = inet:peername(ConnectSocket), + ok = gen_tcp:send(ConnectSocket, "data"), + ok = gen_tcp:close(ConnectSocket), + {ok,"data"} = gen_tcp:recv(AcceptSocket, 4, 1000), + {error,closed} = gen_tcp:recv(AcceptSocket, 1, 1000), + ok = gen_tcp:close(AcceptSocket), + ok = gen_tcp:close(ListenSocket). + +cmd(Cmd, Args) -> + cmd(io_lib:format(Cmd, Args)). +%% +cmd(CmdString) -> + io:put_chars(["# ",CmdString,io_lib:nl()]), + io:put_chars([os:cmd(CmdString++" ; echo ' =>' $?")]), + ok. -- cgit v1.2.3 From af112cb10613d422080785621a274a18d96567c0 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 19 Jul 2013 15:18:42 +0200 Subject: Document socket option 'netns' --- lib/kernel/doc/src/inet.xml | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index 7cd98914d1..650ab41c3e 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -715,6 +715,59 @@ fe80::204:acff:fe17:bf38

Received Packet is delivered as defined by Mode.

+ {netns, Namespace :: file:filename_all()} + +

Set a network namespace for the socket. The Namespace + parameter is a filename defining the namespace for example + "/var/run/netns/example" typically created by the command + ip netns add example. This option must be used in a + function call that creates a socket i.e + + gen_tcp:connect/3,4, + + gen_tcp:listen/2, + + gen_udp:open/1,2 or + + gen_sctp:open/0-2. +

+

This option uses the Linux specific syscall + setns() such as in Linux kernel 3.0 or later + and therefore only exists when the runtime system + has been compiled for such an operating system. +

+

+ The virtual machine also needs elevated privileges either + running as superuser or (for Linux) having the capability + CAP_SYS_ADMIN according to the documentation for setns(2). + However, during testing also CAP_SYS_PTRACE + and CAP_DAC_READ_SEARCH has proven to be necessary. + Example: +setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp + + Note also that the filesystem containing the virtual machine + executable (beam.smp in the example above) has to be local, + mounted without the nosetuid flag, + support extended attributes and that + the kernel has to support file capabilities. + All this runs out of the box on at least Ubuntu 12.04 LTS, + except that SCTP sockets appears to not support + network namespaces. +

+

The Namespace is a file name and is encoded + and decoded as discussed in + file + except that the emulator flag +fnu is ignored and + getopts/2 + for this option will return a binary for the filename + if the stored filename can not be decoded, + which should only happen if you set the option using a binary + that can not be decoded with the emulator's filename encoding: + + file:native_name_encoding/0. +

+
+ list

Received Packet is delivered as a list.

-- cgit v1.2.3