diff options
author | Raimo Niskanen <[email protected]> | 2018-07-06 12:55:10 +0200 |
---|---|---|
committer | Raimo Niskanen <[email protected]> | 2018-09-04 10:43:24 +0200 |
commit | 64853dc28ce838583e35d5fefb0604933b6e98f9 (patch) | |
tree | 94cf435f2c42f197b341dc0331df26189d7e29b5 /erts/preloaded/src/prim_inet.erl | |
parent | 487f0c12e8700d31161a3bbb9c36e360aff484ac (diff) | |
download | otp-64853dc28ce838583e35d5fefb0604933b6e98f9.tar.gz otp-64853dc28ce838583e35d5fefb0604933b6e98f9.tar.bz2 otp-64853dc28ce838583e35d5fefb0604933b6e98f9.zip |
Implement socket option recvtos and friends
Implement socket options recvtclass, recvtos, recvttl and pktoptions.
Document the implemented socket options, new types and message formats.
The options recvtclass, recvtos and recvttl are boolean options that,
when activated (true) for a socket, will cause ancillary data to be
received through recvmsg(). This applies to packet-oriented sockets
(UDP and SCTP).
The required options for this feature were recvtclass and recvtos,
and recvttl was only added to test that the ancillary data parsing
handled multiple data items in one message correctly.
These options do not work on Windows since ancillary data
is not handled by the Winsock2 API.
For stream sockets (TCP) there is no clear connection between
a received packet and what is returned when reading data from
the socket, so recvmsg() is not useful. It is possible to get
the same ancillary data through a getsockopt() call with
the IPv6 socket option IPV6_PKTOPTIONS, on Linux named
IPV6_2292PKTOPTIONS after the now obsoleted RFC where it originated.
(Unfortunately, RFC 3542, which obsoletes it, explicitly undefines
this way to get packet ancillary data from a stream socket.)
Linux also has a way to get packet ancillary data for IPv4
TCP sockets through a getsockopt() call with IP_PKTOPTIONS,
which appears to be Linux specific.
This implementation uses a flag field in the inet_drv.c socket
internal data that records if any setsockopt() call with recvtclass,
recvtos or recvttl (IPV6_RECVTCLASS, IP_RECVTOS or IP_RECVTTL)
has been activated. If so recvmsg() is used instead of recvfrom().
Ancillary data is delivered to the application by a new return
tuple format from gen_udp:recv/2,3 containing a list of
ancillary data tuples [{tclass,TCLASS} | {tos,TOS} | {ttl,TTL}],
as returned by recvmsg(). For a socket in active mode a new
message format, containing the ancillary data list, delivers
the data in the same way.
For gen_sctp the ancillary data is delivered in the same way,
except that the gen_sctp return tuple format already contained
an ancillary data list so there are just more possible elements
when using these socket options. Note that the gen_sctp active mode
message format has an extra tuple level for the ancillary
data compared to what is now implemented for gen_udp.
The gen_sctp active mode format was considered to be the odd one
- now all tuples containing ancillary data are flat,
except for gen_sctp active mode.
Note that testing has not shown that Linux SCTP sockets deliver
any ancillary data for these socket options, so it is probably
not implemented yet. It remains to be seen what FreeBSD does...
For gen_tcp inet:getopts([pktoptions]) will deliver the latest
received ancillary data for any activated socket option recvtclass,
recvtos or recvttl, on platforms where IP_PKTOPTIONS is defined
for an IPv4 socket, or where IPV6_PKTOPTIONS or IPV6_2292PKTOPTIONS
is defined for an IPv6 socket. It will be delivered as a
list of ancillary data items in the same way as for gen_udp
(and gen_sctp).
On some platforms, e.g. the BSDs, when you activate IP_RECVTOS
you get ancillary data tagged IP_RECVTOS with the TOS value,
but on Linux you get ancillary data tagged IP_TOS with the
TOS value. Linux follows the style of RFC 2292, and the BSDs
use an older notion. For RFC 2292, which defines the IP_PKTOPTIONS
socket option, it is more logical to tag the items with the
tag that names the item itself than with the tag that requests
the item. Therefore this implementation translates all
BSD style ancillary data tags to the corresponding Linux style
data tags, so the application will only see the tags 'tclass',
'tos' and 'ttl' on all platforms.
Diffstat (limited to 'erts/preloaded/src/prim_inet.erl')
-rw-r--r-- | erts/preloaded/src/prim_inet.erl | 85 |
1 files changed, 59 insertions, 26 deletions
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 2a3605260d..8169943dde 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2000-2017. All Rights Reserved. +%% Copyright Ericsson AB 2000-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ -export([open/3, open/4, fdopen/4, fdopen/5, close/1]). -export([bind/3, listen/1, listen/2, peeloff/2]). -export([connect/3, connect/4, async_connect/4]). --export([accept/1, accept/2, async_accept/2]). +-export([accept/1, accept/2, accept/3, async_accept/2]). -export([shutdown/2]). -export([send/2, send/3, sendto/4, sendmsg/3, sendfile/4]). -export([recv/2, recv/3, async_recv/3]). @@ -307,7 +307,7 @@ async_connect0(S, Addr, Time) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% -%% ACCEPT(insock() [,Timeout] ) -> {ok,insock()} | {error, Reason} +%% ACCEPT(insock() [,Timeout][,FamilyOpts] ) -> {ok,insock()} | {error, Reason} %% %% accept incoming connection on listen socket %% if timeout is given: @@ -315,6 +315,8 @@ async_connect0(S, Addr, Time) -> %% 0 -> immediate accept (poll) %% > 0 -> wait for timeout ms for accept if no accept then %% return {error, timeout} +%% FamilyOpts are address family specific options to copy from +%% listen socket to accepted socket %% %% ASYNC_ACCEPT(insock(), Timeout) %% @@ -325,17 +327,22 @@ async_connect0(S, Addr, Time) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% For TCP sockets only. %% -accept(L) -> accept0(L, -1). +accept(L) -> accept0(L, -1, []). -accept(L, infinity) -> accept0(L, -1); -accept(L, Time) -> accept0(L, Time). 
+accept(L, infinity) -> accept0(L, -1, []); +accept(L, FamilyOpts) when is_list(FamilyOpts) -> accept0(L, -1, FamilyOpts); +accept(L, Time) -> accept0(L, Time, []). -accept0(L, Time) when is_port(L), is_integer(Time) -> +accept(L, infinity, FamilyOpts) -> accept0(L, -1, FamilyOpts); +accept(L, Time, FamilyOpts) -> accept0(L, Time, FamilyOpts). + +accept0(L, Time, FamilyOpts) + when is_port(L), is_integer(Time), is_list(FamilyOpts) -> case async_accept(L, Time) of {ok, Ref} -> receive {inet_async, L, Ref, {ok,S}} -> - accept_opts(L, S); + accept_opts(L, S, FamilyOpts); {inet_async, L, Ref, Error} -> Error end; @@ -343,25 +350,22 @@ accept0(L, Time) when is_port(L), is_integer(Time) -> end. %% setup options from listen socket on the connected socket -accept_opts(L, S) -> - case getopts(L, [active, nodelay, keepalive, delay_send, priority, tos]) of +accept_opts(L, S, FamilyOpts) -> + case + getopts( + L, + [active, nodelay, keepalive, delay_send, priority] + ++ FamilyOpts) + of {ok, Opts} -> - case setopts(S, Opts) of - ok -> - case getopts(L, [tclass]) of - {ok, []} -> - {ok, S}; - {ok, TClassOpts} -> - case setopts(S, TClassOpts) of - ok -> - {ok, S}; - Error -> close(S), Error - end - end; - Error -> close(S), Error - end; - Error -> - close(S), Error + case setopts(S, Opts) of + ok -> + {ok, S}; + Error1 -> + close(S), Error1 + end; + Error2 -> + close(S), Error2 end. 
async_accept(L, Time) -> @@ -616,7 +620,16 @@ recvfrom0(S, Length, Time) Ref = ?u16(R1,R0), receive % Success, UDP: + {inet_async, S, Ref, {ok, {[F | AddrData], AncData}}} -> + %% With ancillary data + case get_addr(F, AddrData) of + {{Family, _} = Addr, Data} when is_atom(Family) -> + {ok, {Addr, 0, AncData, Data}}; + {{IP, Port}, Data} -> + {ok, {IP, Port, AncData, Data}} + end; {inet_async, S, Ref, {ok, [F | AddrData]}} -> + %% Without ancillary data case get_addr(F, AddrData) of {{Family, _} = Addr, Data} when is_atom(Family) -> {ok, {Addr, 0, Data}}; @@ -1256,6 +1269,11 @@ enc_opt(recbuf) -> ?INET_OPT_RCVBUF; enc_opt(priority) -> ?INET_OPT_PRIORITY; enc_opt(tos) -> ?INET_OPT_TOS; enc_opt(tclass) -> ?INET_OPT_TCLASS; +enc_opt(recvtos) -> ?INET_OPT_RECVTOS; +enc_opt(recvtclass) -> ?INET_OPT_RECVTCLASS; +enc_opt(pktoptions) -> ?INET_OPT_PKTOPTIONS; +enc_opt(ttl) -> ?INET_OPT_TTL; +enc_opt(recvttl) -> ?INET_OPT_RECVTTL; enc_opt(nodelay) -> ?TCP_OPT_NODELAY; enc_opt(multicast_if) -> ?UDP_OPT_MULTICAST_IF; enc_opt(multicast_ttl) -> ?UDP_OPT_MULTICAST_TTL; @@ -1318,6 +1336,11 @@ dec_opt(?INET_OPT_PRIORITY) -> priority; dec_opt(?INET_OPT_TOS) -> tos; dec_opt(?INET_OPT_TCLASS) -> tclass; dec_opt(?TCP_OPT_NODELAY) -> nodelay; +dec_opt(?INET_OPT_RECVTOS) -> recvtos; +dec_opt(?INET_OPT_RECVTCLASS) -> recvtclass; +dec_opt(?INET_OPT_PKTOPTIONS) -> pktoptions; +dec_opt(?INET_OPT_TTL) -> ttl; +dec_opt(?INET_OPT_RECVTTL) -> recvttl; dec_opt(?UDP_OPT_MULTICAST_IF) -> multicast_if; dec_opt(?UDP_OPT_MULTICAST_TTL) -> multicast_ttl; dec_opt(?UDP_OPT_MULTICAST_LOOP) -> multicast_loop; @@ -1393,6 +1416,11 @@ type_opt_1(recbuf) -> int; type_opt_1(priority) -> int; type_opt_1(tos) -> int; type_opt_1(tclass) -> int; +type_opt_1(recvtos) -> bool; +type_opt_1(recvtclass) -> bool; +type_opt_1(pktoptions) -> opts; +type_opt_1(ttl) -> int; +type_opt_1(recvttl) -> bool; type_opt_1(nodelay) -> bool; type_opt_1(ipv6_v6only) -> bool; %% multicast @@ -1899,6 +1927,11 @@ 
dec_value(binary,[L0,L1,L2,L3|List]) -> Len = ?i32(L0,L1,L2,L3), {X,T}=split(Len,List), {list_to_binary(X),T}; +dec_value(opts, [L0,L1,L2,L3|List]) -> + Len = ?u32(L0,L1,L2,L3), + {X,T} = split(Len, List), + Opts = dec_opt_val(X), + {Opts,T}; dec_value(Types, List) when is_tuple(Types) -> {L,T} = dec_value_tuple(Types, List, 1, []), {list_to_tuple(L),T}; |