diff options
author | John Högberg <[email protected]> | 2018-10-17 07:41:07 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2018-10-17 07:48:46 +0200 |
commit | 2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744 (patch) | |
tree | d741291d864d3f565a1cf0fd9996a3c7fa98615f | |
parent | cedccd3e8d42189b91d46c2637b9ce39675318f4 (diff) | |
parent | 917eeea53273f00489715a94a90cc0c2bb129b74 (diff) | |
download | otp-2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744.tar.gz otp-2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744.tar.bz2 otp-2c9cfa8663df4b73afb6d9a6e5a40d6887cdc744.zip |
Merge branch 'igor/tcp-nopush-ERL-698/OTP-15357' into maint
* igor/tcp-nopush-ERL-698/OTP-15357:
"cork" tcp socket around file:sendfile
Add nopush TCP socket option
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 30 | ||||
-rw-r--r-- | erts/preloaded/ebin/prim_inet.beam | bin | 80120 -> 80864 bytes | |||
-rw-r--r-- | erts/preloaded/src/prim_inet.erl | 27 | ||||
-rw-r--r-- | lib/kernel/doc/src/inet.xml | 12 | ||||
-rw-r--r-- | lib/kernel/src/inet_int.hrl | 1 | ||||
-rw-r--r-- | lib/kernel/test/inet_sockopt_SUITE.erl | 9 |
6 files changed, 76 insertions, 3 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index f663ab0b05..3195ca3874 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -812,6 +812,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #define INET_OPT_PKTOPTIONS 45 /* IP(V6)_PKTOPTIONS get ancillary data */ #define INET_OPT_TTL 46 /* IP_TTL */ #define INET_OPT_RECVTTL 47 /* IP_RECVTTL ancillary data */ +#define TCP_OPT_NOPUSH 48 /* super-Nagle, aka TCP_CORK */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -955,6 +956,12 @@ static size_t my_strnlen(const char *s, size_t maxlen) #endif +#if defined(TCP_CORK) +#define INET_TCP_NOPUSH TCP_CORK +#elif defined(TCP_NOPUSH) && !defined(__DARWIN__) +#define INET_TCP_NOPUSH TCP_NOPUSH +#endif + #define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */ /* The general purpose sockaddr */ @@ -6598,6 +6605,19 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) (long)desc->port, desc->s, ival)); break; + case TCP_OPT_NOPUSH: +#if defined(INET_TCP_NOPUSH) + proto = IPPROTO_TCP; + type = INET_TCP_NOPUSH; + DEBUGF(("inet_set_opts(%ld): s=%d, t=%d TCP_NOPUSH=%d\r\n", + (long)desc->port, desc->s, type, ival)); + break; +#else + /* inet_fill_opts always returns a value for this option, + * so we need to ignore it if not implemented, just in case */ + continue; +#endif + #if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) case UDP_OPT_MULTICAST_TTL: @@ -7759,6 +7779,16 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, proto = IPPROTO_TCP; type = TCP_NODELAY; break; + case TCP_OPT_NOPUSH: +#if defined(INET_TCP_NOPUSH) + proto = IPPROTO_TCP; + type = INET_TCP_NOPUSH; + break; +#else + *ptr++ = opt; + put_int32(0, ptr); + continue; +#endif #if defined(HAVE_MULTICAST_SUPPORT) && defined(IPPROTO_IP) case UDP_OPT_MULTICAST_TTL: diff --git a/erts/preloaded/ebin/prim_inet.beam 
b/erts/preloaded/ebin/prim_inet.beam Binary files differ index eebfe19a11..52bab031ff 100644 --- a/erts/preloaded/ebin/prim_inet.beam +++ b/erts/preloaded/ebin/prim_inet.beam diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 1da852deb2..963e8933bc 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -520,13 +520,35 @@ sendfile(S, FileHandle, Offset, Length) sendfile(S, FileHandle, Offset, Length) -> case erlang:port_info(S, connected) of {connected, Pid} when Pid =:= self() -> - sendfile_1(S, FileHandle, Offset, Length); + Uncork = sendfile_maybe_cork(S), + Result = sendfile_1(S, FileHandle, Offset, Length), + sendfile_maybe_uncork(S, Uncork), + Result; {connected, Pid} when Pid =/= self() -> {error, not_owner}; _Other -> {error, einval} end. +sendfile_maybe_cork(S) -> + case getprotocol(S) of + tcp -> + case getopts(S, [nopush]) of + {ok, [{nopush,false}]} -> + _ = setopts(S, [{nopush,true}]), + true; + _ -> + false + end; + _ -> false + end. + +sendfile_maybe_uncork(S, true) -> + _ = setopts(S, [{nopush,false}]), + ok; +sendfile_maybe_uncork(_, false) -> + ok. 
+ sendfile_1(S, FileHandle, Offset, 0) -> sendfile_1(S, FileHandle, Offset, (1 bsl 63) - 1); sendfile_1(_S, _FileHandle, Offset, Length) when @@ -1318,6 +1340,7 @@ enc_opt(pktoptions) -> ?INET_OPT_PKTOPTIONS; enc_opt(ttl) -> ?INET_OPT_TTL; enc_opt(recvttl) -> ?INET_OPT_RECVTTL; enc_opt(nodelay) -> ?TCP_OPT_NODELAY; +enc_opt(nopush) -> ?TCP_OPT_NOPUSH; enc_opt(multicast_if) -> ?UDP_OPT_MULTICAST_IF; enc_opt(multicast_ttl) -> ?UDP_OPT_MULTICAST_TTL; enc_opt(multicast_loop) -> ?UDP_OPT_MULTICAST_LOOP; @@ -1379,6 +1402,7 @@ dec_opt(?INET_OPT_PRIORITY) -> priority; dec_opt(?INET_OPT_TOS) -> tos; dec_opt(?INET_OPT_TCLASS) -> tclass; dec_opt(?TCP_OPT_NODELAY) -> nodelay; +dec_opt(?TCP_OPT_NOPUSH) -> nopush; dec_opt(?INET_OPT_RECVTOS) -> recvtos; dec_opt(?INET_OPT_RECVTCLASS) -> recvtclass; dec_opt(?INET_OPT_PKTOPTIONS) -> pktoptions; @@ -1465,6 +1489,7 @@ type_opt_1(pktoptions) -> opts; type_opt_1(ttl) -> int; type_opt_1(recvttl) -> bool; type_opt_1(nodelay) -> bool; +type_opt_1(nopush) -> bool; type_opt_1(ipv6_v6only) -> bool; %% multicast type_opt_1(multicast_ttl) -> int; diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index 127c110df4..87b08e4e36 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -1147,6 +1147,18 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp</code> is turned on for the socket, which means that also small amounts of data are sent immediately.</p> </item> + <tag><c>{nopush, Boolean}</c>(TCP/IP sockets)</tag> + <item> + <p>This translates to <c>TCP_NOPUSH</c> on BSD and + to <c>TCP_CORK</c> on Linux.</p> + <p>If <c>Boolean == true</c>, the corresponding option + is turned on for the socket, which means that small + amounts of data are accumulated until a full MSS-worth + of data is available or this option is turned off.</p> + <p>Note that while <c>TCP_NOPUSH</c> socket option is available on OSX, its semantics + is very different (e.g., unsetting it does not cause immediate send 
+ of accumulated data). Hence, <c>nopush</c> option is intentionally ignored on OSX.</p> + </item> <tag><c>{packet, PacketType}</c>(TCP/IP sockets)</tag> <item> <p><marker id="packet"/>Defines the type of packets to use for a socket. diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl index c8e09d18ad..f6525d7261 100644 --- a/lib/kernel/src/inet_int.hrl +++ b/lib/kernel/src/inet_int.hrl @@ -162,6 +162,7 @@ -define(INET_OPT_PKTOPTIONS, 45). -define(INET_OPT_TTL, 46). -define(INET_OPT_RECVTTL, 47). +-define(TCP_OPT_NOPUSH, 48). % Specific SCTP options: separate range: -define(SCTP_OPT_RTOINFO, 100). -define(SCTP_OPT_ASSOCINFO, 101). diff --git a/lib/kernel/test/inet_sockopt_SUITE.erl b/lib/kernel/test/inet_sockopt_SUITE.erl index ada9c2689c..27ff74e309 100644 --- a/lib/kernel/test/inet_sockopt_SUITE.erl +++ b/lib/kernel/test/inet_sockopt_SUITE.erl @@ -110,9 +110,14 @@ simple(Config) when is_list(Config) -> {S1,S2} = create_socketpair(Opt, Opt), {ok,Opt} = inet:getopts(S1,OptTags), {ok,Opt} = inet:getopts(S2,OptTags), - COpt = [{X,case X of nodelay -> false;_ -> Y end} || {X,Y} <- Opt], + NoPushOpt = case os:type() of + {unix, Osname} when Osname =:= linux; Osname =:= freebsd -> {nopush, true}; + {_,_} -> {nopush, false} + end, + COpt = [{X,case X of nodelay -> false;_ -> Y end} || {X,Y} <- [NoPushOpt|Opt]], + COptTags = [X || {X,_} <- COpt], inet:setopts(S1,COpt), - {ok,COpt} = inet:getopts(S1,OptTags), + {ok,COpt} = inet:getopts(S1,COptTags), {ok,Opt} = inet:getopts(S2,OptTags), gen_tcp:close(S1), gen_tcp:close(S2), |