diff options
author | Rory Byrne <[email protected]> | 2015-04-15 17:28:09 +0100 |
---|---|---|
committer | Rory Byrne <[email protected]> | 2015-05-12 19:55:35 +0100 |
commit | 25bd6312491fc9153a16f74b5d1d39609426ae60 (patch) | |
tree | 0cfd7f0227302868e2b77b04b803843b87f5eb9f | |
parent | ce96ab6d64768cd6536011ccdecc08191c238220 (diff) | |
download | otp-25bd6312491fc9153a16f74b5d1d39609426ae60.tar.gz otp-25bd6312491fc9153a16f74b5d1d39609426ae60.tar.bz2 otp-25bd6312491fc9153a16f74b5d1d39609426ae60.zip |
Fix gen_tcp:shutdown/2 by making it asynchronous
If the driver queue is empty, or the user is requesting a 'read'
shutdown, then the shutdown() syscall is performed synchronously, as
per the old version of shutdown/2.
However, if the user is requesting a 'write' or 'read_write' shutdown,
and there is data in the driver queue for the socket, then the
shutdown() syscall is delayed and handled asynchronously when the
driver queue is written out.
This version of shutdown solves a number of issues with the old
version. The two main solutions it offers are:
* It doesn't block when the TCP peer is idle or slow. This is the
expected behaviour when shutdown() is called: the caller needs
to be able to continue reading from the socket, not be prevented
from doing so.
* It doesn't truncate the output. The current version of
gen_tcp:shutdown/2 will truncate any outbound data in the driver
queue after about 10 seconds if the TCP peer is idle of slow. Worse
yet, it doesn't even inform anyone that the data has been
truncated: 'ok' is returned to the caller; and a FIN rather than
an RST is sent to the TCP peer.
For a detailed description of all the problems with the old version
of shutdown, please see the EEP Light that was written to justify
this patch.
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 58 | ||||
-rw-r--r-- | erts/preloaded/src/prim_inet.erl | 21 | ||||
-rw-r--r-- | lib/kernel/doc/src/gen_tcp.xml | 15 | ||||
-rw-r--r-- | lib/kernel/test/gen_tcp_api_SUITE.erl | 32 |
4 files changed, 99 insertions, 27 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 5196eb51c6..e001f31932 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -293,6 +293,10 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD); static unsigned long zero_value = 0; static unsigned long one_value = 1; +#define TCP_SHUT_WR SD_SEND +#define TCP_SHUT_RD SD_RECEIVE +#define TCP_SHUT_RDWR SD_BOTH + #elif defined (__OSE__) /* @@ -421,6 +425,10 @@ typedef unsigned long u_long; inet_driver_select((d), (flags), (onoff)); \ } while(0) +#define TCP_SHUT_WR SHUT_WR +#define TCP_SHUT_RD SHUT_RD +#define TCP_SHUT_RDWR SHUT_RDWR + #else /* !__OSE__ && !__WIN32__ */ #include <sys/time.h> @@ -691,6 +699,9 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) inet_driver_select((d)->port, (ErlDrvEvent)(long)(d)->event, (flags), (onoff)); \ } while(0) +#define TCP_SHUT_WR SHUT_WR +#define TCP_SHUT_RD SHUT_RD +#define TCP_SHUT_RDWR SHUT_RDWR #endif /* !__WIN32__ && !__OSE__ */ @@ -820,6 +831,10 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_CLOSE_SENT 2 /* Close sent (active mode only) */ #define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */ #define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */ +#define TCP_ADDF_PENDING_SHUT_WR 16 /* Call shutdown(sock, SHUT_WR) when queue empties */ +#define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */ +#define TCP_ADDF_PENDING_SHUTDOWN \ + (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR) /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -1407,6 +1422,8 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev); static int tcp_recv(tcp_descriptor* desc, int request_len); static int tcp_deliver(tcp_descriptor* desc, int len); +static int tcp_shutdown_error(tcp_descriptor* desc, int err); + static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); @@ -9473,10 +9490,18 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_error(EINVAL, rbuf, rsize); } how = buf[0]; - if (sock_shutdown(INETP(desc)->s, how) == 0) { + if (how != TCP_SHUT_RD && driver_sizeq(desc->inet.port) > 0) { + if (how == TCP_SHUT_WR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_WR; + } else if (how == TCP_SHUT_RDWR) { + desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_RDWR; + } return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); - } else { + } + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) { return ctl_error(sock_errno(), rbuf, rsize); + } else { + return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } } default: @@ -9613,6 +9638,8 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) else inet_reply_error(INETP(desc), ENOTCONN); } + else if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_error(desc, EPIPE); else if (tcp_sendv(desc, ev) == 0) inet_reply_ok(INETP(desc)); DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port)); @@ -10506,7 +10533,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) return ret; } -static int tcp_send_error(tcp_descriptor* desc, int err) +static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) { /* * If the port is busy, we must do some clean-up before proceeding. @@ -10563,6 +10590,16 @@ static int tcp_send_error(tcp_descriptor* desc, int err) return -1; } +static int tcp_send_error(tcp_descriptor* desc, int err) +{ + return tcp_send_or_shutdown_error(desc, err); +} + +static int tcp_shutdown_error(tcp_descriptor* desc, int err) +{ + return tcp_send_or_shutdown_error(desc, err); +} + /* ** Send non-blocking vector data */ @@ -10763,6 +10800,19 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) return 0; } +/* shutdown on the socket: +** Assume caller has confirmed TCP_ADDF_PENDING_SHUTDOWN is set. +*/ +static void tcp_shutdown_async(tcp_descriptor* desc) +{ + int how; + + how = (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUT_WR) ? + TCP_SHUT_WR : TCP_SHUT_RDWR; + if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) + tcp_shutdown_error(desc, sock_errno()); +} + #ifdef __OSE__ static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event) @@ -10891,6 +10941,8 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) if ((iov = driver_peekq(ix, &vsize)) == NULL) { sock_select(INETP(desc), FD_WRITE, 0); send_empty_out_q_msgs(INETP(desc)); + if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN) + tcp_shutdown_async(desc); goto done; } vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize; diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 79ff013c77..622e1be869 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -127,37 +127,18 @@ drv2protocol(_) -> undefined. %% TODO: shutdown equivalent for SCTP %% shutdown(S, read) when is_port(S) -> - shutdown_2(S, 0); + shutdown_1(S, 0); shutdown(S, write) when is_port(S) -> shutdown_1(S, 1); shutdown(S, read_write) when is_port(S) -> shutdown_1(S, 2). shutdown_1(S, How) -> - case subscribe(S, [subs_empty_out_q]) of - {ok,[{subs_empty_out_q,N}]} when N > 0 -> - shutdown_pend_loop(S, N); %% wait for pending output to be sent - _Other -> ok - end, - shutdown_2(S, How). - -shutdown_2(S, How) -> case ctl_cmd(S, ?TCP_REQ_SHUTDOWN, [How]) of {ok, []} -> ok; {error,_}=Error -> Error end. -shutdown_pend_loop(S, N0) -> - receive - {empty_out_q,S} -> ok - after ?INET_CLOSE_TIMEOUT -> - case getstat(S, [send_pend]) of - {ok,[{send_pend,N0}]} -> ok; - {ok,[{send_pend,N}]} -> shutdown_pend_loop(S, N); - _ -> ok - end - end. - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% %% CLOSE(insock()) -> ok diff --git a/lib/kernel/doc/src/gen_tcp.xml b/lib/kernel/doc/src/gen_tcp.xml index 820ecd1e30..71ef5cd48f 100644 --- a/lib/kernel/doc/src/gen_tcp.xml +++ b/lib/kernel/doc/src/gen_tcp.xml @@ -347,11 +347,22 @@ do_recv(Sock, Bs) -> </func> <func> <name name="shutdown" arity="2"/> - <fsummary>Immediately close a socket</fsummary> + <fsummary>Asynchronously close a socket</fsummary> <desc> - <p>Immediately close a socket in one or two directions.</p> + <p>Close a socket in one or two directions.</p> <p><c><anno>How</anno> == write</c> means closing the socket for writing, reading from it is still possible.</p> + <p>If <c><anno>How</anno> == read</c>, or there is no outgoing + data buffered in the <c><anno>Socket</anno></c> port, + then the socket is shutdown immediately and any error encountered + is returned in <c><anno>Reason</anno></c>.</p> + <p>If there is data buffered in the socket port, then the attempt + to shutdown the socket is postponed until that data is written to the + kernel socket send buffer. Any errors encountered will result + in the socket being closed and <c>{error, closed}</c> being returned + on the next + <seealso marker="gen_tcp#recv/2">recv/2</seealso> or + <seealso marker="gen_tcp#send/2">send/2</seealso>.</p> <p>To be able to handle that the peer has done a shutdown on the write side, the <c>{exit_on_close, false}</c> option is useful.</p> diff --git a/lib/kernel/test/gen_tcp_api_SUITE.erl b/lib/kernel/test/gen_tcp_api_SUITE.erl index c27d265550..4a527e2f51 100644 --- a/lib/kernel/test/gen_tcp_api_SUITE.erl +++ b/lib/kernel/test/gen_tcp_api_SUITE.erl @@ -32,6 +32,7 @@ t_connect_bad/1, t_recv_timeout/1, t_recv_eof/1, t_shutdown_write/1, t_shutdown_both/1, t_shutdown_error/1, + t_shutdown_async/1, t_fdopen/1, t_fdconnect/1, t_implicit_inet6/1]). -export([getsockfd/0,closesockfd/1]). @@ -41,7 +42,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [{group, t_accept}, {group, t_connect}, {group, t_recv}, t_shutdown_write, t_shutdown_both, t_shutdown_error, - t_fdopen, t_fdconnect, t_implicit_inet6]. + t_shutdown_async, t_fdopen, t_fdconnect, t_implicit_inet6]. groups() -> [{t_accept, [], [t_accept_timeout]}, @@ -155,7 +156,34 @@ t_shutdown_error(Config) when is_list(Config) -> ?line ok = gen_tcp:close(L), ?line {error, closed} = gen_tcp:shutdown(L, read_write), ok. - + +t_shutdown_async(Config) when is_list(Config) -> + ?line {OS, _} = os:type(), + ?line {ok, L} = gen_tcp:listen(0, [{sndbuf, 4096}]), + ?line {ok, Port} = inet:port(L), + ?line {ok, Client} = gen_tcp:connect(localhost, Port, + [{recbuf, 4096}, + {active, false}]), + ?line {ok, S} = gen_tcp:accept(L), + ?line PayloadSize = 1024 * 1024, + ?line Payload = lists:duplicate(PayloadSize, $.), + ?line ok = gen_tcp:send(S, Payload), + ?line case erlang:port_info(S, queue_size) of + {queue_size, N} when N > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T -> ?t:fail({unexpected, T}) + end, + + ?line ok = gen_tcp:shutdown(S, write), + ?line {ok, Buf} = gen_tcp:recv(Client, PayloadSize), + ?line {error, closed} = gen_tcp:recv(Client, 0), + ?line case length(Buf) of + PayloadSize -> ok; + Sz -> ?t:fail({payload_size, + {expected, PayloadSize}, + {received, Sz}}) + end. + %%% gen_tcp:fdopen/2 |