aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRory Byrne <[email protected]>2015-04-15 17:28:09 +0100
committerRory Byrne <[email protected]>2015-05-12 19:55:35 +0100
commit25bd6312491fc9153a16f74b5d1d39609426ae60 (patch)
tree0cfd7f0227302868e2b77b04b803843b87f5eb9f
parentce96ab6d64768cd6536011ccdecc08191c238220 (diff)
downloadotp-25bd6312491fc9153a16f74b5d1d39609426ae60.tar.gz
otp-25bd6312491fc9153a16f74b5d1d39609426ae60.tar.bz2
otp-25bd6312491fc9153a16f74b5d1d39609426ae60.zip
Fix gen_tcp:shutdown/2 by making it asynchronous
If the driver queue is empty, or the user is requesting a 'read' shutdown, then the shutdown() syscall is performed synchronously, as per the old version of shutdown/2. However, if the user is requesting a 'write' or 'read_write' shutdown, and there is data in the driver queue for the socket, then the shutdown() syscall is delayed and handled asynchronously when the driver queue is written out. This version of shutdown solves a number of issues with the old version. The two main solutions it offers are: * It doesn't block when the TCP peer is idle or slow. This is the expected behaviour when shutdown() is called: the caller needs to be able to continue reading from the socket, not be prevented from doing so. * It doesn't truncate the output. The current version of gen_tcp:shutdown/2 will truncate any outbound data in the driver queue after about 10 seconds if the TCP peer is idle of slow. Worse yet, it doesn't even inform anyone that the data has been truncated: 'ok' is returned to the caller; and a FIN rather than an RST is sent to the TCP peer. For a detailed description of all the problems with the old version of shutdown, please see the EEP Light that was written to justify this patch.
-rw-r--r--erts/emulator/drivers/common/inet_drv.c58
-rw-r--r--erts/preloaded/src/prim_inet.erl21
-rw-r--r--lib/kernel/doc/src/gen_tcp.xml15
-rw-r--r--lib/kernel/test/gen_tcp_api_SUITE.erl32
4 files changed, 99 insertions, 27 deletions
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 5196eb51c6..e001f31932 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -293,6 +293,10 @@ static BOOL (WINAPI *fpSetHandleInformation)(HANDLE,DWORD,DWORD);
static unsigned long zero_value = 0;
static unsigned long one_value = 1;
+#define TCP_SHUT_WR SD_SEND
+#define TCP_SHUT_RD SD_RECEIVE
+#define TCP_SHUT_RDWR SD_BOTH
+
#elif defined (__OSE__)
/*
@@ -421,6 +425,10 @@ typedef unsigned long u_long;
inet_driver_select((d), (flags), (onoff)); \
} while(0)
+#define TCP_SHUT_WR SHUT_WR
+#define TCP_SHUT_RD SHUT_RD
+#define TCP_SHUT_RDWR SHUT_RDWR
+
#else /* !__OSE__ && !__WIN32__ */
#include <sys/time.h>
@@ -691,6 +699,9 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
inet_driver_select((d)->port, (ErlDrvEvent)(long)(d)->event, (flags), (onoff)); \
} while(0)
+#define TCP_SHUT_WR SHUT_WR
+#define TCP_SHUT_RD SHUT_RD
+#define TCP_SHUT_RDWR SHUT_RDWR
#endif /* !__WIN32__ && !__OSE__ */
@@ -820,6 +831,10 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
#define TCP_ADDF_CLOSE_SENT 2 /* Close sent (active mode only) */
#define TCP_ADDF_DELAYED_CLOSE_RECV 4 /* If receive fails, report {error,closed} (passive mode) */
#define TCP_ADDF_DELAYED_CLOSE_SEND 8 /* If send fails, report {error,closed} (passive mode) */
+#define TCP_ADDF_PENDING_SHUT_WR 16 /* Call shutdown(sock, SHUT_WR) when queue empties */
+#define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */
+#define TCP_ADDF_PENDING_SHUTDOWN \
+ (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR)
/* *_REQ_* replies */
#define INET_REP_ERROR 0
@@ -1407,6 +1422,8 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev);
static int tcp_recv(tcp_descriptor* desc, int request_len);
static int tcp_deliver(tcp_descriptor* desc, int len);
+static int tcp_shutdown_error(tcp_descriptor* desc, int err);
+
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
@@ -9473,10 +9490,18 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_error(EINVAL, rbuf, rsize);
}
how = buf[0];
- if (sock_shutdown(INETP(desc)->s, how) == 0) {
+ if (how != TCP_SHUT_RD && driver_sizeq(desc->inet.port) > 0) {
+ if (how == TCP_SHUT_WR) {
+ desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_WR;
+ } else if (how == TCP_SHUT_RDWR) {
+ desc->tcp_add_flags |= TCP_ADDF_PENDING_SHUT_RDWR;
+ }
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
- } else {
+ }
+ if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) {
return ctl_error(sock_errno(), rbuf, rsize);
+ } else {
+ return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
}
}
default:
@@ -9613,6 +9638,8 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev)
else
inet_reply_error(INETP(desc), ENOTCONN);
}
+ else if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN)
+ tcp_shutdown_error(desc, EPIPE);
else if (tcp_sendv(desc, ev) == 0)
inet_reply_ok(INETP(desc));
DEBUGF(("tcp_inet_commandv(%ld) }\r\n", (long)desc->inet.port));
@@ -10506,7 +10533,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
return ret;
}
-static int tcp_send_error(tcp_descriptor* desc, int err)
+static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
{
/*
* If the port is busy, we must do some clean-up before proceeding.
@@ -10563,6 +10590,16 @@ static int tcp_send_error(tcp_descriptor* desc, int err)
return -1;
}
+static int tcp_send_error(tcp_descriptor* desc, int err)
+{
+ return tcp_send_or_shutdown_error(desc, err);
+}
+
+static int tcp_shutdown_error(tcp_descriptor* desc, int err)
+{
+ return tcp_send_or_shutdown_error(desc, err);
+}
+
/*
** Send non-blocking vector data
*/
@@ -10763,6 +10800,19 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
return 0;
}
+/* shutdown on the socket:
+** Assume caller has confirmed TCP_ADDF_PENDING_SHUTDOWN is set.
+*/
+static void tcp_shutdown_async(tcp_descriptor* desc)
+{
+ int how;
+
+ how = (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUT_WR) ?
+ TCP_SHUT_WR : TCP_SHUT_RDWR;
+ if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how)))
+ tcp_shutdown_error(desc, sock_errno());
+}
+
#ifdef __OSE__
static void tcp_inet_drv_output_ose(ErlDrvData data, ErlDrvEvent event)
@@ -10891,6 +10941,8 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
if ((iov = driver_peekq(ix, &vsize)) == NULL) {
sock_select(INETP(desc), FD_WRITE, 0);
send_empty_out_q_msgs(INETP(desc));
+ if (desc->tcp_add_flags & TCP_ADDF_PENDING_SHUTDOWN)
+ tcp_shutdown_async(desc);
goto done;
}
vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index 79ff013c77..622e1be869 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -127,37 +127,18 @@ drv2protocol(_) -> undefined.
%% TODO: shutdown equivalent for SCTP
%%
shutdown(S, read) when is_port(S) ->
- shutdown_2(S, 0);
+ shutdown_1(S, 0);
shutdown(S, write) when is_port(S) ->
shutdown_1(S, 1);
shutdown(S, read_write) when is_port(S) ->
shutdown_1(S, 2).
shutdown_1(S, How) ->
- case subscribe(S, [subs_empty_out_q]) of
- {ok,[{subs_empty_out_q,N}]} when N > 0 ->
- shutdown_pend_loop(S, N); %% wait for pending output to be sent
- _Other -> ok
- end,
- shutdown_2(S, How).
-
-shutdown_2(S, How) ->
case ctl_cmd(S, ?TCP_REQ_SHUTDOWN, [How]) of
{ok, []} -> ok;
{error,_}=Error -> Error
end.
-shutdown_pend_loop(S, N0) ->
- receive
- {empty_out_q,S} -> ok
- after ?INET_CLOSE_TIMEOUT ->
- case getstat(S, [send_pend]) of
- {ok,[{send_pend,N0}]} -> ok;
- {ok,[{send_pend,N}]} -> shutdown_pend_loop(S, N);
- _ -> ok
- end
- end.
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% CLOSE(insock()) -> ok
diff --git a/lib/kernel/doc/src/gen_tcp.xml b/lib/kernel/doc/src/gen_tcp.xml
index 820ecd1e30..71ef5cd48f 100644
--- a/lib/kernel/doc/src/gen_tcp.xml
+++ b/lib/kernel/doc/src/gen_tcp.xml
@@ -347,11 +347,22 @@ do_recv(Sock, Bs) ->
</func>
<func>
<name name="shutdown" arity="2"/>
- <fsummary>Immediately close a socket</fsummary>
+ <fsummary>Asynchronously close a socket</fsummary>
<desc>
- <p>Immediately close a socket in one or two directions.</p>
+ <p>Close a socket in one or two directions.</p>
<p><c><anno>How</anno> == write</c> means closing the socket for writing,
reading from it is still possible.</p>
+ <p>If <c><anno>How</anno> == read</c>, or there is no outgoing
+ data buffered in the <c><anno>Socket</anno></c> port,
+ then the socket is shutdown immediately and any error encountered
+ is returned in <c><anno>Reason</anno></c>.</p>
+ <p>If there is data buffered in the socket port, then the attempt
+ to shutdown the socket is postponed until that data is written to the
+ kernel socket send buffer. Any errors encountered will result
+ in the socket being closed and <c>{error, closed}</c> being returned
+ on the next
+ <seealso marker="gen_tcp#recv/2">recv/2</seealso> or
+ <seealso marker="gen_tcp#send/2">send/2</seealso>.</p>
<p>To be able to handle that the peer has done a shutdown on
the write side, the <c>{exit_on_close, false}</c> option
is useful.</p>
diff --git a/lib/kernel/test/gen_tcp_api_SUITE.erl b/lib/kernel/test/gen_tcp_api_SUITE.erl
index c27d265550..4a527e2f51 100644
--- a/lib/kernel/test/gen_tcp_api_SUITE.erl
+++ b/lib/kernel/test/gen_tcp_api_SUITE.erl
@@ -32,6 +32,7 @@
t_connect_bad/1,
t_recv_timeout/1, t_recv_eof/1,
t_shutdown_write/1, t_shutdown_both/1, t_shutdown_error/1,
+ t_shutdown_async/1,
t_fdopen/1, t_fdconnect/1, t_implicit_inet6/1]).
-export([getsockfd/0,closesockfd/1]).
@@ -41,7 +42,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[{group, t_accept}, {group, t_connect}, {group, t_recv},
t_shutdown_write, t_shutdown_both, t_shutdown_error,
- t_fdopen, t_fdconnect, t_implicit_inet6].
+ t_shutdown_async, t_fdopen, t_fdconnect, t_implicit_inet6].
groups() ->
[{t_accept, [], [t_accept_timeout]},
@@ -155,7 +156,34 @@ t_shutdown_error(Config) when is_list(Config) ->
?line ok = gen_tcp:close(L),
?line {error, closed} = gen_tcp:shutdown(L, read_write),
ok.
-
+
+t_shutdown_async(Config) when is_list(Config) ->
+ ?line {OS, _} = os:type(),
+ ?line {ok, L} = gen_tcp:listen(0, [{sndbuf, 4096}]),
+ ?line {ok, Port} = inet:port(L),
+ ?line {ok, Client} = gen_tcp:connect(localhost, Port,
+ [{recbuf, 4096},
+ {active, false}]),
+ ?line {ok, S} = gen_tcp:accept(L),
+ ?line PayloadSize = 1024 * 1024,
+ ?line Payload = lists:duplicate(PayloadSize, $.),
+ ?line ok = gen_tcp:send(S, Payload),
+ ?line case erlang:port_info(S, queue_size) of
+ {queue_size, N} when N > 0 -> ok;
+ {queue_size, 0} when OS =:= win32 -> ok;
+ {queue_size, 0} = T -> ?t:fail({unexpected, T})
+ end,
+
+ ?line ok = gen_tcp:shutdown(S, write),
+ ?line {ok, Buf} = gen_tcp:recv(Client, PayloadSize),
+ ?line {error, closed} = gen_tcp:recv(Client, 0),
+ ?line case length(Buf) of
+ PayloadSize -> ok;
+ Sz -> ?t:fail({payload_size,
+ {expected, PayloadSize},
+ {received, Sz}})
+ end.
+
%%% gen_tcp:fdopen/2