From 4f63e427f1345b40b484ef16f6ff5e922bf14dac Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Mon, 20 Apr 2015 10:57:53 +0100 Subject: Add 'show_econnreset' TCP socket option An ECONNRESET is a socket error which tells us that a TCP peer has sent an RST. The RST indicates that they have aborted the connection and that the payload we have received should not be considered complete. Up until now, the implementation of TCP in inet_drv.c has hidden the receipt of the RST from the user, treating it as though it was just a FIN terminating the read side of the socket. There are many cases where user code needs to be able to distinguish between a socket that was closed normally and one that was aborted. Setting the option {show_econnreset, true} enables the user to receive ECONNRESET errors on both active and passive sockets. A connected socket returned from gen_tcp:accept/1 will inherit the show_econnreset setting of the listening socket. By default this option is set to {show_econnreset, false}. Note that this patch only enables the reporting of ECONNRESET when the socket is being read from. It does not report ECONNRESET (or EPIPE) when the user tries to write to a connection when an RST has already been received. Currently the TCP implementation in inet_drv.c hides all such send errors from the user in favour of returning {error, close}. A separate patch will be needed to enable the reporting of such errors. --- erts/emulator/drivers/common/inet_drv.c | 47 +++++++++++++- erts/preloaded/src/prim_inet.erl | 3 + lib/kernel/doc/src/inet.xml | 27 ++++++++ lib/kernel/src/gen_tcp.erl | 2 + lib/kernel/src/inet.erl | 7 ++- lib/kernel/src/inet_int.hrl | 1 + lib/kernel/test/gen_tcp_misc_SUITE.erl | 107 ++++++++++++++++++++++++++++++++ 7 files changed, 189 insertions(+), 5 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index e001f31932..b5903f0c07 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -835,6 +835,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_PENDING_SHUT_RDWR 32 /* Call shutdown(sock, SHUT_RDWR) when queue empties */ #define TCP_ADDF_PENDING_SHUTDOWN \ (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR) +#define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -879,6 +880,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_LOPT_MSGQ_HIWTRMRK 36 /* set local msgq high watermark */ #define INET_LOPT_MSGQ_LOWTRMRK 37 /* set local msgq low watermark */ #define INET_LOPT_NETNS 38 /* Network namespace pathname */ +#define INET_LOPT_TCP_SHOW_ECONNRESET 39 /* tell user about incoming RST */ /* SCTP options: a separate range, from 100: */ #define SCTP_OPT_RTOINFO 100 #define SCTP_OPT_ASSOCINFO 101 @@ -6255,6 +6257,16 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) continue; #endif + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (ival) + tdesc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; + } + continue; + case INET_OPT_REUSEADDR: #ifdef __WIN32__ continue; /* Bjorn says */ @@ -7234,6 +7246,17 @@ static ErlDrvSSizeT inet_fill_opts(inet_descriptor* desc, continue; #endif + case INET_LOPT_TCP_SHOW_ECONNRESET: + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + *ptr++ = opt; + ival = !!(tdesc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + put_int32(ival, ptr); + } else { + TRUNCATE_TO(0,ptr); + } + continue; + case INET_OPT_PRIORITY: #ifdef SO_PRIORITY type = SO_PRIORITY; @@ -9077,6 +9100,11 @@ static tcp_descriptor* tcp_inet_copy(tcp_descriptor* desc,SOCKET s, copy_desc->low = desc->low; copy_desc->send_timeout = desc->send_timeout; copy_desc->send_timeout_close = desc->send_timeout_close; + + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + copy_desc->tcp_add_flags |= TCP_ADDF_SHOW_ECONNRESET; + else + copy_desc->tcp_add_flags &= ~TCP_ADDF_SHOW_ECONNRESET; /* The new port will be linked and connected to the original caller */ port = driver_create_port(port, owner, "tcp_inet", (ErlDrvData) copy_desc); @@ -10014,7 +10042,10 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int err = sock_errno(); if (err == ECONNRESET) { DEBUGF((" => detected close (connreset)\r\n")); - return tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) + return tcp_recv_error(desc, err); + else + return tcp_recv_closed(desc); } if (err == ERRNO_BLOCK) { DEBUGF((" => would block\r\n")); @@ -10226,7 +10257,19 @@ static void tcp_inet_event(ErlDrvData e, ErlDrvEvent event) if (netEv.lNetworkEvents & FD_CLOSE) { /* error in err = netEv.iErrorCode[FD_CLOSE_BIT] */ DEBUGF(("Detected close in %s, line %d\r\n", __FILE__, __LINE__)); - tcp_recv_closed(desc); + if (desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET) { + err = netEv.iErrorCode[FD_CLOSE_BIT]; + if (err == ECONNRESET) + tcp_recv_error(desc, err); + else if (err == ECONNABORTED && IS_CONNECTED(INETP(desc))) { + /* translate this error to ECONNRESET */ + tcp_recv_error(desc, ECONNRESET); + } + else + tcp_recv_closed(desc); + } + else + tcp_recv_closed(desc); } DEBUGF(("tcp_inet_event(%ld) }\r\n", (long)desc->inet.port)); return; diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 622e1be869..fb0f67aa8a 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -1140,6 +1140,7 @@ enc_opt(delay_send) -> ?INET_LOPT_TCP_DELAY_SEND; enc_opt(packet_size) -> ?INET_LOPT_PACKET_SIZE; enc_opt(read_packets) -> ?INET_LOPT_READ_PACKETS; enc_opt(netns) -> ?INET_LOPT_NETNS; +enc_opt(show_econnreset) -> ?INET_LOPT_TCP_SHOW_ECONNRESET; enc_opt(raw) -> ?INET_OPT_RAW; % Names of SCTP opts: enc_opt(sctp_rtoinfo) -> ?SCTP_OPT_RTOINFO; @@ -1197,6 +1198,7 @@ dec_opt(?INET_LOPT_TCP_DELAY_SEND) -> delay_send; dec_opt(?INET_LOPT_PACKET_SIZE) -> packet_size; dec_opt(?INET_LOPT_READ_PACKETS) -> read_packets; dec_opt(?INET_LOPT_NETNS) -> netns; +dec_opt(?INET_LOPT_TCP_SHOW_ECONNRESET) -> show_econnreset; dec_opt(?INET_OPT_RAW) -> raw; dec_opt(I) when is_integer(I) -> undefined. @@ -1296,6 +1298,7 @@ type_opt_1(delay_send) -> bool; type_opt_1(packet_size) -> uint; type_opt_1(read_packets) -> uint; type_opt_1(netns) -> binary; +type_opt_1(show_econnreset) -> bool; %% %% SCTP options (to be set). If the type is a record type, the corresponding %% record signature is returned, otherwise, an "elementary" type tag diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index 77a8caaaf6..a364f9e0c9 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -1037,6 +1037,33 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp + {show_econnreset, Boolean}(TCP/IP sockets) + +

When this option is set to false, as it is by + default, an RST that is received from the TCP peer is treated + as a normal close (as though a FIN was sent). A caller + to gen_tcp:recv/2 + will get {error, closed}. In active + mode the controlling process will receive a + {tcp_close, Socket} message, indicating that the + peer has closed the connection.

+

Setting this option to true will allow you to + distinguish between a connection that was closed normally, + and one which was aborted (intentionally or unintentionally) + by the TCP peer. A call to + gen_tcp:recv/2 + will return {error, econnreset}. In + active mode, the controlling process will receive a + {tcp_error, Socket, econnreset} message + before the usual {tcp_closed, Socket}, as is + the case for any other socket error.

+

A connected socket returned from + gen_tcp:accept/1 + will inherit the show_econnreset setting from the + listening socket.

+ +
+ {sndbuf, Size}

The minimum size of the send buffer to use for the socket. diff --git a/lib/kernel/src/gen_tcp.erl b/lib/kernel/src/gen_tcp.erl index bc8ffbe5e3..86251fc8f1 100644 --- a/lib/kernel/src/gen_tcp.erl +++ b/lib/kernel/src/gen_tcp.erl @@ -58,6 +58,7 @@ {reuseaddr, boolean()} | {send_timeout, non_neg_integer() | infinity} | {send_timeout_close, boolean()} | + {show_econnreset, boolean()} | {sndbuf, non_neg_integer()} | {tos, non_neg_integer()} | {ipv6_v6only, boolean()}. @@ -89,6 +90,7 @@ reuseaddr | send_timeout | send_timeout_close | + show_econnreset | sndbuf | tos | ipv6_v6only. diff --git a/lib/kernel/src/inet.erl b/lib/kernel/src/inet.erl index d668738109..1ae90aaf0c 100644 --- a/lib/kernel/src/inet.erl +++ b/lib/kernel/src/inet.erl @@ -654,7 +654,7 @@ options() -> multicast_if, multicast_ttl, multicast_loop, exit_on_close, high_watermark, low_watermark, high_msgq_watermark, low_msgq_watermark, - send_timeout, send_timeout_close + send_timeout, send_timeout_close, show_econnreset ]. %% Return a list of statistics options @@ -672,7 +672,8 @@ connect_options() -> [tos, priority, reuseaddr, keepalive, linger, sndbuf, recbuf, nodelay, header, active, packet, packet_size, buffer, mode, deliver, exit_on_close, high_watermark, low_watermark, high_msgq_watermark, - low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw]. + low_msgq_watermark, send_timeout, send_timeout_close, delay_send, raw, + show_econnreset]. connect_options(Opts, Family) -> BaseOpts = @@ -740,7 +741,7 @@ listen_options() -> header, active, packet, buffer, mode, deliver, backlog, ipv6_v6only, exit_on_close, high_watermark, low_watermark, high_msgq_watermark, low_msgq_watermark, send_timeout, send_timeout_close, delay_send, - packet_size, raw]. + packet_size, raw, show_econnreset]. listen_options(Opts, Family) -> BaseOpts = diff --git a/lib/kernel/src/inet_int.hrl b/lib/kernel/src/inet_int.hrl index 889b596a22..7bd973af99 100644 --- a/lib/kernel/src/inet_int.hrl +++ b/lib/kernel/src/inet_int.hrl @@ -147,6 +147,7 @@ -define(INET_LOPT_MSGQ_HIWTRMRK, 36). -define(INET_LOPT_MSGQ_LOWTRMRK, 37). -define(INET_LOPT_NETNS, 38). +-define(INET_LOPT_TCP_SHOW_ECONNRESET, 39). % Specific SCTP options: separate range: -define(SCTP_OPT_RTOINFO, 100). -define(SCTP_OPT_ASSOCINFO, 101). diff --git a/lib/kernel/test/gen_tcp_misc_SUITE.erl b/lib/kernel/test/gen_tcp_misc_SUITE.erl index 4f0d7a7d50..f08f28b650 100644 --- a/lib/kernel/test/gen_tcp_misc_SUITE.erl +++ b/lib/kernel/test/gen_tcp_misc_SUITE.erl @@ -31,6 +31,8 @@ init_per_testcase/2, end_per_testcase/2, otp_3924/1, otp_3924_sender/4, closed_socket/1, shutdown_active/1, shutdown_passive/1, shutdown_pending/1, + show_econnreset_active/1, show_econnreset_active_once/1, + show_econnreset_passive/1, default_options/1, http_bad_packet/1, busy_send/1, busy_disconnect_passive/1, busy_disconnect_active/1, fill_sendq/1, partial_recv_and_close/1, @@ -92,6 +94,8 @@ all() -> iter_max_socks, passive_sockets, active_n, accept_closed_by_other_process, otp_3924, closed_socket, shutdown_active, shutdown_passive, shutdown_pending, + show_econnreset_active, show_econnreset_active_once, + show_econnreset_passive, default_options, http_bad_packet, busy_send, busy_disconnect_passive, busy_disconnect_active, fill_sendq, partial_recv_and_close, @@ -1075,6 +1079,109 @@ shutdown_pending(Config) when is_list(Config) -> gen_tcp:close(S) end. +%% +%% Test 'show_econnreset' option +%% + +show_econnreset_active(Config) when is_list(Config) -> + %% First confirm everything works with option turned off. + {ok, L} = gen_tcp:listen(0, []), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, [{active, false}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = inet:setopts(Client, [{linger, {true, 0}}]), + ok = gen_tcp:close(Client), + receive + {tcp_closed, S} -> + ok; + Other -> + ?t:fail({unexpected1, Other}) + after 1000 -> + ?t:fail({timeout, {server, no_tcp_closed}}) + end, + + %% Now test with option switched on. + %% Note: We are also testing that the show_econnreset option is + %% inherited from the listening socket by the accepting socket. + {ok, L1} = gen_tcp:listen(0, [{show_econnreset, true}]), + {ok, Port1} = inet:port(L1), + {ok, Client1} = gen_tcp:connect(localhost, Port1, [{active, false}]), + {ok, S1} = gen_tcp:accept(L1), + ok = gen_tcp:close(L1), + ok = inet:setopts(Client1, [{linger, {true, 0}}]), + ok = gen_tcp:close(Client1), + receive + {tcp_error, S1, econnreset} -> + receive + {tcp_closed, S1} -> + ok; + Other1 -> + ?t:fail({unexpected2, Other1}) + after 1 -> + ?t:fail({timeout, {server, no_tcp_closed}}) + end; + Other2 -> + ?t:fail({unexpected3, Other2}) + after 1000 -> + ?t:fail({timeout, {server, no_tcp_error}}) + end. + +show_econnreset_active_once(Config) when is_list(Config) -> + %% Now test using {active, once} + {ok, L} = gen_tcp:listen(0, + [{active, false}, + {show_econnreset, true}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, [{active, false}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = inet:setopts(Client, [{linger, {true, 0}}]), + ok = gen_tcp:close(Client), + ok = ?t:sleep(20), + ok = receive Msg -> {unexpected_msg, Msg} after 0 -> ok end, + ok = inet:setopts(S, [{active, once}]), + receive + {tcp_error, S, econnreset} -> + receive + {tcp_closed, S} -> + ok; + Other1 -> + ?t:fail({unexpected1, Other1}) + after 1 -> + ?t:fail({timeout, {server, no_tcp_closed}}) + end; + Other2 -> + ?t:fail({unexpected2, Other2}) + after 1000 -> + ?t:fail({timeout, {server, no_tcp_error}}) + end. + +show_econnreset_passive(Config) when is_list(Config) -> + %% First confirm everything works with option turned off. + {ok, L} = gen_tcp:listen(0, [{active, false}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, [{active, false}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = inet:setopts(S, [{linger, {true, 0}}]), + ok = gen_tcp:close(S), + ok = ?t:sleep(1), + {error, closed} = gen_tcp:recv(Client, 0), + + %% Now test with option switched on. + {ok, L1} = gen_tcp:listen(0, [{active, false}]), + {ok, Port1} = inet:port(L1), + {ok, Client1} = gen_tcp:connect(localhost, Port1, + [{active, false}, + {show_econnreset, true}]), + {ok, S1} = gen_tcp:accept(L1), + ok = gen_tcp:close(L1), + ok = inet:setopts(S1, [{linger, {true, 0}}]), + ok = gen_tcp:close(S1), + ok = ?t:sleep(1), + {error, econnreset} = gen_tcp:recv(Client1, 0). + %% Thanks to Luke Gorrie. Tests for a very specific problem with %% corrupt data. The testcase will be killed by the timetrap timeout -- cgit v1.2.3 From 0098eed847516cc6760d961421c83527816e35ae Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Sat, 2 May 2015 20:34:24 +0100 Subject: Apply 'show_econnreset' socket option to send errors as well Up till now all send errors have been translated into a generic {error, closed}. This patch allows {error, econnreset} to be returned on send errors when it is detected that the TCP peer has sent an RST. --- erts/emulator/drivers/common/inet_drv.c | 84 +++++++++++++-- lib/kernel/doc/src/inet.xml | 5 +- lib/kernel/test/gen_tcp_misc_SUITE.erl | 179 +++++++++++++++++++++++++++++++- 3 files changed, 257 insertions(+), 11 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index b5903f0c07..a8c58a8149 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -836,6 +836,8 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_PENDING_SHUTDOWN \ (TCP_ADDF_PENDING_SHUT_WR | TCP_ADDF_PENDING_SHUT_RDWR) #define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */ +#define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occured on send or shutdown */ +#define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -6140,7 +6142,12 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) desc->active_count = 0; if ((desc->stype == SOCK_STREAM) && (desc->active != INET_PASSIVE) && (desc->state == INET_STATE_CLOSED)) { - tcp_closed_message((tcp_descriptor *) desc); + tcp_descriptor *tdesc = (tcp_descriptor *) desc; + if (tdesc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + tdesc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + tcp_error_message(tdesc, ECONNRESET); + } + tcp_closed_message(tdesc); if (desc->exitf) { driver_exit(desc->port, 0); return 0; /* Give up on this socket, descriptor lost */ @@ -9466,7 +9473,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_RECV) { desc->tcp_add_flags &= ~(TCP_ADDF_DELAYED_CLOSE_RECV| TCP_ADDF_DELAYED_CLOSE_SEND); - return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_ECONNRESET; + return ctl_reply(INET_REP_ERROR, "econnreset", 10, rbuf, rsize); + } else + return ctl_reply(INET_REP_ERROR, "closed", 6, rbuf, rsize); } return ctl_error(ENOTCONN, rbuf, rsize); } @@ -9527,6 +9538,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) { + if (how != TCP_SHUT_RD) + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; return ctl_error(sock_errno(), rbuf, rsize); } else { return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); @@ -9661,7 +9674,13 @@ static void tcp_inet_commandv(ErlDrvData e, ErlIOVec* ev) if (!IS_CONNECTED(INETP(desc))) { if (desc->tcp_add_flags & TCP_ADDF_DELAYED_CLOSE_SEND) { desc->tcp_add_flags &= ~TCP_ADDF_DELAYED_CLOSE_SEND; - inet_reply_error_am(INETP(desc), am_closed); + if (desc->tcp_add_flags & TCP_ADDF_DELAYED_ECONNRESET) { + /* Don't clear flag. Leave it enabled for the next receive + * operation. + */ + inet_reply_error(INETP(desc), ECONNRESET); + } else + inet_reply_error_am(INETP(desc), am_closed); } else inet_reply_error(INETP(desc), ENOTCONN); @@ -10578,6 +10597,9 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) { + int show_econnreset = (err == ECONNRESET + && desc->tcp_add_flags & TCP_ADDF_SHOW_ECONNRESET); + /* * If the port is busy, we must do some clean-up before proceeding. */ @@ -10593,14 +10615,21 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) /* * We used to handle "expected errors" differently from unexpected ones. - * Now we handle all errors in the same way. We just have to distinguish - * between passive and active sockets. + * Now we handle all errors in the same way (unless the show_econnreset + * socket option is enabled). We just have to distinguish between passive + * and active sockets. */ DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", (long)desc->inet.port, __FILE__, __LINE__)); if (desc->inet.active) { - tcp_closed_message(desc); - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) { + tcp_error_message(desc, err); + tcp_closed_message(desc); + inet_reply_error(INETP(desc), err); + } else { + tcp_closed_message(desc); + inet_reply_error_am(INETP(desc), am_closed); + } if (desc->inet.exitf) driver_exit(desc->inet.port, 0); else @@ -10612,7 +10641,10 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) erl_inet_close(INETP(desc)); if (desc->inet.caller) { - inet_reply_error_am(INETP(desc), am_closed); + if (show_econnreset) + inet_reply_error(INETP(desc), err); + else + inet_reply_error_am(INETP(desc), am_closed); } else { /* No blocking send op to reply to right now. @@ -10629,12 +10661,46 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) * in the receive operation. */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_RECV; + + if (show_econnreset) { + /* Return {error, econnreset} instead of {error, closed} + * on send or receive operations. + */ + desc->tcp_add_flags |= TCP_ADDF_DELAYED_ECONNRESET; + } } return -1; } static int tcp_send_error(tcp_descriptor* desc, int err) { + /* EPIPE errors usually occur in one of three ways: + * 1. We write to a socket when we've already shutdown() the write side. On + * Windows the error returned for this is ESHUTDOWN rather than EPIPE. + * 2. The TCP peer sends us an RST through no fault of our own (perhaps + * by aborting the connection using SO_LINGER) and we then attempt + * to write to the socket. On Linux and Windows we would actually + * receive an ECONNRESET error for this, but on the BSDs, Darwin, + * Illumos and presumably Solaris, it's an EPIPE. + * 3. We cause the TCP peer to send us an RST by writing to a socket + * after we receive a FIN from them. Our first write will be + * successful, but if the they have closed the connection (rather + * than just shutting down the write side of it) this will cause their + * OS to send us an RST. Then, when we attempt to write to the socket + * a second time, we will get an EPIPE error. On Windows we get an + * ECONNABORTED. + * + * What we are going to do here is to treat all EPIPE messages that aren't + * of type 1 as ECONNRESET errors. This will allow users who have the + * show_econnreset socket option enabled to receive {error, econnreset} on + * both send and recv operations to indicate that an RST has been received. + */ +#ifdef __WIN_32__ + if (err == ECONNABORTED) + err = ECONNRESET; +#endif + if (err == EPIPE && !(desc->tcp_add_flags & TCP_ADDF_SHUTDOWN_WR_DONE)) + err = ECONNRESET; return tcp_send_or_shutdown_error(desc, err); } @@ -10854,6 +10920,8 @@ static void tcp_shutdown_async(tcp_descriptor* desc) TCP_SHUT_WR : TCP_SHUT_RDWR; if (IS_SOCKET_ERROR(sock_shutdown(INETP(desc)->s, how))) tcp_shutdown_error(desc, sock_errno()); + else + desc->tcp_add_flags |= TCP_ADDF_SHUTDOWN_WR_DONE; } #ifdef __OSE__ diff --git a/lib/kernel/doc/src/inet.xml b/lib/kernel/doc/src/inet.xml index a364f9e0c9..4dd9e0e221 100644 --- a/lib/kernel/doc/src/inet.xml +++ b/lib/kernel/doc/src/inet.xml @@ -1056,7 +1056,10 @@ setcap cap_sys_admin,cap_sys_ptrace,cap_dac_read_search+epi beam.smp active mode, the controlling process will receive a {tcp_error, Socket, econnreset} message before the usual {tcp_closed, Socket}, as is - the case for any other socket error.

+ the case for any other socket error. Calls to + gen_tcp:send/2 + will also return {error, econnreset} when it + is detected that a TCP peer has sent an RST.

A connected socket returned from gen_tcp:accept/1 will inherit the show_econnreset setting from the diff --git a/lib/kernel/test/gen_tcp_misc_SUITE.erl b/lib/kernel/test/gen_tcp_misc_SUITE.erl index f08f28b650..ddd9d356ee 100644 --- a/lib/kernel/test/gen_tcp_misc_SUITE.erl +++ b/lib/kernel/test/gen_tcp_misc_SUITE.erl @@ -32,7 +32,10 @@ otp_3924/1, otp_3924_sender/4, closed_socket/1, shutdown_active/1, shutdown_passive/1, shutdown_pending/1, show_econnreset_active/1, show_econnreset_active_once/1, - show_econnreset_passive/1, + show_econnreset_passive/1, econnreset_after_sync_send/1, + econnreset_after_async_send_active/1, + econnreset_after_async_send_active_once/1, + econnreset_after_async_send_passive/1, default_options/1, http_bad_packet/1, busy_send/1, busy_disconnect_passive/1, busy_disconnect_active/1, fill_sendq/1, partial_recv_and_close/1, @@ -95,7 +98,10 @@ all() -> accept_closed_by_other_process, otp_3924, closed_socket, shutdown_active, shutdown_passive, shutdown_pending, show_econnreset_active, show_econnreset_active_once, - show_econnreset_passive, + show_econnreset_passive, econnreset_after_sync_send, + econnreset_after_async_send_active, + econnreset_after_async_send_active_once, + econnreset_after_async_send_passive, default_options, http_bad_packet, busy_send, busy_disconnect_passive, busy_disconnect_active, fill_sendq, partial_recv_and_close, @@ -1182,6 +1188,175 @@ show_econnreset_passive(Config) when is_list(Config) -> ok = ?t:sleep(1), {error, econnreset} = gen_tcp:recv(Client1, 0). +econnreset_after_sync_send(Config) when is_list(Config) -> + %% First confirm everything works with option turned off. + {ok, L} = gen_tcp:listen(0, [{active, false}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, [{active, false}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = inet:setopts(S, [{linger, {true, 0}}]), + ok = gen_tcp:close(S), + ok = ?t:sleep(20), + {error, closed} = gen_tcp:send(Client, "Whatever"), + + %% Now test with option switched on. + {ok, L1} = gen_tcp:listen(0, [{active, false}]), + {ok, Port1} = inet:port(L1), + {ok, Client1} = gen_tcp:connect(localhost, Port1, + [{active, false}, + {show_econnreset, true}]), + {ok, S1} = gen_tcp:accept(L1), + ok = gen_tcp:close(L1), + ok = inet:setopts(S1, [{linger, {true, 0}}]), + ok = gen_tcp:close(S1), + ok = ?t:sleep(20), + {error, econnreset} = gen_tcp:send(Client1, "Whatever"). + +econnreset_after_async_send_active(Config) when is_list(Config) -> + {OS, _} = os:type(), + Payload = lists:duplicate(1024 * 1024, $.), + + %% First confirm everything works with option turned off. + {ok, L} = gen_tcp:listen(0, [{active, false}, {recbuf, 4096}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, [{sndbuf, 4096}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = gen_tcp:send(Client, Payload), + case erlang:port_info(Client, queue_size) of + {queue_size, N} when N > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T -> ?t:fail(T) + end, + ok = gen_tcp:send(S, "Whatever"), + ok = ?t:sleep(20), + ok = inet:setopts(S, [{linger, {true, 0}}]), + ok = gen_tcp:close(S), + ok = ?t:sleep(20), + receive + {tcp, Client, "Whatever"} -> + receive + {tcp_closed, Client} -> + ok; + Other1 -> + ?t:fail({unexpected1, Other1}) + end; + Other2 -> + ?t:fail({unexpected2, Other2}) + end, + + %% Now test with option switched on. + {ok, L1} = gen_tcp:listen(0, [{active, false}, {recbuf, 4096}]), + {ok, Port1} = inet:port(L1), + {ok, Client1} = gen_tcp:connect(localhost, Port1, + [{sndbuf, 4096}, + {show_econnreset, true}]), + {ok, S1} = gen_tcp:accept(L1), + ok = gen_tcp:close(L1), + ok = gen_tcp:send(Client1, Payload), + case erlang:port_info(Client1, queue_size) of + {queue_size, N1} when N1 > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T1 -> ?t:fail(T1) + end, + ok = gen_tcp:send(S1, "Whatever"), + ok = ?t:sleep(20), + ok = inet:setopts(S1, [{linger, {true, 0}}]), + ok = gen_tcp:close(S1), + ok = ?t:sleep(20), + receive + {tcp, Client1, "Whatever"} -> + receive + {tcp_error, Client1, econnreset} -> + receive + {tcp_closed, Client1} -> + ok; + Other3 -> + ?t:fail({unexpected3, Other3}) + end; + Other4 -> + ?t:fail({unexpected4, Other4}) + end; + Other5 -> + ?t:fail({unexpected5, Other5}) + end. + +econnreset_after_async_send_active_once(Config) when is_list(Config) -> + {OS, _} = os:type(), + {ok, L} = gen_tcp:listen(0, [{active, false}, {recbuf, 4096}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, + [{active, false}, + {sndbuf, 4096}, + {show_econnreset, true}]), + {ok,S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + Payload = lists:duplicate(1024 * 1024, $.), + ok = gen_tcp:send(Client, Payload), + case erlang:port_info(Client, queue_size) of + {queue_size, N} when N > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T -> ?t:fail(T) + end, + ok = gen_tcp:send(S, "Whatever"), + ok = ?t:sleep(20), + ok = inet:setopts(S, [{linger, {true, 0}}]), + ok = gen_tcp:close(S), + ok = ?t:sleep(20), + ok = receive Msg -> {unexpected_msg, Msg} after 0 -> ok end, + ok = inet:setopts(Client, [{active, once}]), + receive + {tcp_error, Client, econnreset} -> + receive + {tcp_closed, Client} -> + ok; + Other -> + ?t:fail({unexpected1, Other}) + end; + Other -> + ?t:fail({unexpected2, Other}) + end. + +econnreset_after_async_send_passive(Config) when is_list(Config) -> + {OS, _} = os:type(), + Payload = lists:duplicate(1024 * 1024, $.), + + %% First confirm everything works with option turned off. + {ok, L} = gen_tcp:listen(0, [{active, false}, {recbuf, 4096}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, + [{active, false}, + {sndbuf, 4096}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + ok = inet:setopts(S, [{linger, {true, 0}}]), + ok = gen_tcp:send(S, "Whatever"), + ok = gen_tcp:send(Client, Payload), + case erlang:port_info(Client, queue_size) of + {queue_size, N} when N > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T -> ?t:fail(T) + end, + ok = gen_tcp:close(S), + ok = ?t:sleep(20), + {error, closed} = gen_tcp:recv(Client, 0), + + %% Now test with option switched on. + {ok, L1} = gen_tcp:listen(0, [{active, false}, {recbuf, 4096}]), + {ok, Port1} = inet:port(L1), + {ok, Client1} = gen_tcp:connect(localhost, Port1, + [{active, false}, + {sndbuf, 4096}, + {show_econnreset, true}]), + {ok, S1} = gen_tcp:accept(L1), + ok = gen_tcp:close(L1), + ok = inet:setopts(S1, [{linger, {true, 0}}]), + ok = gen_tcp:send(S1, "Whatever"), + ok = gen_tcp:send(Client1, Payload), + ok = gen_tcp:close(S1), + ok = ?t:sleep(20), + {error, econnreset} = gen_tcp:recv(Client1, 0). %% Thanks to Luke Gorrie. Tests for a very specific problem with %% corrupt data. The testcase will be killed by the timetrap timeout -- cgit v1.2.3 From eed21b4fd99c8ab71dcefd3802104186af1cb6b0 Mon Sep 17 00:00:00 2001 From: Rory Byrne Date: Thu, 16 Apr 2015 18:01:23 +0100 Subject: Fix socket option {linger, {true, 0}} to abort TCP connections Up until now, if {linger, {true, 0}} is set on the socket and there is data in the port driver queue, the connection is not aborted until the port queue is empty and close() is called on the underlying file descriptor. This bug allows an idle TCP client to prevent a server from terminating the connection and freeing resources. This patch fixes the problem by discarding the port queue if the socket is closed when {linger, {true, 0}} is set. --- erts/emulator/drivers/common/inet_drv.c | 10 +++++++++ erts/preloaded/src/prim_inet.erl | 13 ++++++++---- lib/kernel/test/gen_tcp_misc_SUITE.erl | 37 +++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index a8c58a8149..80cb705896 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -838,6 +838,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define TCP_ADDF_SHOW_ECONNRESET 64 /* Tell user about incoming RST */ #define TCP_ADDF_DELAYED_ECONNRESET 128 /* An ECONNRESET error occured on send or shutdown */ #define TCP_ADDF_SHUTDOWN_WR_DONE 256 /* A shutdown(sock, SHUT_WR) or SHUT_RDWR was made */ +#define TCP_ADDF_LINGER_ZERO 512 /* Discard driver queue on port close */ /* *_REQ_* replies */ #define INET_REP_ERROR 0 @@ -6318,6 +6319,13 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len) arg_sz = sizeof(li_val); DEBUGF(("inet_set_opts(%ld): s=%d, SO_LINGER=%d,%d", (long)desc->port, desc->s, li_val.l_onoff,li_val.l_linger)); + if (desc->sprotocol == IPPROTO_TCP) { + tcp_descriptor* tdesc = (tcp_descriptor*) desc; + if (li_val.l_onoff && li_val.l_linger == 0) + tdesc->tcp_add_flags |= TCP_ADDF_LINGER_ZERO; + else + tdesc->tcp_add_flags &= ~TCP_ADDF_LINGER_ZERO; + } break; case INET_OPT_PRIORITY: @@ -9699,6 +9707,8 @@ static void tcp_inet_flush(ErlDrvData e) /* Discard send queue to avoid hanging port (OTP-7615) */ tcp_clear_output(desc); } + if (desc->tcp_add_flags & TCP_ADDF_LINGER_ZERO) + tcp_clear_output(desc); } static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index fb0f67aa8a..5e0b38aa68 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -146,11 +146,16 @@ shutdown_1(S, How) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% close(S) when is_port(S) -> - case subscribe(S, [subs_empty_out_q]) of - {ok, [{subs_empty_out_q,N}]} when N > 0 -> - close_pend_loop(S, N); %% wait for pending output to be sent + case getopt(S, linger) of + {ok,{true,0}} -> + close_port(S); _ -> - close_port(S) + case subscribe(S, [subs_empty_out_q]) of + {ok, [{subs_empty_out_q,N}]} when N > 0 -> + close_pend_loop(S, N); %% wait for pending output to be sent + _ -> + close_port(S) + end end. close_pend_loop(S, N) -> diff --git a/lib/kernel/test/gen_tcp_misc_SUITE.erl b/lib/kernel/test/gen_tcp_misc_SUITE.erl index ddd9d356ee..83bfd1f69d 100644 --- a/lib/kernel/test/gen_tcp_misc_SUITE.erl +++ b/lib/kernel/test/gen_tcp_misc_SUITE.erl @@ -35,7 +35,7 @@ show_econnreset_passive/1, econnreset_after_sync_send/1, econnreset_after_async_send_active/1, econnreset_after_async_send_active_once/1, - econnreset_after_async_send_passive/1, + econnreset_after_async_send_passive/1, linger_zero/1, default_options/1, http_bad_packet/1, busy_send/1, busy_disconnect_passive/1, busy_disconnect_active/1, fill_sendq/1, partial_recv_and_close/1, @@ -101,7 +101,7 @@ all() -> show_econnreset_passive, econnreset_after_sync_send, econnreset_after_async_send_active, econnreset_after_async_send_active_once, - econnreset_after_async_send_passive, + econnreset_after_async_send_passive, linger_zero, default_options, http_bad_packet, busy_send, busy_disconnect_passive, busy_disconnect_active, fill_sendq, partial_recv_and_close, @@ -1358,6 +1358,39 @@ econnreset_after_async_send_passive(Config) when is_list(Config) -> ok = ?t:sleep(20), {error, econnreset} = gen_tcp:recv(Client1, 0). +%% +%% Test {linger {true, 0}} aborts a connection +%% + +linger_zero(Config) when is_list(Config) -> + %% All the econnreset tests will prove that {linger, {true, 0}} aborts + %% a connection when the driver queue is empty. We will test here + %% that it also works when the driver queue is not empty. + {OS, _} = os:type(), + {ok, L} = gen_tcp:listen(0, [{active, false}, + {recbuf, 4096}, + {show_econnreset, true}]), + {ok, Port} = inet:port(L), + {ok, Client} = gen_tcp:connect(localhost, Port, + [{active, false}, + {sndbuf, 4096}]), + {ok, S} = gen_tcp:accept(L), + ok = gen_tcp:close(L), + PayloadSize = 1024 * 1024, + Payload = lists:duplicate(PayloadSize, $.), + ok = gen_tcp:send(Client, Payload), + case erlang:port_info(Client, queue_size) of + {queue_size, N} when N > 0 -> ok; + {queue_size, 0} when OS =:= win32 -> ok; + {queue_size, 0} = T -> ?t:fail(T) + end, + ok = inet:setopts(Client, [{linger, {true, 0}}]), + ok = gen_tcp:close(Client), + ok = ?t:sleep(1), + undefined = erlang:port_info(Client, connected), + {error, econnreset} = gen_tcp:recv(S, PayloadSize). + + %% Thanks to Luke Gorrie. Tests for a very specific problem with %% corrupt data. The testcase will be killed by the timetrap timeout %% if the bug is present. -- cgit v1.2.3 From 485187dfdb5ec628efed755c88331ca296bc33dc Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Mon, 15 Jun 2015 12:01:45 +0200 Subject: Update prim_inet.beam --- erts/preloaded/ebin/prim_inet.beam | Bin 73092 -> 72748 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam index 6729f06b79..5a188be3ba 100644 Binary files a/erts/preloaded/ebin/prim_inet.beam and b/erts/preloaded/ebin/prim_inet.beam differ -- cgit v1.2.3