diff options
-rw-r--r-- | erts/epmd/src/epmd_srv.c | 1 | ||||
-rw-r--r-- | erts/epmd/test/epmd_SUITE.erl | 58 |
2 files changed, 52 insertions, 7 deletions
diff --git a/erts/epmd/src/epmd_srv.c b/erts/epmd/src/epmd_srv.c index 5b58554590..7100855407 100644 --- a/erts/epmd/src/epmd_srv.c +++ b/erts/epmd/src/epmd_srv.c @@ -702,6 +702,7 @@ static void do_request(g, fd, s, buf, bsize) if (reply(g, fd, wbuf, 4) != 4) { + node_unreg(g, name); dbg_tty_printf(g,1,"** failed to send ALIVE2_RESP for \"%s\"", name); return; diff --git a/erts/epmd/test/epmd_SUITE.erl b/erts/epmd/test/epmd_SUITE.erl index e8bbfdbb18..4de65500e9 100644 --- a/erts/epmd/test/epmd_SUITE.erl +++ b/erts/epmd/test/epmd_SUITE.erl @@ -76,7 +76,9 @@ buffer_overrun_2/1, no_nonlocal_register/1, no_nonlocal_kill/1, - no_live_killing/1 + no_live_killing/1, + + socket_reset_before_alive2_reply_is_written/1 ]). @@ -123,7 +125,8 @@ all() -> returns_valid_populated_extra_with_nulls, names_stdout, {group, buffer_overrun}, no_nonlocal_register, - no_nonlocal_kill, no_live_killing]. + no_nonlocal_kill, no_live_killing, + socket_reset_before_alive2_reply_is_written]. groups() -> [{buffer_overrun, [], @@ -243,11 +246,7 @@ register_node(Name,Port) -> register_node_v2(Port,$M,0,5,5,Name,""). register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) -> - Utf8Name = unicode:characters_to_binary(Name), - Req = [?EPMD_ALIVE2_REQ, put16(Port), NodeType, Prot, - put16(HVsn), put16(LVsn), - put16(size(Utf8Name)), binary_to_list(Utf8Name), - size16(Extra), Extra], + Req = alive2_req(Port, NodeType, Prot, HVsn, LVsn, Name, Extra), case send_req(Req) of {ok,Sock} -> case recv(Sock,4) of @@ -938,6 +937,42 @@ no_live_killing(Config) when is_list(Config) -> ?line close(Sock3), ok. +socket_reset_before_alive2_reply_is_written(doc) -> + ["Check for regression - don't make zombie from node which " + "sends TCP RST at wrong time"]; +socket_reset_before_alive2_reply_is_written(suite) -> + []; +socket_reset_before_alive2_reply_is_written(Config) when is_list(Config) -> + %% - delay_write for easier triggering of race condition + %% - relaxed_command_check for gracefull shutdown of epmd even if there + %% is stuck node. + ?line ok = epmdrun("-delay_write 1 -relaxed_command_check"), + + %% We can't use send_req/1 directly as we want to do inet:setopts/2 + %% on our socket. + ?line {ok, Sock} = connect(), + + %% Issuing close/1 on such socket will result in immediate RST packet. + ?line ok = inet:setopts(Sock, [{linger, {true, 0}}]), + + Req = alive2_req(4711, 77, 0, 5, 5, "test", []), + ?line ok = send(Sock, [size16(Req), Req]), + + timer:sleep(500), %% Wait for the first 1/2 of delay_write before closing + ?line ok = close(Sock), + + timer:sleep(500 + ?SHORT_PAUSE), %% Wait for the other 1/2 of delay_write + + %% Wait another delay_write interval, due to delay doubling in epmd. + %% Should be removed when this is issue is fixed there. + timer:sleep(1000), + + ?line {ok, SockForNames} = connect_active(), + + %% And there should be no stuck nodes + ?line {ok, []} = do_get_names(SockForNames), + ?line ok = close(SockForNames), + ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Terminate all tests with killing epmd. @@ -1200,3 +1235,12 @@ flat_count([_|T], N) -> flat_count(T, N); flat_count([], N) -> N. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +alive2_req(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) -> + Utf8Name = unicode:characters_to_binary(Name), + [?EPMD_ALIVE2_REQ, put16(Port), NodeType, Prot, + put16(HVsn), put16(LVsn), + put16(size(Utf8Name)), binary_to_list(Utf8Name), + size16(Extra), Extra]. |