From 8dad1451dd6e41f30741ac7554da238aa63c163a Mon Sep 17 00:00:00 2001 From: Andrew Majorov Date: Thu, 1 Nov 2012 16:50:47 +0400 Subject: Make listener supervisor failures less painful Two general issues were addressed. The first one is the issue with statically defined pids passed into childspecs. This issue prevents regular supervisor' children restarts in the case of someone's failure. The second one is the not quite appropriate restart strategy. Changed to rest_for_one which in pair with previous fixes assures that live connections will not die in the case of partial failure. Among possible failures are listening socket shutdown or frequent accept errors. --- src/ranch_acceptors_sup.erl | 14 +++++++------- src/ranch_conns_sup.erl | 11 ++++++----- src/ranch_listener.erl | 1 + src/ranch_listener_sup.erl | 36 +++++++++++++++++++----------------- src/ranch_server.erl | 15 ++++++++++++++- 5 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/ranch_acceptors_sup.erl b/src/ranch_acceptors_sup.erl index 1d9503c..f1908a6 100644 --- a/src/ranch_acceptors_sup.erl +++ b/src/ranch_acceptors_sup.erl @@ -17,7 +17,7 @@ -behaviour(supervisor). %% API. --export([start_link/7]). +-export([start_link/5]). %% supervisor. -export([init/1]). @@ -25,16 +25,16 @@ %% API. -spec start_link(any(), non_neg_integer(), module(), any(), - module(), pid(), pid()) -> {ok, pid()}. -start_link(Ref, NbAcceptors, Transport, TransOpts, - Protocol, ListenerPid, ConnsPid) -> + module()) -> {ok, pid()}. +start_link(Ref, NbAcceptors, Transport, TransOpts, Protocol) -> supervisor:start_link(?MODULE, [Ref, NbAcceptors, Transport, TransOpts, - Protocol, ListenerPid, ConnsPid]). + Protocol]). %% supervisor. -init([Ref, NbAcceptors, Transport, TransOpts, - Protocol, ListenerPid, ConnsPid]) -> +init([Ref, NbAcceptors, Transport, TransOpts, Protocol]) -> + ListenerPid = ranch_server:lookup_listener(Ref), + ConnsPid = ranch_server:lookup_connections_sup(Ref), LSocket = case proplists:get_value(socket, TransOpts) of undefined -> {ok, Socket} = Transport:listen(TransOpts), diff --git a/src/ranch_conns_sup.erl b/src/ranch_conns_sup.erl index 99853c5..3cc09be 100644 --- a/src/ranch_conns_sup.erl +++ b/src/ranch_conns_sup.erl @@ -17,7 +17,7 @@ -behaviour(supervisor). %% API. --export([start_link/0]). +-export([start_link/1]). -export([start_protocol/5]). %% supervisor. @@ -25,9 +25,9 @@ %% API. --spec start_link() -> {ok, pid()}. -start_link() -> - supervisor:start_link(?MODULE, []). +-spec start_link(any()) -> {ok, pid()}. +start_link(Ref) -> + supervisor:start_link(?MODULE, Ref). -spec start_protocol(pid(), inet:socket(), module(), module(), any()) -> {ok, pid()}. @@ -36,6 +36,7 @@ start_protocol(ListenerPid, Socket, Transport, Protocol, Opts) -> %% supervisor. -init([]) -> +init(Ref) -> + ok = ranch_server:set_connections_sup(Ref, self()), {ok, {{simple_one_for_one, 0, 1}, [{?MODULE, {?MODULE, start_protocol, []}, temporary, brutal_kill, worker, [?MODULE]}]}}. diff --git a/src/ranch_listener.erl b/src/ranch_listener.erl index 83ee658..52e4c32 100644 --- a/src/ranch_listener.erl +++ b/src/ranch_listener.erl @@ -101,6 +101,7 @@ set_protocol_options(ServerPid, ProtoOpts) -> %% @private init([Ref, MaxConns, ProtoOpts]) -> + ok = ranch_server:insert_listener(Ref, self()), {ok, #state{ref=Ref, max_conns=MaxConns, proto_opts=ProtoOpts}}. %% @private diff --git a/src/ranch_listener_sup.erl b/src/ranch_listener_sup.erl index c8ba12d..0147cf2 100644 --- a/src/ranch_listener_sup.erl +++ b/src/ranch_listener_sup.erl @@ -28,23 +28,25 @@ -> {ok, pid()}. start_link(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts) -> MaxConns = proplists:get_value(max_connections, TransOpts, 1024), - {ok, SupPid} = supervisor:start_link(?MODULE, []), - {ok, ListenerPid} = supervisor:start_child(SupPid, - {ranch_listener, {ranch_listener, start_link, - [Ref, MaxConns, ProtoOpts]}, - permanent, 5000, worker, [ranch_listener]}), - ok = ranch_server:insert_listener(Ref, ListenerPid), - {ok, ConnsPid} = supervisor:start_child(SupPid, - {ranch_conns_sup, {ranch_conns_sup, start_link, []}, - permanent, 5000, supervisor, [ranch_conns_sup]}), - {ok, _PoolPid} = supervisor:start_child(SupPid, - {ranch_acceptors_sup, {ranch_acceptors_sup, start_link, [ - Ref, NbAcceptors, Transport, TransOpts, - Protocol, ListenerPid, ConnsPid - ]}, permanent, 5000, supervisor, [ranch_acceptors_sup]}), - {ok, SupPid}. + supervisor:start_link(?MODULE, { + Ref, NbAcceptors, MaxConns, Transport, TransOpts, Protocol, ProtoOpts + }). %% supervisor. -init([]) -> - {ok, {{one_for_all, 10, 10}, []}}. +init({Ref, NbAcceptors, MaxConns, Transport, TransOpts, Protocol, ProtoOpts}) -> + ChildSpecs = [ + %% listener + {ranch_listener, {ranch_listener, start_link, + [Ref, MaxConns, ProtoOpts]}, + permanent, 5000, worker, [ranch_listener]}, + %% conns_sup + {ranch_conns_sup, {ranch_conns_sup, start_link, [Ref]}, + permanent, infinity, supervisor, [ranch_conns_sup]}, + %% acceptors_sup + {ranch_acceptors_sup, {ranch_acceptors_sup, start_link, + [Ref, NbAcceptors, Transport, TransOpts, Protocol] + }, permanent, infinity, supervisor, [ranch_acceptors_sup]} + ], + {ok, {{rest_for_one, 10, 10}, ChildSpecs}}. + diff --git a/src/ranch_server.erl b/src/ranch_server.erl index b0ae612..078626f 100644 --- a/src/ranch_server.erl +++ b/src/ranch_server.erl @@ -20,6 +20,8 @@ -export([start_link/0]). -export([insert_listener/2]). -export([lookup_listener/1]). +-export([set_connections_sup/2]). +-export([lookup_connections_sup/1]). -export([add_acceptor/2]). -export([send_to_acceptors/2]). -export([add_connection/1]). @@ -52,7 +54,7 @@ start_link() -> %% @doc Insert a listener into the database. -spec insert_listener(any(), pid()) -> ok. insert_listener(Ref, Pid) -> - true = ets:insert_new(?TAB, {{listener, Ref}, Pid}), + true = ets:insert_new(?TAB, {{listener, Ref}, Pid, undefined}), gen_server:cast(?MODULE, {insert_listener, Ref, Pid}). %% @doc Lookup a listener in the database. @@ -60,6 +62,17 @@ insert_listener(Ref, Pid) -> lookup_listener(Ref) -> ets:lookup_element(?TAB, {listener, Ref}, 2). +%% @doc Set a connection supervisor associated with specific listener. +-spec set_connections_sup(any(), pid()) -> ok. +set_connections_sup(Ref, Pid) -> + true = ets:update_element(?TAB, {listener, Ref}, {3, Pid}), + ok. + +%% @doc Lookup a connection supervisor used by specific listener. +-spec lookup_connections_sup(any()) -> pid() | undefined. +lookup_connections_sup(Ref) -> + ets:lookup_element(?TAB, {listener, Ref}, 3). + %% @doc Add an acceptor for the given listener. -spec add_acceptor(any(), pid()) -> ok. add_acceptor(Ref, Pid) -> -- cgit v1.2.3 From 0dd0f5149daaea84ec858a40a5673c04202f64fd Mon Sep 17 00:00:00 2001 From: Andrew Majorov Date: Thu, 1 Nov 2012 19:49:24 +0400 Subject: Fix for cases when listener dies before acceptors --- src/ranch_server.erl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ranch_server.erl b/src/ranch_server.erl index 078626f..a17e103 100644 --- a/src/ranch_server.erl +++ b/src/ranch_server.erl @@ -160,6 +160,11 @@ remove_process(Key = {listener, Ref}, MonitorRef, Pid, Monitors) -> true = ets:delete(?TAB, {connections, Pid}), lists:keydelete({MonitorRef, Pid}, 1, Monitors); remove_process(Key = {acceptors, _}, MonitorRef, Pid, Monitors) -> - Acceptors = ets:lookup_element(?TAB, Key, 2), - true = ets:insert(?TAB, {Key, lists:delete(Pid, Acceptors)}), + try + Acceptors = ets:lookup_element(?TAB, Key, 2), + true = ets:update_element(?TAB, Key, {2, lists:delete(Pid, Acceptors)}) + catch + error:_ -> + ok + end, lists:keydelete({MonitorRef, Pid}, 1, Monitors). -- cgit v1.2.3 From acaa33a8709bf199493861ae9bfb01f1abe16a38 Mon Sep 17 00:00:00 2001 From: Andrew Majorov Date: Thu, 1 Nov 2012 19:50:45 +0400 Subject: Test cases for the listener supervisor behavior --- test/acceptor_SUITE.erl | 94 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/test/acceptor_SUITE.erl b/test/acceptor_SUITE.erl index 59513ab..5193a16 100644 --- a/test/acceptor_SUITE.erl +++ b/test/acceptor_SUITE.erl @@ -41,10 +41,15 @@ -export([tcp_max_connections_and_beyond/1]). -export([tcp_upgrade/1]). +%% supervisor. +-export([supervisor_clean_restart/1]). +-export([supervisor_clean_child_restart/1]). +-export([supervisor_conns_alive/1]). + %% ct. all() -> - [{group, tcp}, {group, ssl}, {group, misc}]. + [{group, tcp}, {group, ssl}, {group, misc}, {group, supervisor}]. groups() -> [{tcp, [ @@ -61,6 +66,10 @@ groups() -> ssl_echo ]}, {misc, [ misc_bad_transport + ]}, {supervisor, [ + supervisor_clean_restart, + supervisor_clean_child_restart, + supervisor_conns_alive ]}]. init_per_suite(Config) -> @@ -261,6 +270,89 @@ tcp_upgrade(_) -> ok = connect_loop(Port, 1, 0), receive upgraded -> ok after 1000 -> error(timeout) end. +%% Supervisor tests + +supervisor_clean_restart(_) -> + %% There we verify that mature listener death will not let + %% whole supervisor down and also the supervisor itself will + %% restart everything properly. + Ref = supervisor_clean_restart, + NbAcc = 4, + {ok, Pid} = ranch:start_listener(Ref, + NbAcc, ranch_tcp, [{port, 0}], echo_protocol, []), + %% Trace supervisor spawns. + 1 = erlang:trace(Pid, true, [procs, set_on_spawn]), + ListenerPid0 = ranch_server:lookup_listener(Ref), + erlang:exit(ListenerPid0, kill), + receive after 1000 -> ok end, + %% Verify that supervisor is alive + true = is_process_alive(Pid), + %% ...but children are dead. + false = is_process_alive(ListenerPid0), + %% Receive traces from newly started children + ListenerPid = receive {trace, Pid, spawn, Pid1, _} -> Pid1 end, + _ConnSupPid = receive {trace, Pid, spawn, Pid2, _} -> Pid2 end, + AccSupPid = receive {trace, Pid, spawn, Pid3, _} -> Pid3 end, + %% ...and its acceptors. + [receive {trace, AccSupPid, spawn, _Pid, _} -> ok end || + _ <- lists:seq(1, NbAcc)], + %% No more traces then. + receive + {trace, EPid, spawn, _, _} when EPid == Pid; EPid == AccSupPid -> + error(invalid_restart) + after 1000 -> ok end, + %% Verify that new children registered themselves properly. + ListenerPid = ranch_server:lookup_listener(Ref), + ok. + +supervisor_clean_child_restart(_) -> + %% Then we verify that only parts of the supervision tree + %% restarted in the case of failure. + Ref = supervisor_clean_child_restart, + {ok, Pid} = ranch:start_listener(Ref, + 1, ranch_tcp, [{port, 0}], echo_protocol, []), + %% Trace supervisor spawns. + 1 = erlang:trace(Pid, true, [procs, set_on_spawn]), + ListenerPid0 = ranch_server:lookup_listener(Ref), + %% Manually shut the listening socket down. + Port = lists:last(erlang:ports()), + ok = gen_tcp:close(Port), + receive after 1000 -> ok end, + %% Verify that supervisor and its first two children are alive. + true = is_process_alive(Pid), + true = is_process_alive(ListenerPid0), + %% Check that acceptors_sup is restarted properly. + AccSupPid = receive {trace, Pid, spawn, Pid1, _} -> Pid1 end, + AccPid = receive {trace, AccSupPid, spawn, Pid2, _} -> Pid2 end, + receive {trace, AccPid, spawn, _, _} -> ok end, + %% No more traces then. + receive + {trace, _, spawn, _, _} -> error(invalid_restart) + after 1000 -> ok end, + %% Verify that children still registered right. + ListenerPid0 = ranch_server:lookup_listener(Ref), + ok. + +supervisor_conns_alive(_) -> + %% And finally we make sure that in the case of partial failure + %% live connections are not being killed. + Ref = supervisor_conns_alive, + {ok, _} = ranch:start_listener(Ref, + 1, ranch_tcp, [{port, 0}], remove_conn_and_wait_protocol, [{remove, false}]), + ok, + %% Get the listener socket + Port = lists:last(erlang:ports()), + TcpPort = ranch:get_port(Ref), + {ok, Socket} = gen_tcp:connect("localhost", TcpPort, + [binary, {active, true}, {packet, raw}]), + %% Shut the socket down + ok = gen_tcp:close(Port), + %% Assert that client is still viable. + receive {tcp_closed, _} -> error(closed) after 1500 -> ok end, + ok = gen_tcp:send(Socket, <<"poke">>), + receive {tcp_closed, _} -> ok end. + + %% Utility functions. connect_loop(_, 0, _) -> -- cgit v1.2.3 From 0b270b2e9cc05ad5fb5c42802c61ccf520860b7b Mon Sep 17 00:00:00 2001 From: Andrew Majorov Date: Fri, 21 Dec 2012 00:52:01 +0400 Subject: Ensure transport module is loaded before checking exports Tests were constantly failing without this patch. It seems ct starts erlang code server in interactive mode, so application module loading is defered. --- src/ranch.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ranch.erl b/src/ranch.erl index 0924ee7..59e1fa7 100644 --- a/src/ranch.erl +++ b/src/ranch.erl @@ -55,6 +55,7 @@ start_listener(Ref, NbAcceptors, Transport, TransOpts, Protocol, ProtoOpts) when is_integer(NbAcceptors) andalso is_atom(Transport) andalso is_atom(Protocol) -> + _ = code:ensure_loaded(Transport), case erlang:function_exported(Transport, name, 0) of false -> {error, badarg}; -- cgit v1.2.3 From 66618454e0925ebd46b6905a1957206b2f2663e9 Mon Sep 17 00:00:00 2001 From: Andrew Majorov Date: Fri, 21 Dec 2012 19:45:01 +0400 Subject: Assure we manually close right socket in testcases Concerning supervisor tests subtle issue. Before we just presumed that last port in the global list of ports is the listening socket. From now we trace the return value of the `ranch_tcp:listen` call. --- test/acceptor_SUITE.erl | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/test/acceptor_SUITE.erl b/test/acceptor_SUITE.erl index 5193a16..582e564 100644 --- a/test/acceptor_SUITE.erl +++ b/test/acceptor_SUITE.erl @@ -303,20 +303,29 @@ supervisor_clean_restart(_) -> after 1000 -> ok end, %% Verify that new children registered themselves properly. ListenerPid = ranch_server:lookup_listener(Ref), - ok. + _ = erlang:trace(all, false, [all]), + ok = clean_traces(). supervisor_clean_child_restart(_) -> %% Then we verify that only parts of the supervision tree %% restarted in the case of failure. Ref = supervisor_clean_child_restart, + %% Trace socket allocations. + _ = erlang:trace(new, true, [call]), + 1 = erlang:trace_pattern({ranch_tcp, listen, 1}, [{'_', [], [{return_trace}]}], [global]), {ok, Pid} = ranch:start_listener(Ref, 1, ranch_tcp, [{port, 0}], echo_protocol, []), %% Trace supervisor spawns. 1 = erlang:trace(Pid, true, [procs, set_on_spawn]), ListenerPid0 = ranch_server:lookup_listener(Ref), %% Manually shut the listening socket down. - Port = lists:last(erlang:ports()), - ok = gen_tcp:close(Port), + LSocket = receive + {trace, _, return_from, {ranch_tcp, listen, 1}, {ok, Socket}} -> + Socket + after 0 -> + error(lsocket_unknown) + end, + ok = gen_tcp:close(LSocket), receive after 1000 -> ok end, %% Verify that supervisor and its first two children are alive. true = is_process_alive(Pid), @@ -331,27 +340,48 @@ supervisor_clean_child_restart(_) -> after 1000 -> ok end, %% Verify that children still registered right. ListenerPid0 = ranch_server:lookup_listener(Ref), + _ = erlang:trace_pattern({ranch_tcp, listen, 1}, false, []), + _ = erlang:trace(all, false, [all]), + ok = clean_traces(), ok. supervisor_conns_alive(_) -> %% And finally we make sure that in the case of partial failure %% live connections are not being killed. Ref = supervisor_conns_alive, + _ = erlang:trace(new, true, [call]), + 1 = erlang:trace_pattern({ranch_tcp, listen, 1}, [{'_', [], [{return_trace}]}], [global]), {ok, _} = ranch:start_listener(Ref, 1, ranch_tcp, [{port, 0}], remove_conn_and_wait_protocol, [{remove, false}]), ok, %% Get the listener socket - Port = lists:last(erlang:ports()), + LSocket = receive + {trace, _, return_from, {ranch_tcp, listen, 1}, {ok, S}} -> + S + after 0 -> + error(lsocket_unknown) + end, TcpPort = ranch:get_port(Ref), {ok, Socket} = gen_tcp:connect("localhost", TcpPort, [binary, {active, true}, {packet, raw}]), %% Shut the socket down - ok = gen_tcp:close(Port), + ok = gen_tcp:close(LSocket), %% Assert that client is still viable. receive {tcp_closed, _} -> error(closed) after 1500 -> ok end, ok = gen_tcp:send(Socket, <<"poke">>), - receive {tcp_closed, _} -> ok end. + receive {tcp_closed, _} -> ok end, + _ = erlang:trace(all, false, [all]), + ok = clean_traces(). +clean_traces() -> + receive + {trace, _, _, _} -> + clean_traces(); + {trace, _, _, _, _} -> + clean_traces() + after 0 -> + ok + end. %% Utility functions. -- cgit v1.2.3