diff options
author | Jay Nelson <[email protected]> | 2010-01-25 11:01:32 -0800 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2010-02-10 17:14:28 +0100 |
commit | 1686757b304b66050f54db8e0ce5ab758bde96af (patch) | |
tree | 4b23dd691ff970e43bc5bfb6dc87941facee621c /lib/stdlib/test/supervisor_SUITE.erl | |
parent | ada6afd00530d6569c41741cfd9d63311ff60f25 (diff) | |
download | otp-1686757b304b66050f54db8e0ce5ab758bde96af.tar.gz otp-1686757b304b66050f54db8e0ce5ab758bde96af.tar.bz2 otp-1686757b304b66050f54db8e0ce5ab758bde96af.zip |
Add count_children/1 to supervisor.erl to determine the number of
children being managed without the memory impact of which_children/1
The function which_children/1 returns a list of the child processes
currently being supervised, but it has the penalty of creating a new
list thereby consuming more memory. In low memory situations it is
often desirable to know which supervisor may have generated many
processes, but the act of discovering the culprit should not cause the
node to crash (or worse a different node if the kernel kills one
randomly). The new function count_children/1 can give an indication
of which supervisor is taxing resources the most without adding to the
burden. Rather than creating a new list, it walks the supervisor's
internal children structure using an accumulator function so that any
used memory can be incrementally collected yet the resulting count can
still be obtained.
The return result of count_children/1 is a property list of counts
containing:
- {specs, Total_Num_Child_Specs}
- {active, Num_Active_Child_Processes_Of_Supervisor_Or_Worker_Type}
- {supervisors, Num_Supervisor_Type_Children_Including_Dead_Processes}
- {workers, Num_Worker_Type_Children_Including_Dead_Processes}
This patch was made in response to mailing list discussions of the
problem diagnosing heavily taxed production systems. I cannot find
the original request, but http://www.erlang.org/cgi-bin/ezmlm-cgi/4/35060
is my original post of the patch.
Diffstat (limited to 'lib/stdlib/test/supervisor_SUITE.erl')
-rw-r--r-- | lib/stdlib/test/supervisor_SUITE.erl | 175 |
1 files changed, 163 insertions, 12 deletions
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl index f25e877289..039ea298c4 100644 --- a/lib/stdlib/test/supervisor_SUITE.erl +++ b/lib/stdlib/test/supervisor_SUITE.erl @@ -50,7 +50,7 @@ simple_one_for_one_extra/1]). %% Misc tests --export([child_unlink/1, tree/1]). +-export([child_unlink/1, tree/1, count_children_memory/1]). %------------------------------------------------------------------------- @@ -60,7 +60,8 @@ all(suite) -> child_adm_simple, extra_return, child_specs, restart_one_for_one, restart_one_for_all, restart_simple_one_for_one, restart_rest_for_one, - normal_termination, abnormal_termination, child_unlink, tree]}. + normal_termination, abnormal_termination, child_unlink, tree, + count_children_memory]}. start(InitResult) -> @@ -72,6 +73,15 @@ init(fail) -> init(InitResult) -> InitResult. +%% Respect proplist return of supervisor:count_children +get_child_counts(Supervisor) -> + Counts = supervisor:count_children(Supervisor), + [proplists:get_value(specs, Counts), + proplists:get_value(active, Counts), + proplists:get_value(supervisors, Counts), + proplists:get_value(workers, Counts)]. + + %------------------------------------------------------------------------- % % Test cases starts here. @@ -140,6 +150,8 @@ sup_start_ignore_child(Config) when is_list(Config) -> ?line [{child2, CPid2, worker, []},{child1, undefined, worker, []}] = supervisor:which_children(sup_test), + ?line [2,1,0,2] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- @@ -341,6 +353,8 @@ extra_return(Config) when is_list(Config) -> ?line {error, running} = supervisor:delete_child(sup_test, child1), ?line {error, running} = supervisor:restart_child(sup_test, child1), ?line [{child1, CPid, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), + ?line ok = supervisor:terminate_child(sup_test, child1), receive {'EXIT', CPid, shutdown} -> ok; @@ -350,22 +364,31 @@ extra_return(Config) when is_list(Config) -> ?line test_server:fail(no_child_termination) end, ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ?line [1,0,0,1] = get_child_counts(sup_test), + ?line {ok, CPid2,extra_return} = supervisor:restart_child(sup_test, child1), ?line [{child1, CPid2, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), + ?line ok = supervisor:terminate_child(sup_test, child1), ?line ok = supervisor:terminate_child(sup_test, child1), ?line ok = supervisor:delete_child(sup_test, child1), ?line {error, not_found} = supervisor:restart_child(sup_test, child1), ?line [] = supervisor:which_children(sup_test), + ?line [0,0,0,0] = get_child_counts(sup_test), + ?line {ok, CPid3, extra_return} = supervisor:start_child(sup_test, Child), ?line [{child1, CPid3, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- child_adm(doc)-> ["Test API functions start_child/2, terminate_child/2, delete_child/2 " - "restart_child/2, which_children/1. Only correct childspecs are used, " - "handling of incorrect childspecs is tested in child_specs/1"]; + "restart_child/2, which_children/1, count_children/1. Only correct " + "childspecs are used, handling of incorrect childspecs is tested in " + "child_specs/1"]; child_adm(suite) -> []; child_adm(Config) when is_list(Config) -> process_flag(trap_exit, true), @@ -373,6 +396,7 @@ child_adm(Config) when is_list(Config) -> worker, []}, ?line {ok, _Pid} = start({ok, {{one_for_one, 2, 3600}, [Child]}}), ?line [{child1, CPid, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), link(CPid), %% Start of an already runnig process @@ -392,6 +416,7 @@ child_adm(Config) when is_list(Config) -> ?line test_server:fail(no_child_termination) end, ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ?line [1,0,0,1] = get_child_counts(sup_test), %% Like deleting something that does not exist, it will succeed! ?line ok = supervisor:terminate_child(sup_test, child1), @@ -402,6 +427,7 @@ child_adm(Config) when is_list(Config) -> %% Restart ?line {ok, CPid2} = supervisor:restart_child(sup_test, child1), ?line [{child1, CPid2, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), ?line {error, running} = supervisor:restart_child(sup_test, child1), ?line {error, not_found} = supervisor:restart_child(sup_test, child2), @@ -414,15 +440,19 @@ child_adm(Config) when is_list(Config) -> ?line ok = supervisor:delete_child(sup_test, child1), ?line {error, not_found} = supervisor:restart_child(sup_test, child1), ?line [] = supervisor:which_children(sup_test), + ?line [0,0,0,0] = get_child_counts(sup_test), %% Start ?line {'EXIT',{noproc,{gen_server,call, _}}} = (catch supervisor:start_child(foo, Child)), ?line {ok, CPid3} = supervisor:start_child(sup_test, Child), ?line [{child1, CPid3, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), ?line {'EXIT',{noproc,{gen_server,call,[foo,which_children,infinity]}}} = (catch supervisor:which_children(foo)), + ?line {'EXIT',{noproc,{gen_server,call,[foo,count_children,infinity]}}} + = (catch supervisor:count_children(foo)), ok. %------------------------------------------------------------------------- child_adm_simple(doc) -> @@ -436,6 +466,7 @@ child_adm_simple(Config) when is_list(Config) -> ?line {ok, _Pid} = start({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), %% In simple_one_for_one all children are added dynamically ?line [] = supervisor:which_children(sup_test), + ?line [1,0,0,0] = get_child_counts(sup_test), %% Start ?line {'EXIT',{noproc,{gen_server,call, _}}} = @@ -443,12 +474,14 @@ child_adm_simple(Config) when is_list(Config) -> ?line {ok, CPid1} = supervisor:start_child(sup_test, []), ?line [{undefined, CPid1, worker, []}] = supervisor:which_children(sup_test), + ?line [1,1,0,1] = get_child_counts(sup_test), ?line {ok, CPid2} = supervisor:start_child(sup_test, []), ?line Children = supervisor:which_children(sup_test), ?line 2 = length(Children), ?line true = lists:member({undefined, CPid2, worker, []}, Children), ?line true = lists:member({undefined, CPid1, worker, []}, Children), + ?line [1,2,0,2] = get_child_counts(sup_test), %% Termination ?line {error, simple_one_for_one} = @@ -541,8 +574,10 @@ permanent_normal(Config) when is_list(Config) -> ok; false -> ?line test_server:fail({permanent_child_not_restarted, Child1}) - end. + end, + ?line [1,1,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- transient_normal(doc) -> ["A transient child should not be restarted if it exits with " @@ -558,8 +593,10 @@ transient_normal(Config) when is_list(Config) -> CPid1 ! stop, test_server:sleep(100), - ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test). + ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ?line [1,0,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- temporary_normal(doc) -> ["A temporary process should never be restarted"]; @@ -574,8 +611,10 @@ temporary_normal(Config) when is_list(Config) -> CPid1 ! stop, test_server:sleep(100), - ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test). + ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ?line [1,0,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- abnormal_termination(doc) -> ["Testes the supervisors behaviour if a child dies with reason abnormal"]; @@ -601,8 +640,10 @@ permanent_abnormal(Config) when is_list(Config) -> ok; false -> ?line test_server:fail({permanent_child_not_restarted, Child1}) - end. + end, + ?line [1,1,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- transient_abnormal(doc) -> ["A transient child should be restarted if it exits with " @@ -624,9 +665,10 @@ transient_abnormal(Config) when is_list(Config) -> ok; false -> ?line test_server:fail({transient_child_not_restarted, Child1}) - end. - + end, + ?line [1,1,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- temporary_abnormal(doc) -> ["A temporary process should never be restarted"]; @@ -641,8 +683,10 @@ temporary_abnormal(Config) when is_list(Config) -> CPid1 ! die, test_server:sleep(100), - ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test). + ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test), + ?line [1,0,0,1] = get_child_counts(sup_test), + ok. %------------------------------------------------------------------------- restart_one_for_one(doc) -> ["Test that the one_for_one strategy works."]; @@ -679,6 +723,7 @@ one_for_one(Config) when is_list(Config) -> end; true -> ?line test_server:fail({bad_child_list, Children}) end, + ?line [2,2,0,2] = get_child_counts(sup_test), %% Test restart frequency property CPid2 ! die, @@ -772,6 +817,8 @@ one_for_all(Config) when is_list(Config) -> true -> ?line test_server:fail(bad_child); false -> ok end, + ?line [2,2,0,2] = get_child_counts(sup_test), + %%% Test restart frequency property [{_, Pid3, _, _}|_] = supervisor:which_children(sup_test), Pid3 ! die, @@ -854,6 +901,8 @@ simple_one_for_one(Config) when is_list(Config) -> end; true -> ?line test_server:fail({bad_child_list, Children}) end, + ?line [1,2,0,2] = get_child_counts(sup_test), + %% Test restart frequency property CPid2 ! die, receive @@ -896,6 +945,8 @@ simple_one_for_one_extra(Config) when is_list(Config) -> end; true -> ?line test_server:fail({bad_child_list, Children}) end, + ?line [1,2,0,2] = get_child_counts(sup_test), + CPid2 ! die, receive {'EXIT', CPid2, _} -> ok @@ -962,6 +1013,8 @@ rest_for_one(Config) when is_list(Config) -> link(CPid2), ?line {ok, CPid3} = supervisor:start_child(sup_test, Child3), link(CPid3), + ?line [3,3,0,3] = get_child_counts(sup_test), + CPid2 ! die, receive {'EXIT', CPid2, died} -> ok; @@ -987,6 +1040,8 @@ rest_for_one(Config) when is_list(Config) -> if length(Children) == 3 -> ok; true -> ?line test_server:fail({bad_child_list, Children}) end, + ?line [3,3,0,3] = get_child_counts(sup_test), + %% Test that no old children is still alive SCh = lists:map(fun({_,P,_,_}) -> P end, Children), case lists:member(CPid1, SCh) of @@ -1114,15 +1169,19 @@ tree(Config) when is_list(Config) -> %% Child supervisors ?line {ok, Sup1} = supervisor:start_child(Pid, ChildSup1), ?line {ok, Sup2} = supervisor:start_child(Pid, ChildSup2), + ?line [2,2,2,0] = get_child_counts(Pid), %% Workers - ?line [{_, CPid2, _, _},{_, CPid1, _, _}] = supervisor:which_children(Sup1), + ?line [2,2,0,2] = get_child_counts(Sup1), + ?line [0,0,0,0] = get_child_counts(Sup2), %% Dynamic children ?line {ok, CPid3} = supervisor:start_child(Sup2, Child3), ?line {ok, CPid4} = supervisor:start_child(Sup2, Child4), + ?line [2,2,0,2] = get_child_counts(Sup1), + ?line [2,2,0,2] = get_child_counts(Sup2), link(Sup1), link(Sup2), @@ -1144,9 +1203,11 @@ tree(Config) when is_list(Config) -> ?line [{_, CPid2, _, _},{_, CPid1, _, _}] = supervisor:which_children(Sup1), + ?line [2,2,0,2] = get_child_counts(Sup1), ?line [{_, NewCPid4, _, _},{_, CPid3, _, _}] = supervisor:which_children(Sup2), + ?line [2,2,0,2] = get_child_counts(Sup2), link(NewCPid4), @@ -1195,9 +1256,99 @@ tree(Config) when is_list(Config) -> ?line [{supchild2, NewSup2, _, _},{supchild1, NewSup1, _, _}] = supervisor:which_children(Pid), + ?line [2,2,2,0] = get_child_counts(Pid), ?line [{child2, _, _, _},{child1, _, _, _}] = supervisor:which_children(NewSup1), + ?line [2,2,0,2] = get_child_counts(NewSup1), + ?line [] = supervisor:which_children(NewSup2), + ?line [0,0,0,0] = get_child_counts(NewSup2), ok. +%------------------------------------------------------------------------- +count_children_allocator_test(MemoryState) -> + Allocators = [temp_alloc, eheap_alloc, binary_alloc, ets_alloc, + driver_alloc, sl_alloc, ll_alloc, fix_alloc, std_alloc, + sys_alloc], + MemoryStateList = element(4, MemoryState), + AllocTypes = [lists:keyfind(Alloc, 1, MemoryStateList) + || Alloc <- Allocators], + AllocStates = [lists:keyfind(e, 1, AllocValue) + || {_Type, AllocValue} <- AllocTypes], + lists:all(fun(State) -> State == {e, true} end, AllocStates). + +count_children_memory(doc) -> + ["Test that which_children eats memory, but count_children does not."]; +count_children_memory(suite) -> + MemoryState = erlang:system_info(allocator), + case count_children_allocator_test(MemoryState) of + true -> []; + false -> + {skip, "+Meamin used during test; erlang:memory/1 not available"} + end; +count_children_memory(Config) when is_list(Config) -> + process_flag(trap_exit, true), + Child = {child, {supervisor_1, start_child, []}, temporary, 1000, + worker, []}, + ?line {ok, _Pid} = start({ok, {{simple_one_for_one, 2, 3600}, [Child]}}), + [supervisor:start_child(sup_test, []) || _Ignore <- lists:seq(1,1000)], + + garbage_collect(), + _Size1 = erlang:memory(processes_used), + Children = supervisor:which_children(sup_test), + _Size2 = erlang:memory(processes_used), + ChildCount = get_child_counts(sup_test), + Size3 = erlang:memory(processes_used), + + [supervisor:start_child(sup_test, []) || _Ignore2 <- lists:seq(1,1000)], + + garbage_collect(), + Children2 = supervisor:which_children(sup_test), + Size4 = erlang:memory(processes_used), + ChildCount2 = get_child_counts(sup_test), + Size5 = erlang:memory(processes_used), + + garbage_collect(), + Children3 = supervisor:which_children(sup_test), + Size6 = erlang:memory(processes_used), + ChildCount3 = get_child_counts(sup_test), + Size7 = erlang:memory(processes_used), + + ?line 1000 = length(Children), + ?line [1,1000,0,1000] = ChildCount, + ?line 2000 = length(Children2), + ?line [1,2000,0,2000] = ChildCount2, + ?line Children3 = Children2, + ?line ChildCount3 = ChildCount2, + + %% count_children consumes memory using an accumulator function, + %% but the space can be reclaimed incrementally, whereas + %% which_children generates a return list. + case (Size5 =< Size4) of + true -> ok; + false -> + ?line test_server:fail({count_children, used_more_memory}) + end, + case Size7 =< Size6 of + true -> ok; + false -> + ?line test_server:fail({count_children, used_more_memory}) + end, + + case Size4 > Size3 of + true -> ok; + false -> + ?line test_server:fail({which_children, used_no_memory}) + end, + case Size6 > Size5 of + true -> ok; + false -> + ?line test_server:fail({which_children, used_no_memory}) + end, + + [exit(Pid, kill) || {undefined, Pid, worker, _Modules} <- Children3], + test_server:sleep(100), + ?line [1,0,0,0] = get_child_counts(sup_test), + + ok. |