aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/test
diff options
context:
space:
mode:
authorJay Nelson <[email protected]>2010-01-25 11:01:32 -0800
committerBjörn Gustavsson <[email protected]>2010-02-10 17:14:28 +0100
commit1686757b304b66050f54db8e0ce5ab758bde96af (patch)
tree4b23dd691ff970e43bc5bfb6dc87941facee621c /lib/stdlib/test
parentada6afd00530d6569c41741cfd9d63311ff60f25 (diff)
downloadotp-1686757b304b66050f54db8e0ce5ab758bde96af.tar.gz
otp-1686757b304b66050f54db8e0ce5ab758bde96af.tar.bz2
otp-1686757b304b66050f54db8e0ce5ab758bde96af.zip
Add count_children/1 to supervisor.erl to determine the number of
children being managed without the memory impact of which_children/1 The function which_children/1 returns a list of the child processes currently being supervised, but it has the penalty of creating a new list thereby consuming more memory. In low memory situations it is often desirable to know which supervisor may have generated many processes, but the act of discovering the culprit should not cause the node to crash (or worse a different node if the kernel kills one randomly). The new function count_children/1 can give an indication of which supervisor is taxing resources the most without adding to the burden. Rather than creating a new list, it walks the supervisor's internal children structure using an accumulator function so that any used memory can be incrementally collected yet the resulting count can still be obtained. The return result of count_children/1 is a property list of counts containing: - {specs, Total_Num_Child_Specs} - {active, Num_Active_Child_Processes_Of_Supervisor_Or_Worker_Type} - {supervisors, Num_Supervisor_Type_Children_Including_Dead_Processes} - {workers, Num_Worker_Type_Children_Including_Dead_Processes} This patch was made in response to mailing list discussions of the problem diagnosing heavily taxed production systems. I cannot find the original request, but http://www.erlang.org/cgi-bin/ezmlm-cgi/4/35060 is my original post of the patch.
Diffstat (limited to 'lib/stdlib/test')
-rw-r--r--lib/stdlib/test/supervisor_SUITE.erl175
1 files changed, 163 insertions, 12 deletions
diff --git a/lib/stdlib/test/supervisor_SUITE.erl b/lib/stdlib/test/supervisor_SUITE.erl
index f25e877289..039ea298c4 100644
--- a/lib/stdlib/test/supervisor_SUITE.erl
+++ b/lib/stdlib/test/supervisor_SUITE.erl
@@ -50,7 +50,7 @@
simple_one_for_one_extra/1]).
%% Misc tests
--export([child_unlink/1, tree/1]).
+-export([child_unlink/1, tree/1, count_children_memory/1]).
%-------------------------------------------------------------------------
@@ -60,7 +60,8 @@ all(suite) ->
child_adm_simple, extra_return, child_specs,
restart_one_for_one, restart_one_for_all,
restart_simple_one_for_one, restart_rest_for_one,
- normal_termination, abnormal_termination, child_unlink, tree]}.
+ normal_termination, abnormal_termination, child_unlink, tree,
+ count_children_memory]}.
start(InitResult) ->
@@ -72,6 +73,15 @@ init(fail) ->
init(InitResult) ->
InitResult.
+%% Respect proplist return of supervisor:count_children
+get_child_counts(Supervisor) ->
+ Counts = supervisor:count_children(Supervisor),
+ [proplists:get_value(specs, Counts),
+ proplists:get_value(active, Counts),
+ proplists:get_value(supervisors, Counts),
+ proplists:get_value(workers, Counts)].
+
+
%-------------------------------------------------------------------------
%
% Test cases starts here.
@@ -140,6 +150,8 @@ sup_start_ignore_child(Config) when is_list(Config) ->
?line [{child2, CPid2, worker, []},{child1, undefined, worker, []}]
= supervisor:which_children(sup_test),
+ ?line [2,1,0,2] = get_child_counts(sup_test),
+
ok.
%-------------------------------------------------------------------------
@@ -341,6 +353,8 @@ extra_return(Config) when is_list(Config) ->
?line {error, running} = supervisor:delete_child(sup_test, child1),
?line {error, running} = supervisor:restart_child(sup_test, child1),
?line [{child1, CPid, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+
?line ok = supervisor:terminate_child(sup_test, child1),
receive
{'EXIT', CPid, shutdown} -> ok;
@@ -350,22 +364,31 @@ extra_return(Config) when is_list(Config) ->
?line test_server:fail(no_child_termination)
end,
?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test),
+ ?line [1,0,0,1] = get_child_counts(sup_test),
+
?line {ok, CPid2,extra_return} =
supervisor:restart_child(sup_test, child1),
?line [{child1, CPid2, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+
?line ok = supervisor:terminate_child(sup_test, child1),
?line ok = supervisor:terminate_child(sup_test, child1),
?line ok = supervisor:delete_child(sup_test, child1),
?line {error, not_found} = supervisor:restart_child(sup_test, child1),
?line [] = supervisor:which_children(sup_test),
+ ?line [0,0,0,0] = get_child_counts(sup_test),
+
?line {ok, CPid3, extra_return} = supervisor:start_child(sup_test, Child),
?line [{child1, CPid3, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+
ok.
%-------------------------------------------------------------------------
child_adm(doc)->
["Test API functions start_child/2, terminate_child/2, delete_child/2 "
- "restart_child/2, which_children/1. Only correct childspecs are used, "
- "handling of incorrect childspecs is tested in child_specs/1"];
+ "restart_child/2, which_children/1, count_children/1. Only correct "
+ "childspecs are used, handling of incorrect childspecs is tested in "
+ "child_specs/1"];
child_adm(suite) -> [];
child_adm(Config) when is_list(Config) ->
process_flag(trap_exit, true),
@@ -373,6 +396,7 @@ child_adm(Config) when is_list(Config) ->
worker, []},
?line {ok, _Pid} = start({ok, {{one_for_one, 2, 3600}, [Child]}}),
?line [{child1, CPid, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
link(CPid),
%% Start of an already runnig process
@@ -392,6 +416,7 @@ child_adm(Config) when is_list(Config) ->
?line test_server:fail(no_child_termination)
end,
?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test),
+ ?line [1,0,0,1] = get_child_counts(sup_test),
%% Like deleting something that does not exist, it will succeed!
?line ok = supervisor:terminate_child(sup_test, child1),
@@ -402,6 +427,7 @@ child_adm(Config) when is_list(Config) ->
%% Restart
?line {ok, CPid2} = supervisor:restart_child(sup_test, child1),
?line [{child1, CPid2, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
?line {error, running} = supervisor:restart_child(sup_test, child1),
?line {error, not_found} = supervisor:restart_child(sup_test, child2),
@@ -414,15 +440,19 @@ child_adm(Config) when is_list(Config) ->
?line ok = supervisor:delete_child(sup_test, child1),
?line {error, not_found} = supervisor:restart_child(sup_test, child1),
?line [] = supervisor:which_children(sup_test),
+ ?line [0,0,0,0] = get_child_counts(sup_test),
%% Start
?line {'EXIT',{noproc,{gen_server,call, _}}} =
(catch supervisor:start_child(foo, Child)),
?line {ok, CPid3} = supervisor:start_child(sup_test, Child),
?line [{child1, CPid3, worker, []}] = supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
?line {'EXIT',{noproc,{gen_server,call,[foo,which_children,infinity]}}}
= (catch supervisor:which_children(foo)),
+ ?line {'EXIT',{noproc,{gen_server,call,[foo,count_children,infinity]}}}
+ = (catch supervisor:count_children(foo)),
ok.
%-------------------------------------------------------------------------
child_adm_simple(doc) ->
@@ -436,6 +466,7 @@ child_adm_simple(Config) when is_list(Config) ->
?line {ok, _Pid} = start({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
%% In simple_one_for_one all children are added dynamically
?line [] = supervisor:which_children(sup_test),
+ ?line [1,0,0,0] = get_child_counts(sup_test),
%% Start
?line {'EXIT',{noproc,{gen_server,call, _}}} =
@@ -443,12 +474,14 @@ child_adm_simple(Config) when is_list(Config) ->
?line {ok, CPid1} = supervisor:start_child(sup_test, []),
?line [{undefined, CPid1, worker, []}] =
supervisor:which_children(sup_test),
+ ?line [1,1,0,1] = get_child_counts(sup_test),
?line {ok, CPid2} = supervisor:start_child(sup_test, []),
?line Children = supervisor:which_children(sup_test),
?line 2 = length(Children),
?line true = lists:member({undefined, CPid2, worker, []}, Children),
?line true = lists:member({undefined, CPid1, worker, []}, Children),
+ ?line [1,2,0,2] = get_child_counts(sup_test),
%% Termination
?line {error, simple_one_for_one} =
@@ -541,8 +574,10 @@ permanent_normal(Config) when is_list(Config) ->
ok;
false ->
?line test_server:fail({permanent_child_not_restarted, Child1})
- end.
+ end,
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
transient_normal(doc) ->
["A transient child should not be restarted if it exits with "
@@ -558,8 +593,10 @@ transient_normal(Config) when is_list(Config) ->
CPid1 ! stop,
test_server:sleep(100),
- ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test).
+ ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test),
+ ?line [1,0,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
temporary_normal(doc) ->
["A temporary process should never be restarted"];
@@ -574,8 +611,10 @@ temporary_normal(Config) when is_list(Config) ->
CPid1 ! stop,
test_server:sleep(100),
- ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test).
+ ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test),
+ ?line [1,0,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
abnormal_termination(doc) ->
["Testes the supervisors behaviour if a child dies with reason abnormal"];
@@ -601,8 +640,10 @@ permanent_abnormal(Config) when is_list(Config) ->
ok;
false ->
?line test_server:fail({permanent_child_not_restarted, Child1})
- end.
+ end,
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
transient_abnormal(doc) ->
["A transient child should be restarted if it exits with "
@@ -624,9 +665,10 @@ transient_abnormal(Config) when is_list(Config) ->
ok;
false ->
?line test_server:fail({transient_child_not_restarted, Child1})
- end.
-
+ end,
+ ?line [1,1,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
temporary_abnormal(doc) ->
["A temporary process should never be restarted"];
@@ -641,8 +683,10 @@ temporary_abnormal(Config) when is_list(Config) ->
CPid1 ! die,
test_server:sleep(100),
- ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test).
+ ?line [{child1,undefined,worker,[]}] = supervisor:which_children(sup_test),
+ ?line [1,0,0,1] = get_child_counts(sup_test),
+ ok.
%-------------------------------------------------------------------------
restart_one_for_one(doc) ->
["Test that the one_for_one strategy works."];
@@ -679,6 +723,7 @@ one_for_one(Config) when is_list(Config) ->
end;
true -> ?line test_server:fail({bad_child_list, Children})
end,
+ ?line [2,2,0,2] = get_child_counts(sup_test),
%% Test restart frequency property
CPid2 ! die,
@@ -772,6 +817,8 @@ one_for_all(Config) when is_list(Config) ->
true -> ?line test_server:fail(bad_child);
false -> ok
end,
+ ?line [2,2,0,2] = get_child_counts(sup_test),
+
%%% Test restart frequency property
[{_, Pid3, _, _}|_] = supervisor:which_children(sup_test),
Pid3 ! die,
@@ -854,6 +901,8 @@ simple_one_for_one(Config) when is_list(Config) ->
end;
true -> ?line test_server:fail({bad_child_list, Children})
end,
+ ?line [1,2,0,2] = get_child_counts(sup_test),
+
%% Test restart frequency property
CPid2 ! die,
receive
@@ -896,6 +945,8 @@ simple_one_for_one_extra(Config) when is_list(Config) ->
end;
true -> ?line test_server:fail({bad_child_list, Children})
end,
+ ?line [1,2,0,2] = get_child_counts(sup_test),
+
CPid2 ! die,
receive
{'EXIT', CPid2, _} -> ok
@@ -962,6 +1013,8 @@ rest_for_one(Config) when is_list(Config) ->
link(CPid2),
?line {ok, CPid3} = supervisor:start_child(sup_test, Child3),
link(CPid3),
+ ?line [3,3,0,3] = get_child_counts(sup_test),
+
CPid2 ! die,
receive
{'EXIT', CPid2, died} -> ok;
@@ -987,6 +1040,8 @@ rest_for_one(Config) when is_list(Config) ->
if length(Children) == 3 -> ok;
true -> ?line test_server:fail({bad_child_list, Children})
end,
+ ?line [3,3,0,3] = get_child_counts(sup_test),
+
%% Test that no old children is still alive
SCh = lists:map(fun({_,P,_,_}) -> P end, Children),
case lists:member(CPid1, SCh) of
@@ -1114,15 +1169,19 @@ tree(Config) when is_list(Config) ->
%% Child supervisors
?line {ok, Sup1} = supervisor:start_child(Pid, ChildSup1),
?line {ok, Sup2} = supervisor:start_child(Pid, ChildSup2),
+ ?line [2,2,2,0] = get_child_counts(Pid),
%% Workers
-
?line [{_, CPid2, _, _},{_, CPid1, _, _}] =
supervisor:which_children(Sup1),
+ ?line [2,2,0,2] = get_child_counts(Sup1),
+ ?line [0,0,0,0] = get_child_counts(Sup2),
%% Dynamic children
?line {ok, CPid3} = supervisor:start_child(Sup2, Child3),
?line {ok, CPid4} = supervisor:start_child(Sup2, Child4),
+ ?line [2,2,0,2] = get_child_counts(Sup1),
+ ?line [2,2,0,2] = get_child_counts(Sup2),
link(Sup1),
link(Sup2),
@@ -1144,9 +1203,11 @@ tree(Config) when is_list(Config) ->
?line [{_, CPid2, _, _},{_, CPid1, _, _}] =
supervisor:which_children(Sup1),
+ ?line [2,2,0,2] = get_child_counts(Sup1),
?line [{_, NewCPid4, _, _},{_, CPid3, _, _}] =
supervisor:which_children(Sup2),
+ ?line [2,2,0,2] = get_child_counts(Sup2),
link(NewCPid4),
@@ -1195,9 +1256,99 @@ tree(Config) when is_list(Config) ->
?line [{supchild2, NewSup2, _, _},{supchild1, NewSup1, _, _}] =
supervisor:which_children(Pid),
+ ?line [2,2,2,0] = get_child_counts(Pid),
?line [{child2, _, _, _},{child1, _, _, _}] =
supervisor:which_children(NewSup1),
+ ?line [2,2,0,2] = get_child_counts(NewSup1),
+
?line [] = supervisor:which_children(NewSup2),
+ ?line [0,0,0,0] = get_child_counts(NewSup2),
ok.
+%-------------------------------------------------------------------------
+count_children_allocator_test(MemoryState) ->
+ Allocators = [temp_alloc, eheap_alloc, binary_alloc, ets_alloc,
+ driver_alloc, sl_alloc, ll_alloc, fix_alloc, std_alloc,
+ sys_alloc],
+ MemoryStateList = element(4, MemoryState),
+ AllocTypes = [lists:keyfind(Alloc, 1, MemoryStateList)
+ || Alloc <- Allocators],
+ AllocStates = [lists:keyfind(e, 1, AllocValue)
+ || {_Type, AllocValue} <- AllocTypes],
+ lists:all(fun(State) -> State == {e, true} end, AllocStates).
+
+count_children_memory(doc) ->
+ ["Test that which_children eats memory, but count_children does not."];
+count_children_memory(suite) ->
+ MemoryState = erlang:system_info(allocator),
+ case count_children_allocator_test(MemoryState) of
+ true -> [];
+ false ->
+ {skip, "+Meamin used during test; erlang:memory/1 not available"}
+ end;
+count_children_memory(Config) when is_list(Config) ->
+ process_flag(trap_exit, true),
+ Child = {child, {supervisor_1, start_child, []}, temporary, 1000,
+ worker, []},
+ ?line {ok, _Pid} = start({ok, {{simple_one_for_one, 2, 3600}, [Child]}}),
+ [supervisor:start_child(sup_test, []) || _Ignore <- lists:seq(1,1000)],
+
+ garbage_collect(),
+ _Size1 = erlang:memory(processes_used),
+ Children = supervisor:which_children(sup_test),
+ _Size2 = erlang:memory(processes_used),
+ ChildCount = get_child_counts(sup_test),
+ Size3 = erlang:memory(processes_used),
+
+ [supervisor:start_child(sup_test, []) || _Ignore2 <- lists:seq(1,1000)],
+
+ garbage_collect(),
+ Children2 = supervisor:which_children(sup_test),
+ Size4 = erlang:memory(processes_used),
+ ChildCount2 = get_child_counts(sup_test),
+ Size5 = erlang:memory(processes_used),
+
+ garbage_collect(),
+ Children3 = supervisor:which_children(sup_test),
+ Size6 = erlang:memory(processes_used),
+ ChildCount3 = get_child_counts(sup_test),
+ Size7 = erlang:memory(processes_used),
+
+ ?line 1000 = length(Children),
+ ?line [1,1000,0,1000] = ChildCount,
+ ?line 2000 = length(Children2),
+ ?line [1,2000,0,2000] = ChildCount2,
+ ?line Children3 = Children2,
+ ?line ChildCount3 = ChildCount2,
+
+ %% count_children consumes memory using an accumulator function,
+ %% but the space can be reclaimed incrementally, whereas
+ %% which_children generates a return list.
+ case (Size5 =< Size4) of
+ true -> ok;
+ false ->
+ ?line test_server:fail({count_children, used_more_memory})
+ end,
+ case Size7 =< Size6 of
+ true -> ok;
+ false ->
+ ?line test_server:fail({count_children, used_more_memory})
+ end,
+
+ case Size4 > Size3 of
+ true -> ok;
+ false ->
+ ?line test_server:fail({which_children, used_no_memory})
+ end,
+ case Size6 > Size5 of
+ true -> ok;
+ false ->
+ ?line test_server:fail({which_children, used_no_memory})
+ end,
+
+ [exit(Pid, kill) || {undefined, Pid, worker, _Modules} <- Children3],
+ test_server:sleep(100),
+ ?line [1,0,0,0] = get_child_counts(sup_test),
+
+ ok.