aboutsummaryrefslogtreecommitdiffstats
path: root/lib/kernel/test/logger_stress_SUITE.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/kernel/test/logger_stress_SUITE.erl')
-rw-r--r--lib/kernel/test/logger_stress_SUITE.erl550
1 files changed, 550 insertions, 0 deletions
diff --git a/lib/kernel/test/logger_stress_SUITE.erl b/lib/kernel/test/logger_stress_SUITE.erl
new file mode 100644
index 0000000000..1a278fb1b2
--- /dev/null
+++ b/lib/kernel/test/logger_stress_SUITE.erl
@@ -0,0 +1,550 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+-module(logger_stress_SUITE).
+
+-compile(export_all).
+
+-include_lib("common_test/include/ct_event.hrl").
+-include_lib("kernel/include/logger.hrl").
+-include_lib("kernel/src/logger_h_common.hrl").
+
+-ifdef(SAVE_STATS).
+ -define(COLLECT_STATS(_All_,_Procs_),
+ ct:pal("~p",[stats(_All_,_Procs_)])).
+-else.
+ -define(COLLECT_STATS(_All_,_Procs__), ok).
+-endif.
+
+-define(TEST_DURATION,120). % seconds
+
+suite() ->
+ [{timetrap,{minutes,3}},
+ {ct_hooks,[logger_test_lib]}].
+
+init_per_suite(Config) ->
+ Config.
+
+end_per_suite(_Config) ->
+ ok.
+
+init_per_group(_Group, Config) ->
+ Config.
+
+end_per_group(_Group, _Config) ->
+ ok.
+
+init_per_testcase(_TestCase, Config) ->
+ Config.
+
+end_per_testcase(Case, Config) ->
+ try apply(?MODULE,Case,[cleanup,Config])
+ catch error:undef -> ok
+ end,
+ ok.
+
+groups() ->
+ [].
+
+all() ->
+ [allow_events,
+ reject_events,
+ std_handler,
+ disk_log_handler,
+ std_handler_time,
+ std_handler_time_big,
+ disk_log_handler_time,
+ disk_log_handler_time_big,
+ emulator_events,
+ remote_events,
+ remote_to_disk_log,
+ remote_emulator_events,
+ remote_emulator_to_disk_log].
+
+%%%-----------------------------------------------------------------
+%%% Test cases
+%%%-----------------------------------------------------------------
+%% Time from log macro call to handler callback
+allow_events(Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(Config,
+ [{logger,
+ [{handler,default,?MODULE,#{}}]},
+ {logger_level,notice}]),
+ N = 100000,
+ {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end),
+ IOPS = N * 1000/T, % log events allowed per millisecond
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{value,IOPS}]}),
+ {comment,io_lib:format("~.2f accepted events pr millisecond",
+ [IOPS])}.
+
+%% Time from log macro call to reject (log level)
+reject_events(Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(Config,
+ [{logger,
+ [{handler,default,?MODULE,#{}}]},
+ {logger_level,error}]),
+ N = 1000000,
+ {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end),
+ IOPS = N * 1000/T, % log events rejected per millisecond
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{value,IOPS}]}),
+ {comment,io_lib:format("~.2f rejected events pr millisecond",
+ [IOPS])}.
+
+%% Cascading failure that produce gen_server and proc_lib reports -
+%% how many of the produced log events are actually written to a log
+%% with logger_std_h file handler.
+std_handler(Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(Config,
+ [{logger,
+ [{handler,default,logger_std_h,
+ #{config=>#{type=>{file,"default.log"}}}}]}]),
+
+ cascade({Node,{logger_backend,log_allowed,2},[]},
+ {Node,{logger_std_h,write,4},[{default,logger_std_h_default}]},
+ fun otp_cascading/0).
+std_handler(cleanup,_Config) ->
+ _ = file:delete("default.log"),
+ ok.
+
+%% Disable overload protection and just print a lot - measure time.
+%% The IOPS reported is the number of log events written per millisecond.
+std_handler_time(Config) ->
+ measure_handler_time(logger_std_h,#{type=>{file,"default.log"}},Config).
+std_handler_time(cleanup,_Config) ->
+ _ = file:delete("default.log"),
+ ok.
+
+std_handler_time_big(Config) ->
+ measure_handler_time_big(logger_std_h,#{type=>{file,"default.log"}},Config).
+std_handler_time_big(cleanup,_Config) ->
+ _ = file:delete("default.log"),
+ ok.
+
+%% Cascading failure that produce gen_server and proc_lib reports -
+%% how many of the produced log events are actually written to a log
+%% with logger_disk_log_h wrap file handler.
+disk_log_handler(Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(Config,
+ [{logger,
+ [{handler,default,logger_disk_log_h,#{}}]}]),
+ cascade({Node,{logger_backend,log_allowed,2},[]},
+ {Node,{logger_disk_log_h,write,4},
+ [{default,logger_disk_log_h_default}]},
+ fun otp_cascading/0).
+disk_log_handler(cleanup,_Config) ->
+ Files = filelib:wildcard("default.log.*"),
+ [_ = file:delete(F) || F <- Files],
+ ok.
+
+%% Disable overload protection and just print a lot - measure time.
+%% The IOPS reported is the number of log events written per millisecond.
+disk_log_handler_time(Config) ->
+ measure_handler_time(logger_disk_log_h,#{type=>halt},Config).
+disk_log_handler_time(cleanup,_Config) ->
+ _ = file:delete("default"),
+ ok.
+
+disk_log_handler_time_big(Config) ->
+ measure_handler_time_big(logger_disk_log_h,#{type=>halt},Config).
+disk_log_handler_time_big(cleanup,_Config) ->
+ _ = file:delete("default"),
+ ok.
+
+%% Cascading failure that produce log events from the emulator - how
+%% many of the produced log events pass through the proxy.
+emulator_events(Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(Config,
+ [{logger,
+ [{handler,default,?MODULE,#{}}]}]),
+ cascade({Node,{?MODULE,producer,0},[]},
+ {Node,{?MODULE,log,2},[{proxy,logger_proxy}]},
+ fun em_cascading/0).
+
+%% Cascading failure that produce gen_server and proc_lib reports on
+%% remote node - how many of the produced log events pass through the
+%% proxy.
+remote_events(Config) ->
+ {ok,_,Node1} =
+ logger_test_lib:setup([{postfix,1}|Config],
+ [{logger,
+ [{handler,default,?MODULE,#{}}]}]),
+ {ok,_,Node2} =
+ logger_test_lib:setup([{postfix,2}|Config],[]),
+ cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]},
+ {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]},
+ fun otp_cascading/0).
+
+%% Cascading failure that produce gen_server and proc_lib reports on
+%% remote node - how many of the produced log events are actually
+%% written to a log with logger_disk_log_h wrap file handler.
+remote_to_disk_log(Config) ->
+ {ok,_,Node1} =
+ logger_test_lib:setup([{postfix,1}|Config],
+ [{logger,
+ [{handler,default,logger_disk_log_h,#{}}]}]),
+ {ok,_,Node2} =
+ logger_test_lib:setup([{postfix,2}|Config],[]),
+ cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]},
+ {Node1,{logger_disk_log_h,write,4},
+ [{local_proxy,logger_proxy},
+ {local_default,logger_disk_log_h_default}]},
+ fun otp_cascading/0).
+remote_to_disk_log(cleanup,_Config) ->
+ Files = filelib:wildcard("default.log.*"),
+ [_ = file:delete(F) || F <- Files],
+ ok.
+
+%% Cascading failure that produce log events from the emulator on
+%% remote node - how many of the produced log events pass through the
+%% proxy.
+remote_emulator_events(Config) ->
+ {ok,_,Node1} =
+ logger_test_lib:setup([{postfix,1}|Config],
+ [{logger,
+ [{handler,default,?MODULE,#{}}]}]),
+ {ok,_,Node2} =
+ logger_test_lib:setup([{postfix,2}|Config],[]),
+ cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]},
+ {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]},
+ fun em_cascading/0).
+
+%% Cascading failure that produce log events from the emulator on
+%% remote node - how many of the produced log events are actually
+%% written to a log with logger_disk_log_h wrap file handler.
+remote_emulator_to_disk_log(Config) ->
+ {ok,_,Node1} =
+ logger_test_lib:setup([{postfix,1}|Config],
+ [{logger,
+ [{handler,default,logger_disk_log_h,#{}}]}]),
+ {ok,_,Node2} =
+ logger_test_lib:setup([{postfix,2}|Config],[]),
+ cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]},
+ {Node1,{logger_disk_log_h,write,4},
+ [{local_proxy,logger_proxy},
+ {local_default,logger_disk_log_h_default}]},
+ fun em_cascading/0).
+remote_emulator_to_disk_log(cleanup,_Config) ->
+ Files = filelib:wildcard("default.log.*"),
+ [_ = file:delete(F) || F <- Files],
+ ok.
+
+%%%-----------------------------------------------------------------
+%%% Internal functions
+measure_handler_time(Module,HCfg,Config) ->
+ measure_handler_time(Module,100000,small_fa(),millisecond,HCfg,#{},Config).
+
+measure_handler_time_big(Module,HCfg,Config) ->
+ FCfg = #{chars_limit=>4096, max_size=>1024},
+ measure_handler_time(Module,100,big_fa(),second,HCfg,FCfg,Config).
+
+measure_handler_time(Module,N,FA,Unit,HCfg,FCfg,Config) ->
+ {ok,_,Node} =
+ logger_test_lib:setup(
+ Config,
+ [{logger,
+ [{handler,default,Module,
+ #{formatter => {logger_formatter,
+ maps:merge(#{legacy_header=>false,
+ single_line=>true},FCfg)},
+ config=>maps:merge(#{sync_mode_qlen => N+1,
+ drop_mode_qlen => N+1,
+ flush_qlen => N+1,
+ burst_limit_enable => false,
+ filesync_repeat_interval => no_repeat},
+ HCfg)}}]}]),
+ %% HPid = rpc:call(Node,erlang,whereis,[?name_to_reg_name(Module,default)]),
+ %% {links,[_,FCPid]} = rpc:call(Node,erlang,process_info,[HPid,links]),
+ T0 = erlang:monotonic_time(millisecond),
+ ok = rpc:call(Node,?MODULE,nlogs_wait,[N,FA,Module]),
+ %% ok = rpc:call(Node,fprof,apply,[fun ?MODULE:nlogs_wait/2,[N div 10,FA,Module],[{procs,[HPid,FCPid]}]]),
+ T1 = erlang:monotonic_time(millisecond),
+ T = T1-T0,
+ M = case Unit of
+ millisecond -> 1;
+ second -> 1000
+ end,
+ IOPS = M*N/T,
+ ct:pal("N: ~p~nT: ~p~nIOPS: ~.2f events pr ~w",[N,T,IOPS,Unit]),
+ %% Stats = rpc:call(Node,logger_olp,info,[?name_to_reg_name(Module,default)]),
+ %% ct:pal("Stats: ~p",[Stats]),
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{value,IOPS}]}),
+ {comment,io_lib:format("~.2f events written pr ~w",[IOPS,Unit])}.
+
+nlogs_wait(N,{F,A},Module) ->
+ group_leader(whereis(user),self()),
+ [?LOG_NOTICE(F,A) || _ <- lists:seq(1,N)],
+ wait(Module).
+
+wait(Module) ->
+ case Module:filesync(default) of
+ {error,handler_busy} ->
+ wait(Module);
+ ok ->
+ ok
+ end.
+
+small_fa() ->
+ Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "[\\]^_`abcdefghijklmnopqr",
+ {"~ts",[Str]}.
+
+big_fa() ->
+ {"~p",[lists:duplicate(1048576,"a")]}.
+
+nlogs(N) ->
+ group_leader(whereis(user),self()),
+ Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "[\\]^_`abcdefghijklmnopqr",
+ [?LOG_NOTICE(Str) || _ <- lists:seq(1,N)],
+ ok.
+
+%% cascade(ProducerInfo,ConsumerInfo,TestFun)
+cascade({PNode,PMFA,_PStatProcs},{CNode,CMFA,_CStatProcs},TestFun) ->
+ Tab = ets:new(counter,[set,public]),
+ ets:insert(Tab,{producer,0}),
+ ets:insert(Tab,{consumer,0}),
+ dbg:tracer(process,{fun tracer/2,{Tab,PNode,CNode}}),
+ dbg:n(PNode),
+ dbg:n(CNode),
+ dbg:cn(node()),
+ dbg:p(all,[call,arity]),
+ dbg:tpl(PMFA,[]),
+ dbg:tpl(CMFA,[]),
+
+ Pid = rpc:call(CNode,?MODULE,wrap_test,[PNode,TestFun]),
+ MRef = erlang:monitor(process,Pid),
+ TO = ?TEST_DURATION*1000,
+ receive {'DOWN',MRef,_,_,Reason} ->
+ ct:fail({remote_pid_down,Reason})
+ after TO ->
+ All = ets:lookup_element(Tab,producer,2),
+ Written = ets:lookup_element(Tab,consumer,2),
+ dbg:stop_clear(),
+ ?COLLECT_STATS(All,
+ [{PNode,P,Id} || {Id,P} <- _PStatProcs] ++
+ [{CNode,P,Id} || {Id,P} <- _CStatProcs]),
+ Ratio = Written/All * 100,
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{value,Ratio}]}),
+ {comment,io_lib:format("~p % (~p written, ~p produced)",
+ [round(Ratio),Written,All])}
+ end.
+
+wrap_test(Fun) ->
+ wrap_test(node(),Fun).
+wrap_test(Node,Fun) ->
+ reset(),
+ group_leader(whereis(user),self()),
+ rpc:call(Node,?MODULE,do_fun,[Fun]).
+
+do_fun(Fun) ->
+ reset(),
+ Fun().
+
+reset() ->
+ reset([logger_std_h_default, logger_disk_log_h_default, logger_proxy]).
+reset([P|Ps]) ->
+ is_pid(whereis(P)) andalso logger_olp:reset(P),
+ reset(Ps);
+reset([]) ->
+ ok.
+
+
+tracer({trace,_,call,{?MODULE,producer,_}},{Tab,_PNode,_CNode}=S) ->
+ ets:update_counter(Tab,producer,1),
+ S;
+tracer({trace,Pid,call,{logger_backend,log_allowed,_}},{Tab,PNode,_CNode}=S) when node(Pid)=:=PNode ->
+ ets:update_counter(Tab,producer,1),
+ S;
+tracer({trace,_,call,{?MODULE,log,_}},{Tab,_PNode,_CNode}=S) ->
+ ets:update_counter(Tab,consumer,1),
+ S;
+tracer({trace,_,call,{_,write,_}},{Tab,_PNode,_CNode}=S) ->
+ ets:update_counter(Tab,consumer,1),
+ S;
+tracer(_,S) ->
+ S.
+
+
+%%%-----------------------------------------------------------------
+%%% Collect statistics
+-define(STAT_KEYS,
+ [burst_drops,
+ calls,
+ casts,
+ drops,
+ flushed,
+ flushes,
+ freq,
+ last_qlen,
+ max_qlen,
+ time,
+ writes]).
+-define(EVENT_KEYS,
+ [calls,casts,flushed]).
+
+stats(All,Procs) ->
+ NI = [{Id,rpc:call(N,logger_olp,info,[P])} || {N,P,Id}<-Procs],
+ [{all,All}|[stats(Id,I,All) || {Id,I} <- NI]].
+
+stats(Id,Info,All) ->
+ S = maps:with(?STAT_KEYS,Info),
+ AllOnProc = lists:sum(maps:values(maps:with(?EVENT_KEYS,S))),
+ if All>0 ->
+ Writes = maps:get(writes,S),
+ {_,ActiveTime} = maps:get(time,S),
+ Rate = round(100*Writes/All),
+ RateOnProc =
+ if AllOnProc>0 ->
+ round(100*Writes/AllOnProc);
+ true ->
+ 0
+ end,
+ AvFreq =
+ if ActiveTime>0 ->
+ round(Writes/ActiveTime);
+ true ->
+ 0
+ end,
+ {Id,
+ {stats,S},
+ {rate,Rate},
+ {rate_on_proc,RateOnProc},
+ {av_freq,AvFreq}};
+ true ->
+ {Id,none}
+ end.
+
+%%%-----------------------------------------------------------------
+%%% Spawn a lot of processes that crash repeatedly, causing a lot of
+%%% error reports from the emulator.
+em_cascading() ->
+ spawn(fun() -> super() end).
+
+super() ->
+ process_flag(trap_exit,true),
+ spawn_link(fun server/0),
+ [spawn_link(fun client/0) || _<-lists:seq(1,10000)],
+ super_loop().
+
+super_loop() ->
+ receive
+ {'EXIT',_,server} ->
+ spawn_link(fun server/0),
+ super_loop();
+ {'EXIT',_,_} ->
+ _L = lists:sum(lists:seq(1,10000)),
+ spawn_link(fun client/0),
+ super_loop()
+ end.
+
+client() ->
+ receive
+ after 1 ->
+ case whereis(server) of
+ Pid when is_pid(Pid) ->
+ ok;
+ undefined ->
+ producer(),
+ erlang:error(some_exception)
+ end
+ end,
+ client().
+
+server() ->
+ register(server,self()),
+ receive
+ after 3000 ->
+ exit(server)
+ end.
+
+
+%%%-----------------------------------------------------------------
+%%% Create a supervisor tree with processes that crash repeatedly,
+%%% causing a lot of supervisor reports and crashreports
+otp_cascading() ->
+ {ok,Pid} = supervisor:start_link({local,otp_super}, ?MODULE, [otp_super]),
+ unlink(Pid),
+ Pid.
+
+otp_server_sup() ->
+ supervisor:start_link({local,otp_server_sup},?MODULE,[otp_server_sup]).
+
+otp_client_sup(N) ->
+ supervisor:start_link({local,otp_client_sup},?MODULE,[otp_client_sup,N]).
+
+otp_server() ->
+ gen_server:start_link({local,otp_server},?MODULE,[otp_server],[]).
+
+otp_client() ->
+ gen_server:start_link(?MODULE,[otp_client],[]).
+
+init([otp_super]) ->
+ {ok, {{one_for_one, 200, 10},
+ [{client_sup,
+ {?MODULE, otp_client_sup, [10000]},
+ permanent, 1000, supervisor, [?MODULE]},
+ {server_sup,
+ {?MODULE, otp_server_sup, []},
+ permanent, 1000, supervisor, [?MODULE]}
+ ]}};
+init([otp_server_sup]) ->
+ {ok, {{one_for_one, 2, 10},
+ [{server,
+ {?MODULE, otp_server, []},
+ permanent, 1000, worker, [?MODULE]}
+ ]}};
+init([otp_client_sup,N]) ->
+ spawn(fun() ->
+ [supervisor:start_child(otp_client_sup,[])
+ || _ <- lists:seq(1,N)]
+ end),
+ {ok, {{simple_one_for_one, N*10, 1},
+ [{client,
+ {?MODULE, otp_client, []},
+ permanent, 1000, worker, [?MODULE]}
+ ]}};
+init([otp_server]) ->
+ {ok, server, 10000};
+init([otp_client]) ->
+ {ok, client,1}.
+
+handle_info(timeout, client) ->
+ true = is_pid(whereis(otp_server)),
+ {noreply,client,1};
+handle_info(timeout, server) ->
+ exit(self(), some_error).
+
+%%%-----------------------------------------------------------------
+%%% Logger callbacks
+log(_LogEvent,_Config) ->
+ ok.
+
+%%%-----------------------------------------------------------------
+%%% Function to trace on for counting produced emulator messages
+producer() ->
+ ok.