%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
-module(logger_stress_SUITE).
-compile(export_all).
-include_lib("common_test/include/ct_event.hrl").
-include_lib("kernel/include/logger.hrl").
-include_lib("kernel/src/logger_h_common.hrl").
-ifdef(SAVE_STATS).
-define(COLLECT_STATS(_All_,_Procs_),
ct:pal("~p",[stats(_All_,_Procs_)])).
-else.
-define(COLLECT_STATS(_All_,_Procs__), ok).
-endif.
-define(TEST_DURATION,120). % seconds
suite() ->
[{timetrap,{minutes,3}},
{ct_hooks,[logger_test_lib]}].
init_per_suite(Config) ->
Config.
end_per_suite(_Config) ->
ok.
init_per_group(_Group, Config) ->
Config.
end_per_group(_Group, _Config) ->
ok.
init_per_testcase(_TestCase, Config) ->
Config.
end_per_testcase(Case, Config) ->
try apply(?MODULE,Case,[cleanup,Config])
catch error:undef -> ok
end,
ok.
groups() ->
[].
all() ->
[allow_events,
reject_events,
std_handler,
disk_log_handler,
std_handler_time,
std_handler_time_big,
disk_log_handler_time,
disk_log_handler_time_big,
emulator_events,
remote_events,
remote_to_disk_log,
remote_emulator_events,
remote_emulator_to_disk_log].
%%%-----------------------------------------------------------------
%%% Test cases
%%%-----------------------------------------------------------------
%% Time from log macro call to handler callback
allow_events(Config) ->
{ok,_,Node} =
logger_test_lib:setup(Config,
[{logger,
[{handler,default,?MODULE,#{}}]},
{logger_level,notice}]),
N = 100000,
{T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end),
IOPS = N * 1000/T, % log events allowed per millisecond
ct_event:notify(#event{name = benchmark_data,
data = [{value,IOPS}]}),
{comment,io_lib:format("~.2f accepted events pr millisecond",
[IOPS])}.
%% Time from log macro call to reject (log level)
reject_events(Config) ->
{ok,_,Node} =
logger_test_lib:setup(Config,
[{logger,
[{handler,default,?MODULE,#{}}]},
{logger_level,error}]),
N = 1000000,
{T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end),
IOPS = N * 1000/T, % log events rejected per millisecond
ct_event:notify(#event{name = benchmark_data,
data = [{value,IOPS}]}),
{comment,io_lib:format("~.2f rejected events pr millisecond",
[IOPS])}.
%% Cascading failure that produce gen_server and proc_lib reports -
%% how many of the produced log events are actually written to a log
%% with logger_std_h file handler.
std_handler(Config) ->
{ok,_,Node} =
logger_test_lib:setup(Config,
[{logger,
[{handler,default,logger_std_h,
#{config=>#{type=>{file,"default.log"}}}}]}]),
cascade({Node,{logger_backend,log_allowed,2},[]},
{Node,{logger_std_h,write,4},[{default,logger_std_h_default}]},
fun otp_cascading/0).
std_handler(cleanup,_Config) ->
_ = file:delete("default.log"),
ok.
%% Disable overload protection and just print a lot - measure time.
%% The IOPS reported is the number of log events written per millisecond.
std_handler_time(Config) ->
measure_handler_time(logger_std_h,#{type=>{file,"default.log"}},Config).
std_handler_time(cleanup,_Config) ->
_ = file:delete("default.log"),
ok.
std_handler_time_big(Config) ->
measure_handler_time_big(logger_std_h,#{type=>{file,"default.log"}},Config).
std_handler_time_big(cleanup,_Config) ->
_ = file:delete("default.log"),
ok.
%% Cascading failure that produce gen_server and proc_lib reports -
%% how many of the produced log events are actually written to a log
%% with logger_disk_log_h wrap file handler.
disk_log_handler(Config) ->
{ok,_,Node} =
logger_test_lib:setup(Config,
[{logger,
[{handler,default,logger_disk_log_h,#{}}]}]),
cascade({Node,{logger_backend,log_allowed,2},[]},
{Node,{logger_disk_log_h,write,4},
[{default,logger_disk_log_h_default}]},
fun otp_cascading/0).
disk_log_handler(cleanup,_Config) ->
Files = filelib:wildcard("default.log.*"),
[_ = file:delete(F) || F <- Files],
ok.
%% Disable overload protection and just print a lot - measure time.
%% The IOPS reported is the number of log events written per millisecond.
disk_log_handler_time(Config) ->
measure_handler_time(logger_disk_log_h,#{type=>halt},Config).
disk_log_handler_time(cleanup,_Config) ->
_ = file:delete("default"),
ok.
disk_log_handler_time_big(Config) ->
measure_handler_time_big(logger_disk_log_h,#{type=>halt},Config).
disk_log_handler_time_big(cleanup,_Config) ->
_ = file:delete("default"),
ok.
%% Cascading failure that produce log events from the emulator - how
%% many of the produced log events pass through the proxy.
emulator_events(Config) ->
{ok,_,Node} =
logger_test_lib:setup(Config,
[{logger,
[{handler,default,?MODULE,#{}}]}]),
cascade({Node,{?MODULE,producer,0},[]},
{Node,{?MODULE,log,2},[{proxy,logger_proxy}]},
fun em_cascading/0).
%% Cascading failure that produce gen_server and proc_lib reports on
%% remote node - how many of the produced log events pass through the
%% proxy.
remote_events(Config) ->
{ok,_,Node1} =
logger_test_lib:setup([{postfix,1}|Config],
[{logger,
[{handler,default,?MODULE,#{}}]}]),
{ok,_,Node2} =
logger_test_lib:setup([{postfix,2}|Config],[]),
cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]},
{Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]},
fun otp_cascading/0).
%% Cascading failure that produce gen_server and proc_lib reports on
%% remote node - how many of the produced log events are actually
%% written to a log with logger_disk_log_h wrap file handler.
remote_to_disk_log(Config) ->
{ok,_,Node1} =
logger_test_lib:setup([{postfix,1}|Config],
[{logger,
[{handler,default,logger_disk_log_h,#{}}]}]),
{ok,_,Node2} =
logger_test_lib:setup([{postfix,2}|Config],[]),
cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]},
{Node1,{logger_disk_log_h,write,4},
[{local_proxy,logger_proxy},
{local_default,logger_disk_log_h_default}]},
fun otp_cascading/0).
remote_to_disk_log(cleanup,_Config) ->
Files = filelib:wildcard("default.log.*"),
[_ = file:delete(F) || F <- Files],
ok.
%% Cascading failure that produce log events from the emulator on
%% remote node - how many of the produced log events pass through the
%% proxy.
remote_emulator_events(Config) ->
{ok,_,Node1} =
logger_test_lib:setup([{postfix,1}|Config],
[{logger,
[{handler,default,?MODULE,#{}}]}]),
{ok,_,Node2} =
logger_test_lib:setup([{postfix,2}|Config],[]),
cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]},
{Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]},
fun em_cascading/0).
%% Cascading failure that produce log events from the emulator on
%% remote node - how many of the produced log events are actually
%% written to a log with logger_disk_log_h wrap file handler.
remote_emulator_to_disk_log(Config) ->
{ok,_,Node1} =
logger_test_lib:setup([{postfix,1}|Config],
[{logger,
[{handler,default,logger_disk_log_h,#{}}]}]),
{ok,_,Node2} =
logger_test_lib:setup([{postfix,2}|Config],[]),
cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]},
{Node1,{logger_disk_log_h,write,4},
[{local_proxy,logger_proxy},
{local_default,logger_disk_log_h_default}]},
fun em_cascading/0).
remote_emulator_to_disk_log(cleanup,_Config) ->
Files = filelib:wildcard("default.log.*"),
[_ = file:delete(F) || F <- Files],
ok.
%%%-----------------------------------------------------------------
%%% Internal functions
measure_handler_time(Module,HCfg,Config) ->
measure_handler_time(Module,100000,small_fa(),millisecond,HCfg,#{},Config).
measure_handler_time_big(Module,HCfg,Config) ->
FCfg = #{chars_limit=>4096, max_size=>1024},
measure_handler_time(Module,100,big_fa(),second,HCfg,FCfg,Config).
measure_handler_time(Module,N,FA,Unit,HCfg,FCfg,Config) ->
{ok,_,Node} =
logger_test_lib:setup(
Config,
[{logger,
[{handler,default,Module,
#{formatter => {logger_formatter,
maps:merge(#{legacy_header=>false,
single_line=>true},FCfg)},
config=>maps:merge(#{sync_mode_qlen => N+1,
drop_mode_qlen => N+1,
flush_qlen => N+1,
burst_limit_enable => false,
filesync_repeat_interval => no_repeat},
HCfg)}}]}]),
%% HPid = rpc:call(Node,erlang,whereis,[?name_to_reg_name(Module,default)]),
%% {links,[_,FCPid]} = rpc:call(Node,erlang,process_info,[HPid,links]),
T0 = erlang:monotonic_time(millisecond),
ok = rpc:call(Node,?MODULE,nlogs_wait,[N,FA,Module]),
%% ok = rpc:call(Node,fprof,apply,[fun ?MODULE:nlogs_wait/2,[N div 10,FA,Module],[{procs,[HPid,FCPid]}]]),
T1 = erlang:monotonic_time(millisecond),
T = T1-T0,
M = case Unit of
millisecond -> 1;
second -> 1000
end,
IOPS = M*N/T,
ct:pal("N: ~p~nT: ~p~nIOPS: ~.2f events pr ~w",[N,T,IOPS,Unit]),
%% Stats = rpc:call(Node,logger_olp,info,[?name_to_reg_name(Module,default)]),
%% ct:pal("Stats: ~p",[Stats]),
ct_event:notify(#event{name = benchmark_data,
data = [{value,IOPS}]}),
{comment,io_lib:format("~.2f events written pr ~w",[IOPS,Unit])}.
nlogs_wait(N,{F,A},Module) ->
group_leader(whereis(user),self()),
[?LOG_NOTICE(F,A) || _ <- lists:seq(1,N)],
wait(Module).
wait(Module) ->
case Module:filesync(default) of
{error,handler_busy} ->
wait(Module);
ok ->
ok
end.
small_fa() ->
Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"[\\]^_`abcdefghijklmnopqr",
{"~ts",[Str]}.
big_fa() ->
{"~p",[lists:duplicate(1048576,"a")]}.
nlogs(N) ->
group_leader(whereis(user),self()),
Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"[\\]^_`abcdefghijklmnopqr",
[?LOG_NOTICE(Str) || _ <- lists:seq(1,N)],
ok.
%% cascade(ProducerInfo,ConsumerInfo,TestFun)
cascade({PNode,PMFA,_PStatProcs},{CNode,CMFA,_CStatProcs},TestFun) ->
Tab = ets:new(counter,[set,public]),
ets:insert(Tab,{producer,0}),
ets:insert(Tab,{consumer,0}),
dbg:tracer(process,{fun tracer/2,{Tab,PNode,CNode}}),
dbg:n(PNode),
dbg:n(CNode),
dbg:cn(node()),
dbg:p(all,[call,arity]),
dbg:tpl(PMFA,[]),
dbg:tpl(CMFA,[]),
Pid = rpc:call(CNode,?MODULE,wrap_test,[PNode,TestFun]),
MRef = erlang:monitor(process,Pid),
TO = ?TEST_DURATION*1000,
receive {'DOWN',MRef,_,_,Reason} ->
ct:fail({remote_pid_down,Reason})
after TO ->
All = ets:lookup_element(Tab,producer,2),
Written = ets:lookup_element(Tab,consumer,2),
dbg:stop_clear(),
?COLLECT_STATS(All,
[{PNode,P,Id} || {Id,P} <- _PStatProcs] ++
[{CNode,P,Id} || {Id,P} <- _CStatProcs]),
Ratio = Written/All * 100,
ct_event:notify(#event{name = benchmark_data,
data = [{value,Ratio}]}),
{comment,io_lib:format("~p % (~p written, ~p produced)",
[round(Ratio),Written,All])}
end.
wrap_test(Fun) ->
wrap_test(node(),Fun).
wrap_test(Node,Fun) ->
reset(),
group_leader(whereis(user),self()),
rpc:call(Node,?MODULE,do_fun,[Fun]).
do_fun(Fun) ->
reset(),
Fun().
reset() ->
reset([logger_std_h_default, logger_disk_log_h_default, logger_proxy]).
reset([P|Ps]) ->
is_pid(whereis(P)) andalso logger_olp:reset(P),
reset(Ps);
reset([]) ->
ok.
tracer({trace,_,call,{?MODULE,producer,_}},{Tab,_PNode,_CNode}=S) ->
ets:update_counter(Tab,producer,1),
S;
tracer({trace,Pid,call,{logger_backend,log_allowed,_}},{Tab,PNode,_CNode}=S) when node(Pid)=:=PNode ->
ets:update_counter(Tab,producer,1),
S;
tracer({trace,_,call,{?MODULE,log,_}},{Tab,_PNode,_CNode}=S) ->
ets:update_counter(Tab,consumer,1),
S;
tracer({trace,_,call,{_,write,_}},{Tab,_PNode,_CNode}=S) ->
ets:update_counter(Tab,consumer,1),
S;
tracer(_,S) ->
S.
%%%-----------------------------------------------------------------
%%% Collect statistics
-define(STAT_KEYS,
[burst_drops,
calls,
casts,
drops,
flushed,
flushes,
freq,
last_qlen,
max_qlen,
time,
writes]).
-define(EVENT_KEYS,
[calls,casts,flushed]).
stats(All,Procs) ->
NI = [{Id,rpc:call(N,logger_olp,info,[P])} || {N,P,Id}<-Procs],
[{all,All}|[stats(Id,I,All) || {Id,I} <- NI]].
stats(Id,Info,All) ->
S = maps:with(?STAT_KEYS,Info),
AllOnProc = lists:sum(maps:values(maps:with(?EVENT_KEYS,S))),
if All>0 ->
Writes = maps:get(writes,S),
{_,ActiveTime} = maps:get(time,S),
Rate = round(100*Writes/All),
RateOnProc =
if AllOnProc>0 ->
round(100*Writes/AllOnProc);
true ->
0
end,
AvFreq =
if ActiveTime>0 ->
round(Writes/ActiveTime);
true ->
0
end,
{Id,
{stats,S},
{rate,Rate},
{rate_on_proc,RateOnProc},
{av_freq,AvFreq}};
true ->
{Id,none}
end.
%%%-----------------------------------------------------------------
%%% Spawn a lot of processes that crash repeatedly, causing a lot of
%%% error reports from the emulator.
em_cascading() ->
spawn(fun() -> super() end).
super() ->
process_flag(trap_exit,true),
spawn_link(fun server/0),
[spawn_link(fun client/0) || _<-lists:seq(1,10000)],
super_loop().
super_loop() ->
receive
{'EXIT',_,server} ->
spawn_link(fun server/0),
super_loop();
{'EXIT',_,_} ->
_L = lists:sum(lists:seq(1,10000)),
spawn_link(fun client/0),
super_loop()
end.
client() ->
receive
after 1 ->
case whereis(server) of
Pid when is_pid(Pid) ->
ok;
undefined ->
producer(),
erlang:error(some_exception)
end
end,
client().
server() ->
register(server,self()),
receive
after 3000 ->
exit(server)
end.
%%%-----------------------------------------------------------------
%%% Create a supervisor tree with processes that crash repeatedly,
%%% causing a lot of supervisor reports and crashreports
otp_cascading() ->
{ok,Pid} = supervisor:start_link({local,otp_super}, ?MODULE, [otp_super]),
unlink(Pid),
Pid.
otp_server_sup() ->
supervisor:start_link({local,otp_server_sup},?MODULE,[otp_server_sup]).
otp_client_sup(N) ->
supervisor:start_link({local,otp_client_sup},?MODULE,[otp_client_sup,N]).
otp_server() ->
gen_server:start_link({local,otp_server},?MODULE,[otp_server],[]).
otp_client() ->
gen_server:start_link(?MODULE,[otp_client],[]).
init([otp_super]) ->
{ok, {{one_for_one, 200, 10},
[{client_sup,
{?MODULE, otp_client_sup, [10000]},
permanent, 1000, supervisor, [?MODULE]},
{server_sup,
{?MODULE, otp_server_sup, []},
permanent, 1000, supervisor, [?MODULE]}
]}};
init([otp_server_sup]) ->
{ok, {{one_for_one, 2, 10},
[{server,
{?MODULE, otp_server, []},
permanent, 1000, worker, [?MODULE]}
]}};
init([otp_client_sup,N]) ->
spawn(fun() ->
[supervisor:start_child(otp_client_sup,[])
|| _ <- lists:seq(1,N)]
end),
{ok, {{simple_one_for_one, N*10, 1},
[{client,
{?MODULE, otp_client, []},
permanent, 1000, worker, [?MODULE]}
]}};
init([otp_server]) ->
{ok, server, 10000};
init([otp_client]) ->
{ok, client,1}.
handle_info(timeout, client) ->
true = is_pid(whereis(otp_server)),
{noreply,client,1};
handle_info(timeout, server) ->
exit(self(), some_error).
%%%-----------------------------------------------------------------
%%% Logger callbacks
log(_LogEvent,_Config) ->
ok.
%%%-----------------------------------------------------------------
%%% Function to trace on for counting produced emulator messages
producer() ->
ok.