diff options
-rw-r--r-- | lib/kernel/src/logger_olp.erl | 4 | ||||
-rw-r--r-- | lib/kernel/test/Makefile | 1 | ||||
-rw-r--r-- | lib/kernel/test/logger_stress_SUITE.erl | 456 | ||||
-rw-r--r-- | lib/kernel/test/logger_test_lib.erl | 10 |
4 files changed, 467 insertions, 4 deletions
diff --git a/lib/kernel/src/logger_olp.erl b/lib/kernel/src/logger_olp.erl index 7c6a1a8547..013cc6ce37 100644 --- a/lib/kernel/src/logger_olp.erl +++ b/lib/kernel/src/logger_olp.erl @@ -358,7 +358,7 @@ do_load(Msg, CallOrCast, State) -> %% this function is called by do_load/3 after an overload check %% has been performed, where QLen > FlushQLen -flush(T1, State=#{id := _Name, mode := Mode, last_load_ts := T0, mode_ref := ModeRef}) -> +flush(T1, State=#{id := _Name, mode := Mode, last_load_ts := _T0, mode_ref := ModeRef}) -> %% flush load messages in the mailbox (a limited number in order %% to not cause long delays) NewFlushed = flush_load(?FLUSH_MAX_N), @@ -375,7 +375,7 @@ flush(T1, State=#{id := _Name, mode := Mode, last_load_ts := T0, mode_ref := Mod %% Add 1 for the current log event ?observe(_Name,{flushed,NewFlushed+1}), - State2 = ?update_max_time(?diff_time(T1,T0),State1), + State2 = ?update_max_time(?diff_time(T1,_T0),State1), State3 = ?update_max_qlen(QLen1,State2), State4 = maybe_notify_mode_change(async,QLen1,State3), {dropped,?update_other(flushed,FLUSHED,NewFlushed, diff --git a/lib/kernel/test/Makefile b/lib/kernel/test/Makefile index 61b0705041..e510cf6531 100644 --- a/lib/kernel/test/Makefile +++ b/lib/kernel/test/Makefile @@ -79,6 +79,7 @@ MODULES= \ logger_proxy_SUITE \ logger_simple_h_SUITE \ logger_std_h_SUITE \ + logger_stress_SUITE \ logger_test_lib \ os_SUITE \ pg2_SUITE \ diff --git a/lib/kernel/test/logger_stress_SUITE.erl b/lib/kernel/test/logger_stress_SUITE.erl new file mode 100644 index 0000000000..5ec724c524 --- /dev/null +++ b/lib/kernel/test/logger_stress_SUITE.erl @@ -0,0 +1,456 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_stress_SUITE). + +-compile(export_all). + +-include_lib("common_test/include/ct_event.hrl"). +-include_lib("kernel/include/logger.hrl"). +-include_lib("kernel/src/logger_h_common.hrl"). + +-ifdef(SAVE_STATS). + -define(COLLECT_STATS(_All_,_Procs_), + ct:pal("~p",[stats(_All_,_Procs_)])). +-else. + -define(COLLECT_STATS(_All_,_Procs__), ok). +-endif. + +-define(TEST_DURATION,120). % seconds + +suite() -> + [{timetrap,{minutes,3}}, + {ct_hooks,[logger_test_lib]}]. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_Group, Config) -> + Config. + +end_per_group(_Group, _Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(Case, Config) -> + try apply(?MODULE,Case,[cleanup,Config]) + catch error:undef -> ok + end, + ok. + +groups() -> + []. + +all() -> + [allow_events, + reject_events, + std_handler, + disk_log_handler, + emulator_events, + remote_events, + remote_to_disk_log, + remote_emulator_events, + remote_emulator_to_disk_log]. + +%%%----------------------------------------------------------------- +%%% Test cases +%%%----------------------------------------------------------------- +%% Time from log macro call to handler callback +allow_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}, + {logger_level,notice}]), + N = 100000, + {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end), + IOPS = N * 1000/T, % log events allowed per millisecond + ct_event:notify(#event{name = benchmark_data, + data = [{value,IOPS}]}), + {comment,io_lib:format("~.2f accepted events pr millisecond", + [IOPS])}. + +%% Time from log macro call to reject (log level) +reject_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}, + {logger_level,error}]), + N = 1000000, + {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end), + IOPS = N * 1000/T, % log events rejeted per second + ct_event:notify(#event{name = benchmark_data, + data = [{value,IOPS}]}), + {comment,io_lib:format("~.2f rejected events pr millisecond", + [IOPS])}. + +%% Cascading failure that produce gen_server and proc_lib reports - +%% how many of the produced log events are actually written to a log +%% with logger_std_h file handler. +std_handler(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,logger_std_h, + #{config=>#{type=>{file,"default.log"}}}}]}]), + + cascade({Node,{logger_backend,log_allowed,2},[]}, + {Node,{logger_std_h,write,4},[{default,logger_std_h_default}]}, + fun otp_cascading/0). +std_handler(cleanup,_Config) -> + _ = file:delete("default.log"), + ok. + +%% Cascading failure that produce gen_server and proc_lib reports - +%% how many of the produced log events are actually written to a log +%% with logger_disk_log_h wrap file handler. +disk_log_handler(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + cascade({Node,{logger_backend,log_allowed,2},[]}, + {Node,{logger_disk_log_h,write,4}, + [{default,logger_disk_log_h_default}]}, + fun otp_cascading/0). +disk_log_handler(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%% Cascading failure that produce log events from the emulator - how +%% many of the produced log events pass through the proxy. +emulator_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}]), + cascade({Node,{?MODULE,producer,0},[]}, + {Node,{?MODULE,log,2},[{proxy,logger_proxy}]}, + fun em_cascading/0). + +%% Cascading failure that produce gen_server and proc_lib reports on +%% remote node - how many of the produced log events pass through the +%% proxy. +remote_events(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,?MODULE,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]}, + {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]}, + fun otp_cascading/0). + +%% Cascading failure that produce gen_server and proc_lib reports on +%% remote node - how many of the produced log events are actually +%% written to a log with logger_disk_log_h wrap file handler. +remote_to_disk_log(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]}, + {Node1,{logger_disk_log_h,write,4}, + [{local_proxy,logger_proxy}, + {local_default,logger_disk_log_h_default}]}, + fun otp_cascading/0). +remote_to_disk_log(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%% Cascading failure that produce log events from the emulator on +%% remote node - how many of the produced log events pass through the +%% proxy. +remote_emulator_events(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,?MODULE,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]}, + {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]}, + fun em_cascading/0). + +%% Cascading failure that produce log events from the emulator on +%% remote node - how many of the produced log events are actually +%% written to a log with logger_disk_log_h wrap file handler. +remote_emulator_to_disk_log(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]}, + {Node1,{logger_disk_log_h,write,4}, + [{local_proxy,logger_proxy}, + {local_default,logger_disk_log_h_default}]}, + fun em_cascading/0). +remote_emulator_to_disk_log(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%%%----------------------------------------------------------------- +%%% Internal functions +nlogs(N) -> + group_leader(whereis(user),self()), + Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "[\\]^_`abcdefghijklmnopqr", + [?LOG_NOTICE(Str) || _ <- lists:seq(1,N)], + ok. + +%% cascade(ProducerInfo,ConsumerInfo,TestFun) +cascade({PNode,PMFA,PStatProcs},{CNode,CMFA,CStatProcs},TestFun) -> + Tab = ets:new(counter,[set,public]), + ets:insert(Tab,{producer,0}), + ets:insert(Tab,{consumer,0}), + dbg:tracer(process,{fun tracer/2,{Tab,PNode,CNode}}), + dbg:n(PNode), + dbg:n(CNode), + dbg:cn(node()), + dbg:p(all,[call,arity]), + dbg:tpl(PMFA,[]), + dbg:tpl(CMFA,[]), + + Pid = rpc:call(CNode,?MODULE,wrap_test,[PNode,TestFun]), + MRef = erlang:monitor(process,Pid), + TO = ?TEST_DURATION*1000, + receive {'DOWN',MRef,_,_,Reason} -> + ct:fail({remote_pid_down,Reason}) + after TO -> + All = ets:lookup_element(Tab,producer,2), + Written = ets:lookup_element(Tab,consumer,2), + dbg:stop_clear(), + ?COLLECT_STATS(All, + [{PNode,P,Id} || {Id,P} <- PStatProcs] ++ + [{CNode,P,Id} || {Id,P} <- CStatProcs]), + Ratio = Written/All, + ct_event:notify(#event{name = benchmark_data, + data = [{value,Ratio}]}), + {comment,io_lib:format("~.2f (~p written, ~p produced)", + [Ratio,Written,All])} + end. + +wrap_test(Fun) -> + wrap_test(node(),Fun). +wrap_test(Node,Fun) -> + reset(), + group_leader(whereis(user),self()), + rpc:call(Node,?MODULE,do_fun,[Fun]). + +do_fun(Fun) -> + reset(), + Fun(). + +reset() -> + reset([logger_std_h_default, logger_disk_log_h_default, logger_proxy]). +reset([P|Ps]) -> + is_pid(whereis(P)) andalso logger_olp:reset(P), + reset(Ps); +reset([]) -> + ok. + + +tracer({trace,_,call,{?MODULE,producer,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,producer,1), + S; +tracer({trace,Pid,call,{logger_backend,log_allowed,_}},{Tab,PNode,_CNode}=S) when node(Pid)=:=PNode -> + ets:update_counter(Tab,producer,1), + S; +tracer({trace,_,call,{?MODULE,log,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,consumer,1), + S; +tracer({trace,_,call,{_,write,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,consumer,1), + S; +tracer(_,S) -> + S. + + +%%%----------------------------------------------------------------- +%%% Collect statistics +-define(STAT_KEYS, + [burst_drops, + calls, + casts, + drops, + flushed, + flushes, + freq, + last_qlen, + max_qlen, + time, + writes]). +-define(EVENT_KEYS, + [calls,casts,flushed]). + +stats(All,Procs) -> + NI = [{Id,rpc:call(N,logger_olp,info,[P])} || {N,P,Id}<-Procs], + [{all,All}|[stats(Id,I,All) || {Id,I} <- NI]]. + +stats(Id,Info,All) -> + S = maps:with(?STAT_KEYS,Info), + AllOnProc = lists:sum(maps:values(maps:with(?EVENT_KEYS,S))), + if All>0 -> + Writes = maps:get(writes,S), + {_,ActiveTime} = maps:get(time,S), + Rate = round(100*Writes/All), + RateOnProc = + if AllOnProc>0 -> + round(100*Writes/AllOnProc); + true -> + 0 + end, + AvFreq = + if ActiveTime>0 -> + round(Writes/ActiveTime); + true -> + 0 + end, + {Id, + {stats,S}, + {rate,Rate}, + {rate_on_proc,RateOnProc}, + {av_freq,AvFreq}}; + true -> + {Id,none} + end. + +%%%----------------------------------------------------------------- +%%% Spawn a lot of processes that crash repeatedly, causing a lot of +%%% error reports from the emulator. +em_cascading() -> + spawn(fun() -> super() end). + +super() -> + process_flag(trap_exit,true), + spawn_link(fun server/0), + [spawn_link(fun client/0) || _<-lists:seq(1,10000)], + super_loop(). + +super_loop() -> + receive + {'EXIT',_,server} -> + spawn_link(fun server/0), + super_loop(); + {'EXIT',_,_} -> + _L = lists:sum(lists:seq(1,10000)), + spawn_link(fun client/0), + super_loop() + end. + +client() -> + receive + after 1 -> + case whereis(server) of + Pid when is_pid(Pid) -> + ok; + undefined -> + producer(), + erlang:error(some_exception) + end + end, + client(). + +server() -> + register(server,self()), + receive + after 3000 -> + exit(server) + end. + + +%%%----------------------------------------------------------------- +%%% Create a supervisor tree with processes that crash repeatedly, +%%% causing a lot of supervisor reports and crashreports +otp_cascading() -> + {ok,Pid} = supervisor:start_link({local,otp_super}, ?MODULE, [otp_super]), + unlink(Pid), + Pid. + +otp_server_sup() -> + supervisor:start_link({local,otp_server_sup},?MODULE,[otp_server_sup]). + +otp_client_sup(N) -> + supervisor:start_link({local,otp_client_sup},?MODULE,[otp_client_sup,N]). + +otp_server() -> + gen_server:start_link({local,otp_server},?MODULE,[otp_server],[]). + +otp_client() -> + gen_server:start_link(?MODULE,[otp_client],[]). + +init([otp_super]) -> + {ok, {{one_for_one, 200, 10}, + [{client_sup, + {?MODULE, otp_client_sup, [10000]}, + permanent, 1000, supervisor, [?MODULE]}, + {server_sup, + {?MODULE, otp_server_sup, []}, + permanent, 1000, supervisor, [?MODULE]} + ]}}; +init([otp_server_sup]) -> + {ok, {{one_for_one, 2, 10}, + [{server, + {?MODULE, otp_server, []}, + permanent, 1000, worker, [?MODULE]} + ]}}; +init([otp_client_sup,N]) -> + spawn(fun() -> + [supervisor:start_child(otp_client_sup,[]) + || _ <- lists:seq(1,N)] + end), + {ok, {{simple_one_for_one, N*10, 1}, + [{client, + {?MODULE, otp_client, []}, + permanent, 1000, worker, [?MODULE]} + ]}}; +init([otp_server]) -> + {ok, server, 10000}; +init([otp_client]) -> + {ok, client,1}. + +handle_info(timeout, client) -> + true = is_pid(whereis(otp_server)), + {noreply,client,1}; +handle_info(timeout, server) -> + exit(self(), some_error). + +%%%----------------------------------------------------------------- +%%% Logger callbacks +log(_LogEvent,_Config) -> + ok. + +%%%----------------------------------------------------------------- +%%% Function to trace on for counting produced emulator messages +producer() -> + ok. diff --git a/lib/kernel/test/logger_test_lib.erl b/lib/kernel/test/logger_test_lib.erl index 81eb9ce5eb..be4bc427fb 100644 --- a/lib/kernel/test/logger_test_lib.erl +++ b/lib/kernel/test/logger_test_lib.erl @@ -28,11 +28,17 @@ post_end_per_testcase/5, post_end_per_suite/3]). setup(Config,Vars) -> + Postfix = case proplists:get_value(postfix, Config) of + undefined -> ""; + P -> ["_",P] + end, FuncStr = lists:concat([proplists:get_value(suite, Config), "_", - proplists:get_value(tc, Config)]), + proplists:get_value(tc, Config)| + Postfix]), ConfigFileName = filename:join(proplists:get_value(priv_dir, Config), FuncStr), file:write_file(ConfigFileName ++ ".config", io_lib:format("[{kernel, ~p}].",[Vars])), - case test_server:start_node(proplists:get_value(tc, Config), slave, + Sname = lists:concat([proplists:get_value(tc,Config)|Postfix]), + case test_server:start_node(Sname, slave, [{args, ["-pa ",filename:dirname(code:which(?MODULE)), " -boot start_sasl -kernel start_timer true " "-config ",ConfigFileName]}]) of |