From 681bc6ad427fce5f087ccc48fb0172b46c02b52a Mon Sep 17 00:00:00 2001 From: Peter Andersson Date: Fri, 4 May 2018 15:00:19 +0200 Subject: Various logger handler improvements and updated test cases --- lib/kernel/test/logger_disk_log_h_SUITE.erl | 193 ++++++++++++++++++---------- 1 file changed, 128 insertions(+), 65 deletions(-) (limited to 'lib/kernel/test/logger_disk_log_h_SUITE.erl') diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl index 6a4ec72d5e..bb88c53f26 100644 --- a/lib/kernel/test/logger_disk_log_h_SUITE.erl +++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl @@ -66,19 +66,7 @@ end_per_testcase(Case, Config) -> ok. groups() -> - [ - {retry_op_switch_to_sync, - [{repeat_until_all_ok,10}], - [op_switch_to_sync]}, - - {retry_op_switch_to_drop, - [{repeat_until_all_ok,10}], - [op_switch_to_drop]}, - - {retry_op_switch_to_flush, - [{repeat_until_all_ok,10}], - [op_switch_to_flush]} - ]. + []. all() -> [start_stop_handler, @@ -99,9 +87,9 @@ all() -> disk_log_events, write_failure, sync_failure, - {group,retry_op_switch_to_sync}, - {group,retry_op_switch_to_drop}, - {group,retry_op_switch_to_flush}, + op_switch_to_sync, + op_switch_to_drop, + op_switch_to_flush, limit_burst_disabled, limit_burst_enabled_one, limit_burst_enabled_period, @@ -880,57 +868,107 @@ op_switch_to_sync(Config) -> enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), send_burst({n,NumOfReqs}, seq, {chars,79}, info), - NumOfReqs = count_lines(Log), - ok = file:delete(Log). + Lines = count_lines(Log), + ok = file:delete(Log), + NumOfReqs = Lines, + ok. op_switch_to_sync(cleanup, _Config) -> ok = stop_handler(?MODULE). +op_switch_to_drop() -> + [{timetrap,{seconds,180}}]. op_switch_to_drop(Config) -> - {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - NumOfReqs = 300, - Procs = 2, - NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1, - drop_new_reqs_qlen => 2, - flush_reqs_qlen => Procs*NumOfReqs+1, - enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages dropped = ~w (~w)", - [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), - true = (Logged < (Procs*NumOfReqs)), - true = (Logged > 0), - ok = file:delete(Log). + Test = + fun() -> + {Log,HConfig,DLHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 300, + Procs = 2, + Bursts = 10, + NewHConfig = + HConfig#{logger_disk_log_h => + DLHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs*Bursts, + enable_burst_limit => false}}, + ok = logger:set_handler_config(?MODULE, NewHConfig), + %% It sometimes happens that the handler either gets + %% the requests in a slow enough pace so that dropping + %% never occurs. Therefore, lets generate a number of + %% bursts to increase the chance of message buildup. + [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1, Bursts)], + Logged = count_lines(Log), + ok= stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages dropped = ~w (~w)", + [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]), + true = (Logged < (Procs*NumOfReqs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_drop(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). op_switch_to_flush() -> - [{timetrap,{seconds,60}}]. + [{timetrap,{seconds,180}}]. op_switch_to_flush(Config) -> - {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + Test = + fun() -> + {Log,HConfig,DLHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + + %% NOTE: it's important that both async and sync + %% requests have been queued when the flush happens + %% (verify with coverage of flush_log_requests/2) - %% it's important that both async and sync requests have been queued - %% when the flush happens (verify with coverage of flush_log_requests/2) - - NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - %% disable drop mode - drop_new_reqs_qlen => 500, - flush_reqs_qlen => 500, - enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 1000, - Procs = 200, - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages flushed/dropped = ~w (~w)", - [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), - true = (Logged < (NumOfReqs*Procs)), - true = (Logged > 0), - ok = file:delete(Log). + NewHConfig = + HConfig#{logger_disk_log_h => + DLHConfig#{toggle_sync_qlen => 2, + %% disable drop mode + drop_new_reqs_qlen => 300, + flush_reqs_qlen => 300, + enable_burst_limit => false}}, + ok = logger:set_handler_config(?MODULE, NewHConfig), + NumOfReqs = 1500, + Procs = 10, + Bursts = 10, + %% It sometimes happens that the handler either gets + %% the requests in a slow enough pace so that flushing + %% never occurs, or it gets all messages at once, + %% causing all messages to get flushed (no dropping of + %% sync messages gets tested). Therefore, lets + %% generate a number of bursts to increase the chance + %% of message buildup in some random fashion. + [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1,Bursts)], + Logged = count_lines(Log), + ok= stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages flushed/dropped = ~w (~w)", + [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]), + true = (Logged < (NumOfReqs*Procs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_flush(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). limit_burst_disabled(Config) -> @@ -1014,7 +1052,7 @@ qlen_kill_new(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{enable_kill_overloaded=>true, @@ -1035,7 +1073,7 @@ qlen_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -1051,7 +1089,7 @@ mem_kill_new(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{enable_kill_overloaded=>true, @@ -1072,7 +1110,7 @@ mem_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter * 2), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -1105,7 +1143,7 @@ restart_after(Config) -> end, {Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig2 = HConfig#{logger_disk_log_h=>DLHConfig#{enable_kill_overloaded=>true, handler_overloaded_qlen=>10, @@ -1117,7 +1155,7 @@ restart_after(Config) -> send_burst({n,100}, {spawn,2,0}, {chars,79}, info), receive {'DOWN', MRef2, _, _, _Info2} -> - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), Pid1 = whereis(?MODULE), true = is_pid(Pid1), false = (Pid1 == Pid0), @@ -1388,6 +1426,29 @@ count_lines1(File) -> file:close(Dev), Lines. +repeat_until_ok(Fun, N) -> + repeat_until_ok(Fun, 0, N, undefined). + +repeat_until_ok(_Fun, Stop, Stop, Reason) -> + {fails,Reason}; + +repeat_until_ok(Fun, C, Stop, FirstReason) -> + if C > 0 -> timer:sleep(5000); + true -> ok + end, + try Fun() of + Result -> + {ok,{C,Result}} + catch + _:Reason:Stack -> + ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]), + if FirstReason == undefined -> + repeat_until_ok(Fun, C+1, Stop, {Reason,Stack}); + true -> + repeat_until_ok(Fun, C+1, Stop, FirstReason) + end + end. + start_tracer(Trace,Expected) -> Pid = self(), dbg:tracer(process,{fun tracer/2,{Pid,Expected}}), @@ -1410,9 +1471,11 @@ tpl([{M,F,A}|Trace]) -> tpl([]) -> ok. -tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, {Pid,[{formatter,Msg}|Expected]}) -> +tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, + {Pid,[{formatter,Msg}|Expected]}) -> maybe_tracer_done(Pid,Expected,{formatter,Msg}); -tracer({trace,_,call,{logger_disk_log_h,handle_cast,[{Op,_}|_]}}, {Pid,[{Mod,Func,Op}|Expected]}) -> +tracer({trace,_,call,{logger_disk_log_h,handle_cast,[Op|_]}}, + {Pid,[{Mod,Func,Op}|Expected]}) -> maybe_tracer_done(Pid,Expected,{Mod,Func,Op}); tracer({trace,_,call,{Mod,Func,_}}, {Pid,[{Mod,Func}|Expected]}) -> maybe_tracer_done(Pid,Expected,{Mod,Func}); -- cgit v1.2.3