Diffstat (limited to 'lib/kernel')
-rw-r--r--   lib/kernel/doc/src/logger_chapter.xml       |   4
-rw-r--r--   lib/kernel/src/logger_disk_log_h.erl        |  64
-rw-r--r--   lib/kernel/src/logger_std_h.erl             |  22
-rw-r--r--   lib/kernel/test/logger_disk_log_h_SUITE.erl | 193
-rw-r--r--   lib/kernel/test/logger_std_h_SUITE.erl      | 233
5 files changed, 311 insertions, 205 deletions
diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml
index 5fb81034dc..0374a0c93a 100644
--- a/lib/kernel/doc/src/logger_chapter.xml
+++ b/lib/kernel/doc/src/logger_chapter.xml
@@ -697,8 +697,8 @@ my_report_cb(R) ->
       </item>
     </taglist>
 
-    <p>For the overload protection algorithm to work properly, it is a
-      requirement that:</p>
+    <p>For the overload protection algorithm to work properly, it is
+      required that:</p>
 
     <p><c>toggle_sync_qlen =< drop_new_reqs_qlen =< flush_reqs_qlen</c></p>
diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl
index 3b71f936d8..eaa5cd6f99 100644
--- a/lib/kernel/src/logger_disk_log_h.erl
+++ b/lib/kernel/src/logger_disk_log_h.erl
@@ -278,10 +278,11 @@ init([Name, Config = #{disk_log_opts := LogOpts},
                              last_log_ts => T0,
                              burst_win_ts => T0,
                              burst_msg_count => 0,
+                             last_op => sync,
                              prev_log_result => ok,
                              prev_sync_result => ok,
                              prev_disk_log_info => undefined}),
-            gen_server:cast(self(), {repeated_disk_log_sync,T0}),
+            gen_server:cast(self(), repeated_disk_log_sync),
             enter_loop(Config, State1);
         Error ->
             logger_h_common:error_notify({open_disk_log,Name,Error}),
@@ -316,8 +317,7 @@ handle_call(disk_log_sync, _From, State = #{id := Name}) ->
     {reply, Result, State1};
 
 handle_call({change_config,_OldConfig,NewConfig}, _From,
-            State = #{filesync_repeat_interval := FSyncInt0,
-                      last_log_ts := LastLogTS}) ->
+            State = #{filesync_repeat_interval := FSyncInt0}) ->
     HConfig = maps:get(?MODULE, NewConfig, #{}),
     State1 = #{toggle_sync_qlen := TSQL,
                drop_new_reqs_qlen := DNRQL,
@@ -338,9 +338,8 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
                    _ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
                                                              State,
                                                              undefined)),
-                   _ = gen_server:cast(self(), {repeated_disk_log_sync,
-                                                LastLogTS})
-                end,
+                   _ = gen_server:cast(self(), repeated_disk_log_sync)
+               end,
             {reply, ok, State1};
         false ->
             {reply, {error,{invalid_levels,{TSQL,DNRQL,FRQL}}}, State}
@@ -370,24 +369,23 @@ handle_cast({log, Bin}, State) ->
 %% clause gets called repeatedly by the handler. In order to
 %% guarantee that a filesync *always* happens after the last log
 %% request, the repeat operation must be active!
-handle_cast({repeated_disk_log_sync,LastLogTS0},
+handle_cast(repeated_disk_log_sync,
             State = #{id := Name,
                       filesync_repeat_interval := FSyncInt,
-                      last_log_ts := LastLogTS1}) ->
+                      last_op := LastOp}) ->
     State1 =
         if is_integer(FSyncInt) ->
                 %% only do filesync if something has been
                 %% written since last time we checked
-                NewState = if LastLogTS1 == LastLogTS0 ->
+                NewState = if LastOp == sync ->
                                    State;
                               true ->
                                    disk_log_sync(Name, State)
                            end,
                 {ok,TRef} =
                     timer:apply_after(FSyncInt, gen_server,cast,
-                                      [self(),
-                                       {repeated_disk_log_sync,LastLogTS1}]),
-                NewState#{rep_sync_tref => TRef};
+                                      [self(),repeated_disk_log_sync]),
+                NewState#{rep_sync_tref => TRef, last_op => sync};
            true ->
                 State
         end,
@@ -649,10 +647,9 @@ close_disk_log(Name, _) ->
     ok.
 
 disk_log_write(Name, Bin, State) ->
-    Result =
         case ?disk_log_blog(Name, Bin) of
             ok ->
-                ok;
+                State#{prev_log_result => ok, last_op => write};
             LogError ->
                 _ = case maps:get(prev_log_result, State) of
                         LogError ->
@@ -664,29 +661,26 @@ disk_log_write(Name, Bin, State) ->
                                                    LogOpts,
                                                    LogError})
                     end,
-                LogError
-        end,
-    State#{prev_log_result => Result}.
+                State#{prev_log_result => LogError}
+        end.
 
 disk_log_sync(Name, State) ->
-    Result =
-        case ?disk_log_sync(Name) of
-            ok ->
-                ok;
-            SyncError ->
-                _ = case maps:get(prev_sync_result, State) of
-                        SyncError ->
-                            %% don't report same error twice
-                            ok;
-                        _ ->
-                            LogOpts = maps:get(log_opts, State),
-                            logger_h_common:error_notify({Name,sync,
-                                                          LogOpts,
-                                                          SyncError})
-                    end,
-                SyncError
-        end,
-    State#{prev_sync_result => Result}.
+    case ?disk_log_sync(Name) of
+        ok ->
+            State#{prev_sync_result => ok, last_op => sync};
+        SyncError ->
+            _ = case maps:get(prev_sync_result, State) of
+                    SyncError ->
+                        %% don't report same error twice
+                        ok;
+                    _ ->
+                        LogOpts = maps:get(log_opts, State),
+                        logger_h_common:error_notify({Name,sync,
+                                                      LogOpts,
+                                                      SyncError})
+                end,
+            State#{prev_sync_result => SyncError}
+    end.
 
 error_notify_new(Info,Info, _Term) ->
     ok;
diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl
index cbc9db372c..813fbad0ed 100644
--- a/lib/kernel/src/logger_std_h.erl
+++ b/lib/kernel/src/logger_std_h.erl
@@ -257,10 +257,11 @@ init([Name, Config,
                              file_ctrl_sync => FileCtrlSyncInt,
                              last_qlen => 0,
                              last_log_ts => T0,
+                             last_op => sync,
                              burst_win_ts => T0,
                              burst_msg_count => 0}),
             proc_lib:init_ack({ok,self()}),
-            gen_server:cast(self(), {repeated_filesync,T0}),
+            gen_server:cast(self(), repeated_filesync),
             enter_loop(Config, State1);
         Error ->
             logger_h_common:error_notify({init_handler,Name,Error}),
@@ -310,12 +311,11 @@ handle_call(filesync, _From, State = #{type := Type,
     if is_atom(Type) ->
             {reply, ok, State};
        true ->
-            {reply, file_ctrl_filesync_sync(FileCtrlPid), State}
+            {reply, file_ctrl_filesync_sync(FileCtrlPid), State#{last_op=>sync}}
     end;
 
 handle_call({change_config,_OldConfig,NewConfig}, _From,
-            State = #{filesync_repeat_interval := FSyncInt0,
-                      last_log_ts := LastLogTS}) ->
+            State = #{filesync_repeat_interval := FSyncInt0}) ->
     HConfig = maps:get(?MODULE, NewConfig, #{}),
     State1 = maps:merge(State, HConfig),
     case logger_h_common:overload_levels_ok(State1) of
@@ -334,8 +334,7 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
                    _ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
                                                              State,
                                                              undefined)),
-                   gen_server:cast(self(), {repeated_filesync,
-                                            LastLogTS})
+                   gen_server:cast(self(), repeated_filesync)
                end,
             {reply, ok, State1};
         false ->
@@ -365,24 +364,24 @@ handle_cast({log, Bin}, State) ->
 %% clause gets called repeatedly by the handler. In order to
 %% guarantee that a filesync *always* happens after the last log
 %% request, the repeat operation must be active!
-handle_cast({repeated_filesync,LastLogTS0},
+handle_cast(repeated_filesync,
             State = #{type := Type,
                       file_ctrl_pid := FileCtrlPid,
                       filesync_repeat_interval := FSyncInt,
-                      last_log_ts := LastLogTS1}) ->
+                      last_op := LastOp}) ->
     State1 =
         if not is_atom(Type), is_integer(FSyncInt) ->
                 %% only do filesync if something has been
                 %% written since last time we checked
-                if LastLogTS1 == LastLogTS0 ->
+                if LastOp == sync ->
                         ok;
                    true ->
                         file_ctrl_filesync_async(FileCtrlPid)
                 end,
                 {ok,TRef} =
                     timer:apply_after(FSyncInt, gen_server,cast,
-                                      [self(),{repeated_filesync,LastLogTS1}]),
-                State#{rep_sync_tref => TRef};
+                                      [self(),repeated_filesync]),
+                State#{rep_sync_tref => TRef, last_op => sync};
            true ->
                 State
         end,
@@ -600,6 +599,7 @@ write(Name, Mode, T1, Bin, _CallOrCast,
     State1#{mode => Mode1,
             last_qlen := LastQLen1,
             last_log_ts => T1,
+            last_op => write,
             burst_win_ts => BurstWinT,
             burst_msg_count => BurstMsgCount1,
             file_ctrl_sync =>
diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl
index 6a4ec72d5e..bb88c53f26 100644
--- a/lib/kernel/test/logger_disk_log_h_SUITE.erl
+++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl
@@ -66,19 +66,7 @@ end_per_testcase(Case, Config) ->
     ok.
 
 groups() ->
-    [
-     {retry_op_switch_to_sync,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_sync]},
-
-     {retry_op_switch_to_drop,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_drop]},
-
-     {retry_op_switch_to_flush,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_flush]}
-    ].
+    [].
 
 all() ->
     [start_stop_handler,
@@ -99,9 +87,9 @@ all() ->
      disk_log_events,
      write_failure,
      sync_failure,
-     {group,retry_op_switch_to_sync},
-     {group,retry_op_switch_to_drop},
-     {group,retry_op_switch_to_flush},
+     op_switch_to_sync,
+     op_switch_to_drop,
+     op_switch_to_flush,
      limit_burst_disabled,
      limit_burst_enabled_one,
      limit_burst_enabled_period,
@@ -880,57 +868,107 @@ op_switch_to_sync(Config) ->
                                            enable_burst_limit => false}},
     ok = logger:set_handler_config(?MODULE, NewHConfig),
     send_burst({n,NumOfReqs}, seq, {chars,79}, info),
-    NumOfReqs = count_lines(Log),
-    ok = file:delete(Log).
+    Lines = count_lines(Log),
+    ok = file:delete(Log),
+    NumOfReqs = Lines,
+    ok.
 op_switch_to_sync(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
 
+op_switch_to_drop() ->
+    [{timetrap,{seconds,180}}].
 op_switch_to_drop(Config) ->
-    {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-    NumOfReqs = 300,
-    Procs = 2,
-    NewHConfig =
-        HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1,
-                                                 drop_new_reqs_qlen => 2,
-                                                 flush_reqs_qlen => Procs*NumOfReqs+1,
-                                                 enable_burst_limit => false}},
-    ok = logger:set_handler_config(?MODULE, NewHConfig),
-    send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
-    Logged = count_lines(Log),
-    ct:pal("Number of messages dropped = ~w (~w)",
-           [Procs*NumOfReqs-Logged,Procs*NumOfReqs]),
-    true = (Logged < (Procs*NumOfReqs)),
-    true = (Logged > 0),
-    ok = file:delete(Log).
+    Test =
+        fun() ->
+                {Log,HConfig,DLHConfig} =
+                    start_handler(?MODULE, ?FUNCTION_NAME, Config),
+                NumOfReqs = 300,
+                Procs = 2,
+                Bursts = 10,
+                NewHConfig =
+                    HConfig#{logger_disk_log_h =>
+                                 DLHConfig#{toggle_sync_qlen => 1,
+                                            drop_new_reqs_qlen => 2,
+                                            flush_reqs_qlen => Procs*NumOfReqs*Bursts,
+                                            enable_burst_limit => false}},
+                ok = logger:set_handler_config(?MODULE, NewHConfig),
+                %% It sometimes happens that the handler either gets
+                %% the requests in a slow enough pace so that dropping
+                %% never occurs. Therefore, lets generate a number of
+                %% bursts to increase the chance of message buildup.
+                [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+                    _ <- lists:seq(1, Bursts)],
+                Logged = count_lines(Log),
+                ok= stop_handler(?MODULE),
+                _ = file:delete(Log),
+                ct:pal("Number of messages dropped = ~w (~w)",
+                       [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+                true = (Logged < (Procs*NumOfReqs*Bursts)),
+                true = (Logged > 0),
+                ok
+        end,
+    %% As it's tricky to get the timing right in only one go, we perform the
+    %% test repeatedly, hoping that will generate a successful result.
+    case repeat_until_ok(Test, 10) of
+        {ok,{Failures,_Result}} ->
+            ct:log("Failed ~w times before success!", [Failures]);
+        {fails,Reason} ->
+            ct:fail(Reason)
+    end.
 op_switch_to_drop(cleanup, _Config) ->
-    ok = stop_handler(?MODULE).
+    _ = stop_handler(?MODULE).
 
 op_switch_to_flush() ->
-    [{timetrap,{seconds,60}}].
+    [{timetrap,{seconds,180}}].
 op_switch_to_flush(Config) ->
-    {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
-    %% it's important that both async and sync requests have been queued
-    %% when the flush happens (verify with coverage of flush_log_requests/2)
-
-    NewHConfig =
-        HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2,
-                                                 %% disable drop mode
-                                                 drop_new_reqs_qlen => 500,
-                                                 flush_reqs_qlen => 500,
-                                                 enable_burst_limit => false}},
-    ok = logger:set_handler_config(?MODULE, NewHConfig),
-    NumOfReqs = 1000,
-    Procs = 200,
-    send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
-    Logged = count_lines(Log),
-    ct:pal("Number of messages flushed/dropped = ~w (~w)",
-           [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
-    true = (Logged < (NumOfReqs*Procs)),
-    true = (Logged > 0),
-    ok = file:delete(Log).
+    Test =
+        fun() ->
+                {Log,HConfig,DLHConfig} =
+                    start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+                %% NOTE: it's important that both async and sync
+                %% requests have been queued when the flush happens
+                %% (verify with coverage of flush_log_requests/2)
+
+                NewHConfig =
+                    HConfig#{logger_disk_log_h =>
+                                 DLHConfig#{toggle_sync_qlen => 2,
+                                            %% disable drop mode
+                                            drop_new_reqs_qlen => 300,
+                                            flush_reqs_qlen => 300,
+                                            enable_burst_limit => false}},
+                ok = logger:set_handler_config(?MODULE, NewHConfig),
+                NumOfReqs = 1500,
+                Procs = 10,
+                Bursts = 10,
+                %% It sometimes happens that the handler either gets
+                %% the requests in a slow enough pace so that flushing
+                %% never occurs, or it gets all messages at once,
+                %% causing all messages to get flushed (no dropping of
+                %% sync messages gets tested). Therefore, lets
+                %% generate a number of bursts to increase the chance
+                %% of message buildup in some random fashion.
+                [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+                    _ <- lists:seq(1,Bursts)],
+                Logged = count_lines(Log),
+                ok= stop_handler(?MODULE),
+                _ = file:delete(Log),
+                ct:pal("Number of messages flushed/dropped = ~w (~w)",
+                       [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+                true = (Logged < (NumOfReqs*Procs*Bursts)),
+                true = (Logged > 0),
+                ok
+        end,
+    %% As it's tricky to get the timing right in only one go, we perform the
+    %% test repeatedly, hoping that will generate a successful result.
+    case repeat_until_ok(Test, 10) of
+        {ok,{Failures,_Result}} ->
+            ct:log("Failed ~w times before success!", [Failures]);
+        {fails,Reason} ->
+            ct:fail(Reason)
+    end.
 op_switch_to_flush(cleanup, _Config) ->
-    ok = stop_handler(?MODULE).
+    _ = stop_handler(?MODULE).
 
 limit_burst_disabled(Config) ->
@@ -1014,7 +1052,7 @@ qlen_kill_new(Config) ->
     {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
     Pid0 = whereis(?MODULE),
     {_,Mem0} = process_info(Pid0, memory),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig =
         HConfig#{logger_disk_log_h =>
                      DLHConfig#{enable_kill_overloaded=>true,
@@ -1035,7 +1073,7 @@ qlen_kill_new(Config) ->
             killed ->
                 ct:pal("Slow shutdown, handler process was killed!", [])
         end,
-        timer:sleep(RestartAfter + 1000),
+        timer:sleep(RestartAfter + 2000),
         true = is_pid(whereis(?MODULE)),
         ok
     after
@@ -1051,7 +1089,7 @@ mem_kill_new(Config) ->
     {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
     Pid0 = whereis(?MODULE),
     {_,Mem0} = process_info(Pid0, memory),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig =
         HConfig#{logger_disk_log_h =>
                      DLHConfig#{enable_kill_overloaded=>true,
@@ -1072,7 +1110,7 @@ mem_kill_new(Config) ->
             killed ->
                 ct:pal("Slow shutdown, handler process was killed!", [])
         end,
-        timer:sleep(RestartAfter * 2),
+        timer:sleep(RestartAfter + 2000),
         true = is_pid(whereis(?MODULE)),
         ok
     after
@@ -1105,7 +1143,7 @@ restart_after(Config) ->
     end,
 
     {Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig2 =
         HConfig#{logger_disk_log_h=>DLHConfig#{enable_kill_overloaded=>true,
                                                handler_overloaded_qlen=>10,
@@ -1117,7 +1155,7 @@ restart_after(Config) ->
     send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
     receive
         {'DOWN', MRef2, _, _, _Info2} ->
-            timer:sleep(RestartAfter + 1000),
+            timer:sleep(RestartAfter + 2000),
             Pid1 = whereis(?MODULE),
             true = is_pid(Pid1),
             false = (Pid1 == Pid0),
@@ -1388,6 +1426,29 @@ count_lines1(File) ->
     file:close(Dev),
     Lines.
 
+repeat_until_ok(Fun, N) ->
+    repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+    {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+    if C > 0 -> timer:sleep(5000);
+       true -> ok
+    end,
+    try Fun() of
+        Result ->
+            {ok,{C,Result}}
+    catch
+        _:Reason:Stack ->
+            ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+            if FirstReason == undefined ->
+                    repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+               true ->
+                    repeat_until_ok(Fun, C+1, Stop, FirstReason)
+            end
+    end.
+
 start_tracer(Trace,Expected) ->
     Pid = self(),
     dbg:tracer(process,{fun tracer/2,{Pid,Expected}}),
@@ -1410,9 +1471,11 @@ tpl([{M,F,A}|Trace]) ->
 tpl([]) ->
     ok.
 
-tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, {Pid,[{formatter,Msg}|Expected]}) ->
+tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}},
+       {Pid,[{formatter,Msg}|Expected]}) ->
     maybe_tracer_done(Pid,Expected,{formatter,Msg});
-tracer({trace,_,call,{logger_disk_log_h,handle_cast,[{Op,_}|_]}}, {Pid,[{Mod,Func,Op}|Expected]}) ->
+tracer({trace,_,call,{logger_disk_log_h,handle_cast,[Op|_]}},
+       {Pid,[{Mod,Func,Op}|Expected]}) ->
     maybe_tracer_done(Pid,Expected,{Mod,Func,Op});
 tracer({trace,_,call,{Mod,Func,_}}, {Pid,[{Mod,Func}|Expected]}) ->
     maybe_tracer_done(Pid,Expected,{Mod,Func});
diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl
index ed5504431e..3ebbbe74ef 100644
--- a/lib/kernel/test/logger_std_h_SUITE.erl
+++ b/lib/kernel/test/logger_std_h_SUITE.erl
@@ -89,19 +89,7 @@ end_per_testcase(Case, Config) ->
     ok.
 
 groups() ->
-    [
-     {retry_op_switch_to_sync_file,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_sync_file]},
-
-     {retry_op_switch_to_drop_file,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_drop_file]},
-
-     {retry_op_switch_to_flush_file,
-      [{repeat_until_all_ok,10}],
-      [op_switch_to_flush_file]}
-    ].
+    [].
 
 all() ->
     [add_remove_instance_tty,
@@ -122,11 +110,11 @@ all() ->
      filesync,
      write_failure,
      sync_failure,
-     {group,retry_op_switch_to_sync_file},
+     op_switch_to_sync_file,
      op_switch_to_sync_tty,
-     {group,retry_op_switch_to_drop_file},
+     op_switch_to_drop_file,
      op_switch_to_drop_tty,
-     {group,retry_op_switch_to_flush_file},
+     op_switch_to_flush_file,
      op_switch_to_flush_tty,
      limit_burst_disabled,
      limit_burst_enabled_one,
      limit_burst_enabled_period,
@@ -588,7 +576,7 @@ filesync(Config) ->
                                       #{filesync_repeat_interval => no_repeat}),
     dbg:stop_clear(),
 
-    Received2 = lists:map(fun({trace,_M,handle_cast,[{Op,_},_]}) -> {trace,Op};
+    Received2 = lists:map(fun({trace,_M,handle_cast,[Op,_]}) -> {trace,Op};
                              (Other) -> Other
                           end, test_server:messages_get()),
     ct:pal("Trace #2 =~n~p", [Received2]),
@@ -726,7 +714,8 @@ op_switch_to_sync_file(Config) ->
     ok = logger:set_handler_config(?MODULE, NewHConfig),
     %% TRecvPid = start_op_trace(),
     send_burst({n,NumOfReqs}, seq, {chars,79}, info),
-    NumOfReqs = count_lines(Log),
+    Lines = count_lines(Log),
+    ok = file:delete(Log),
     %% true = analyse_trace(TRecvPid,
     %%                      fun(Events) -> find_mode(async,Events) end),
     %% true = analyse_trace(TRecvPid,
@@ -737,8 +726,8 @@ op_switch_to_sync_file(Config) ->
     %%                      fun(Events) -> find_mode(drop,Events) end),
     %% false = analyse_trace(TRecvPid,
     %%                       fun(Events) -> find_mode(flush,Events) end),
-    ok = file:delete(Log),
     %% stop_op_trace(TRecvPid),
+    NumOfReqs = Lines,
     ok.
 op_switch_to_sync_file(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
@@ -757,38 +746,49 @@ op_switch_to_sync_tty(Config) ->
 op_switch_to_sync_tty(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
 
+op_switch_to_drop_file() ->
+    [{timetrap,{seconds,180}}].
 op_switch_to_drop_file(Config) ->
-    {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-    NumOfReqs = 300,
-    Procs = 2,
-    NewHConfig =
-        HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1,
+    Test =
+        fun() ->
+                {Log,HConfig,StdHConfig} =
+                    start_handler(?MODULE, ?FUNCTION_NAME, Config),
+                NumOfReqs = 300,
+                Procs = 2,
+                Bursts = 10,
+                NewHConfig =
+                    HConfig#{logger_std_h =>
+                                 StdHConfig#{toggle_sync_qlen => 1,
                                              drop_new_reqs_qlen => 2,
-                                             flush_reqs_qlen => Procs*NumOfReqs+1,
+                                             flush_reqs_qlen =>
+                                                 Procs*NumOfReqs*Bursts,
                                              enable_burst_limit => false}},
-    ok = logger:set_handler_config(?MODULE, NewHConfig),
-    %% TRecvPid = start_op_trace(),
-    send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
-    Logged = count_lines(Log),
-    ct:pal("Number of messages dropped = ~w (~w)",
-           [Procs*NumOfReqs-Logged,Procs*NumOfReqs]),
-    true = (Logged < (Procs*NumOfReqs)),
-    true = (Logged > 0),
-    %% true = analyse_trace(TRecvPid,
-    %%                      fun(Events) -> find_mode(async,Events) end),
-    %% true = analyse_trace(TRecvPid,
-    %%                      fun(Events) -> find_mode(drop,Events) end),
-    %% false = analyse_trace(TRecvPid,
-    %%                       fun(Events) -> find_mode(flush,Events) end),
-    %% true = analyse_trace(TRecvPid,
-    %%                      fun(Events) -> find_switch(async,drop,Events)
-    %%                                     orelse find_switch(sync,drop,Events)
-    %%                      end),
-    ok = file:delete(Log),
-    %% stop_op_trace(TRecvPid),
-    ok.
+                ok = logger:set_handler_config(?MODULE, NewHConfig),
+                %% It sometimes happens that the handler gets the
+                %% requests in a slow enough pace so that dropping
+                %% never occurs. Therefore, lets generate a number of
+                %% bursts to increase the chance of message buildup.
+                [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+                    _ <- lists:seq(1, Bursts)],
+                Logged = count_lines(Log),
+                ok = stop_handler(?MODULE),
+                _ = file:delete(Log),
+                ct:pal("Number of messages dropped = ~w (~w)",
+                       [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+                true = (Logged < (Procs*NumOfReqs*Bursts)),
+                true = (Logged > 0),
+                ok
+        end,
+    %% As it's tricky to get the timing right in only one go, we perform the
+    %% test repeatedly, hoping that will generate a successful result.
+    case repeat_until_ok(Test, 10) of
+        {ok,{Failures,_Result}} ->
+            ct:log("Failed ~w times before success!", [Failures]);
+        {fails,Reason} ->
+            ct:fail(Reason)
+    end.
 op_switch_to_drop_file(cleanup, _Config) ->
-    ok = stop_handler(?MODULE).
+    _ = stop_handler(?MODULE).
 
 op_switch_to_drop_tty(Config) ->
     {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
@@ -797,7 +797,8 @@ op_switch_to_drop_tty(Config) ->
     NewHConfig =
         HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1,
                                              drop_new_reqs_qlen => 2,
-                                             flush_reqs_qlen => Procs*NumOfReqs+1,
+                                             flush_reqs_qlen =>
+                                                 Procs*NumOfReqs+1,
                                              enable_burst_limit => false}},
     ok = logger:set_handler_config(?MODULE, NewHConfig),
     send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
@@ -806,32 +807,56 @@ op_switch_to_drop_tty(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
 
 op_switch_to_flush_file() ->
-    [{timetrap,{seconds,60}}].
+    [{timetrap,{seconds,180}}].
 op_switch_to_flush_file(Config) ->
-    {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
-    %% it's important that both async and sync requests have been queued
-    %% when the flush happens (verify with coverage of flush_log_requests/2)
-
-    NewHConfig =
-        HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
+    Test =
+        fun() ->
+                {Log,HConfig,StdHConfig} =
+                    start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+                %% NOTE: it's important that both async and sync
+                %% requests have been queued when the flush happens
+                %% (verify with coverage of flush_log_requests/2)
+
+                NewHConfig =
+                    HConfig#{logger_std_h =>
+                                 StdHConfig#{toggle_sync_qlen => 2,
                                              %% disable drop mode
-                                             drop_new_reqs_qlen => 500,
-                                             flush_reqs_qlen => 500,
+                                             drop_new_reqs_qlen => 300,
+                                             flush_reqs_qlen => 300,
                                              enable_burst_limit => false}},
-    ok = logger:set_handler_config(?MODULE, NewHConfig),
-    NumOfReqs = 1000,
-    Procs = 200,
-    send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
-    Logged = count_lines(Log),
-    ct:pal("Number of messages flushed/dropped = ~w (~w)",
-           [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
-    true = (Logged < (NumOfReqs*Procs)),
-    true = (Logged > 0),
-    ok = file:delete(Log),
-    ok.
+                ok = logger:set_handler_config(?MODULE, NewHConfig),
+                NumOfReqs = 1500,
+                Procs = 10,
+                Bursts = 10,
+                %% It sometimes happens that the handler either gets
+                %% the requests in a slow enough pace so that flushing
+                %% never occurs, or it gets all messages at once,
+                %% causing all messages to get flushed (no dropping of
+                %% sync messages gets tested). Therefore, lets
+                %% generate a number of bursts to increase the chance
+                %% of message buildup in some random fashion.
+                [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+                    _ <- lists:seq(1,Bursts)],
+                Logged = count_lines(Log),
+                ok = stop_handler(?MODULE),
+                _ = file:delete(Log),
+                ct:pal("Number of messages flushed/dropped = ~w (~w)",
+                       [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+                true = (Logged < (NumOfReqs*Procs*Bursts)),
+                true = (Logged > 0),
+                ok
+        end,
+    %% As it's tricky to get the timing right in only one go, we perform the
+    %% test repeatedly, hoping that will generate a successful result.
+    case repeat_until_ok(Test, 10) of
+        {ok,{Failures,_Result}} ->
+            ct:log("Failed ~w times before success!", [Failures]);
+        {fails,Reason} ->
+            ct:fail(Reason)
+    end.
 op_switch_to_flush_file(cleanup, _Config) ->
-    ok = stop_handler(?MODULE).
+    _ = stop_handler(?MODULE).
 
 op_switch_to_flush_tty(Config) ->
     {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
@@ -931,10 +956,10 @@ kill_disabled(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
 
 qlen_kill_new(Config) ->
-    {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+    {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
     Pid0 = whereis(?MODULE),
     {_,Mem0} = process_info(Pid0, memory),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig =
         HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
                                            handler_overloaded_qlen=>10,
@@ -954,7 +979,7 @@ qlen_kill_new(Config) ->
             killed ->
                 ct:pal("Slow shutdown, handler process was killed!", [])
         end,
-        timer:sleep(RestartAfter + 1000),
+        timer:sleep(RestartAfter + 2000),
         true = is_pid(whereis(?MODULE)),
         ok
     after
@@ -968,7 +993,7 @@ qlen_kill_new(cleanup, _Config) ->
 
 %% choke the standard handler on remote node to verify the termination
 %% works as expected
-qlen_kill_std(Config) ->
+qlen_kill_std(_Config) ->
     %%! HERE
     %% Dir = ?config(priv_dir, Config),
     %% File = lists:concat([?MODULE,"_",?FUNCTION_NAME,".log"]),
@@ -982,10 +1007,10 @@ qlen_kill_std(Config) ->
     {skip,"Not done yet"}.
 
 mem_kill_new(Config) ->
-    {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+    {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
    Pid0 = whereis(?MODULE),
     {_,Mem0} = process_info(Pid0, memory),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig =
         HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
                                            handler_overloaded_qlen=>50000,
@@ -1005,7 +1030,7 @@ mem_kill_new(Config) ->
             killed ->
                 ct:pal("Slow shutdown, handler process was killed!", [])
         end,
-        timer:sleep(RestartAfter * 2),
+        timer:sleep(RestartAfter + 2000),
         true = is_pid(whereis(?MODULE)),
         ok
     after
@@ -1019,7 +1044,7 @@ mem_kill_new(cleanup, _Config) ->
 
 %% choke the standard handler on remote node to verify the termination
 %% works as expected
-mem_kill_std(Config) ->
+mem_kill_std(_Config) ->
     {skip,"Not done yet"}.
 
 restart_after(Config) ->
@@ -1043,7 +1068,7 @@ restart_after(Config) ->
     end,
 
     {Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-    RestartAfter = 2000,
+    RestartAfter = ?HANDLER_RESTART_AFTER,
     NewHConfig2 =
         HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
                                            handler_overloaded_qlen=>10,
@@ -1055,7 +1080,7 @@ restart_after(Config) ->
     send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
     receive
         {'DOWN', MRef2, _, _, _Info2} ->
-            timer:sleep(RestartAfter + 1000),
+            timer:sleep(RestartAfter + 2000),
             Pid1 = whereis(?MODULE),
             true = is_pid(Pid1),
             false = (Pid1 == Pid0),
@@ -1101,7 +1126,7 @@ handler_requests_under_load(Config) ->
     NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult),
     ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]),
     ok = file:delete(Log).
-handler_requests_under_load(cleanup, Config) ->
+handler_requests_under_load(cleanup, _Config) ->
     ok = stop_handler(?MODULE).
 
 send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) ->
@@ -1326,6 +1351,30 @@ try_match_file(_,Pattern,_,Incorrect) ->
            [Pattern,Incorrect]),
     erlang:error({error,not_matching_pattern,Pattern,Incorrect}).
 
+repeat_until_ok(Fun, N) ->
+    repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+    {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+    if C > 0 -> timer:sleep(5000);
+       true -> ok
+    end,
+    try Fun() of
+        Result ->
+            {ok,{C,Result}}
+    catch
+        _:Reason:Stack ->
+            ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+            if FirstReason == undefined ->
+                    repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+               true ->
+                    repeat_until_ok(Fun, C+1, Stop, FirstReason)
+            end
+    end.
+
+
 %%%-----------------------------------------------------------------
 %%%
 start_op_trace() ->
@@ -1366,17 +1415,17 @@ find_mode(flush, Events) ->
 find_mode(Mode, Events) ->
     lists:keymember([{mode,Mode}], 3, Events).
 
-find_switch(From, To, Events) ->
-    try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
-                        {trace_call,check_load,[#{mode := From}]}) ->
-                            throw(match);
-                       (Event, _) ->
-                            Event
-                    end, undefined, Events) of
-        _ -> false
-    catch
-        throw:match -> true
-    end.
+%% find_switch(_From, To, Events) ->
+%%     try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
+%%                         {trace_call,check_load,[#{mode := From}]}) ->
+%%                             throw(match);
+%%                        (Event, _) ->
+%%                             Event
+%%                     end, undefined, Events) of
+%%         _ -> false
+%%     catch
+%%         throw:match -> true
+%%     end.
 
 analyse_trace(TRecvPid, TestFun) ->
     TRecvPid ! {test,self(),TestFun},