From 5a93c690267d0a3ca12304c34b0e4538549fabb1 Mon Sep 17 00:00:00 2001 From: Peter Andersson Date: Thu, 3 May 2018 17:23:16 +0200 Subject: Make it possible to disable sync and drop mode Also add tests and update failing test cases --- lib/kernel/doc/src/logger_chapter.xml | 12 +++- lib/kernel/src/logger_h_common.erl | 16 +++-- lib/kernel/src/logger_h_common.hrl | 2 +- lib/kernel/test/logger_disk_log_h_SUITE.erl | 65 +++++++++++++------- lib/kernel/test/logger_std_h_SUITE.erl | 94 +++++++++++++++++------------ 5 files changed, 123 insertions(+), 66 deletions(-) (limited to 'lib/kernel') diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml index 0bc3b37476..5fb81034dc 100644 --- a/lib/kernel/doc/src/logger_chapter.xml +++ b/lib/kernel/doc/src/logger_chapter.xml @@ -700,7 +700,17 @@ my_report_cb(R) ->

     For the overload protection algorithm to work properly, it is a
       requirement that:

-    toggle_sync_qlen < drop_new_reqs_qlen < flush_reqs_qlen
+    toggle_sync_qlen =< drop_new_reqs_qlen =< flush_reqs_qlen
+
+    and that:
+
+    drop_new_reqs_qlen > 1
+
+    If toggle_sync_qlen is set to 0, the handler will handle all
+      requests synchronously. Setting toggle_sync_qlen to the same value
+      as drop_new_reqs_qlen disables the synchronous mode. Likewise,
+      setting drop_new_reqs_qlen to the same value as flush_reqs_qlen
+      disables the drop mode.
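
To make this concrete, here is a minimal sketch of a handler configuration using the settings described above. It is illustrative only: the handler id my_handler and the chosen queue lengths are assumptions for the example, while the logger:add_handler/3 call form and option names follow the test suites in this patch.

    %% Illustrative example only -- "my_handler" and the level values
    %% are made up. Giving all three levels the same value disables
    %% both synchronous mode and drop mode: the handler stays in async
    %% mode until the queue reaches 500 requests, where it switches
    %% directly to flush.
    ok = logger:add_handler(my_handler, logger_std_h,
                            #{logger_std_h =>
                                  #{toggle_sync_qlen   => 500,
                                    drop_new_reqs_qlen => 500,
                                    flush_reqs_qlen    => 500},
                              filter_default => log}).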

During high load scenarios, the length of the handler message queue rarely grows in a linear and predictable way. Instead, whenever the diff --git a/lib/kernel/src/logger_h_common.erl b/lib/kernel/src/logger_h_common.erl index 7caad366ae..901c4c0dad 100644 --- a/lib/kernel/src/logger_h_common.erl +++ b/lib/kernel/src/logger_h_common.erl @@ -135,7 +135,8 @@ call_cast_or_drop(Name, Bin) -> _:{timeout,_} -> ?observe(Name,{dropped,1}) end; - drop -> ?observe(Name,{dropped,1}) + drop -> + ?observe(Name,{dropped,1}) catch %% if the ETS table doesn't exist (maybe because of a %% handler restart), we can only drop the request @@ -152,12 +153,15 @@ check_load(State = #{id:=Name, mode := Mode, flush_reqs_qlen := FlushQLen}) -> {_,Mem} = process_info(self(), memory), ?observe(Name,{max_mem,Mem}), - %% make sure the handler process doesn't get scheduled - %% out between the message_queue_len check below and the - %% action that follows (flush or write). {_,QLen} = process_info(self(), message_queue_len), ?observe(Name,{max_qlen,QLen}), - + %% When the handler process gets scheduled in, it's impossible + %% to predict the QLen. We could jump "up" arbitrarily from say + %% async to sync, async to drop, sync to flush, etc. However, when + %% the handler process manages the log requests (without flushing), + %% one after the other, we will move "down" from drop to sync and + %% from sync to async. This way we don't risk getting stuck in + %% drop or sync mode with an empty mailbox. {Mode1,_NewDrops,_NewFlushes} = if QLen >= FlushQLen -> @@ -292,7 +296,7 @@ overload_levels_ok(HandlerConfig) -> TSQL = maps:get(toggle_sync_qlen, HandlerConfig, ?TOGGLE_SYNC_QLEN), DNRQL = maps:get(drop_new_reqs_qlen, HandlerConfig, ?DROP_NEW_REQS_QLEN), FRQL = maps:get(flush_reqs_qlen, HandlerConfig, ?FLUSH_REQS_QLEN), - (TSQL < DNRQL) andalso (DNRQL < FRQL). + (DNRQL > 1) andalso (TSQL =< DNRQL) andalso (DNRQL =< FRQL). error_notify(Term) -> ?internal_log(error, Term). diff --git a/lib/kernel/src/logger_h_common.hrl b/lib/kernel/src/logger_h_common.hrl index 89378dbb10..ed365ce6eb 100644 --- a/lib/kernel/src/logger_h_common.hrl +++ b/lib/kernel/src/logger_h_common.hrl @@ -124,7 +124,7 @@ %%% slow down execution and therefore should not be include in code %%% to be officially released. -%% -define(TEST_HOOKS, true). +-define(TEST_HOOKS, true). -ifdef(TEST_HOOKS). -define(TEST_HOOKS_TAB, logger_h_test_hooks). diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl index c7c6137380..6a4ec72d5e 100644 --- a/lib/kernel/test/logger_disk_log_h_SUITE.erl +++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl @@ -66,7 +66,19 @@ end_per_testcase(Case, Config) -> ok. groups() -> - []. + [ + {retry_op_switch_to_sync, + [{repeat_until_all_ok,10}], + [op_switch_to_sync]}, + + {retry_op_switch_to_drop, + [{repeat_until_all_ok,10}], + [op_switch_to_drop]}, + + {retry_op_switch_to_flush, + [{repeat_until_all_ok,10}], + [op_switch_to_flush]} + ]. 
all() -> [start_stop_handler, @@ -87,9 +99,9 @@ all() -> disk_log_events, write_failure, sync_failure, - op_switch_to_sync, - op_switch_to_drop, - op_switch_to_flush, + {group,retry_op_switch_to_sync}, + {group,retry_op_switch_to_drop}, + {group,retry_op_switch_to_flush}, limit_burst_disabled, limit_burst_enabled_one, limit_burst_enabled_period, @@ -369,10 +381,18 @@ config_fail(_Config) -> #{logger_disk_log_h => #{bad => bad}, filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_levels,{42,42,_}}}} = + + {error,{handler_not_added,{invalid_levels,{_,1,_}}}} = logger:add_handler(?MODULE,logger_disk_log_h, - #{logger_disk_log_h => #{toggle_sync_qlen=>42, + #{logger_disk_log_h => #{drop_new_reqs_qlen=>1}}), + {error,{handler_not_added,{invalid_levels,{43,42,_}}}} = + logger:add_handler(?MODULE,logger_disk_log_h, + #{logger_disk_log_h => #{toggle_sync_qlen=>43, drop_new_reqs_qlen=>42}}), + {error,{handler_not_added,{invalid_levels,{_,43,42}}}} = + logger:add_handler(?MODULE,logger_disk_log_h, + #{logger_disk_log_h => #{drop_new_reqs_qlen=>43, + flush_reqs_qlen=>42}}), ok = logger:add_handler(?MODULE,logger_disk_log_h, #{filter_default=>log, @@ -852,13 +872,13 @@ internal_log(Type, Term) -> op_switch_to_sync(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 500, NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), NumOfReqs = count_lines(Log), ok = file:delete(Log). @@ -867,19 +887,20 @@ op_switch_to_sync(cleanup, _Config) -> op_switch_to_drop(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages dropped = ~w (~w)", - [NumOfReqs-Logged,NumOfReqs]), - true = (Logged < NumOfReqs), + [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), + true = (Logged < (Procs*NumOfReqs)), + true = (Logged > 0), ok = file:delete(Log). op_switch_to_drop(cleanup, _Config) -> ok = stop_handler(?MODULE). @@ -894,17 +915,19 @@ op_switch_to_flush(Config) -> NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, - flush_reqs_qlen => 100, + %% disable drop mode + drop_new_reqs_qlen => 500, + flush_reqs_qlen => 500, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), NumOfReqs = 1000, - Procs = 500, + Procs = 200, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages flushed/dropped = ~w (~w)", [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), true = (Logged < (NumOfReqs*Procs)), + true = (Logged > 0), ok = file:delete(Log). 
op_switch_to_flush(cleanup, _Config) -> ok = stop_handler(?MODULE). diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl index e940e0a026..ed5504431e 100644 --- a/lib/kernel/test/logger_std_h_SUITE.erl +++ b/lib/kernel/test/logger_std_h_SUITE.erl @@ -89,7 +89,19 @@ end_per_testcase(Case, Config) -> ok. groups() -> - []. + [ + {retry_op_switch_to_sync_file, + [{repeat_until_all_ok,10}], + [op_switch_to_sync_file]}, + + {retry_op_switch_to_drop_file, + [{repeat_until_all_ok,10}], + [op_switch_to_drop_file]}, + + {retry_op_switch_to_flush_file, + [{repeat_until_all_ok,10}], + [op_switch_to_flush_file]} + ]. all() -> [add_remove_instance_tty, @@ -110,11 +122,11 @@ all() -> filesync, write_failure, sync_failure, - op_switch_to_sync_file, + {group,retry_op_switch_to_sync_file}, op_switch_to_sync_tty, - op_switch_to_drop_file, + {group,retry_op_switch_to_drop_file}, op_switch_to_drop_tty, - op_switch_to_flush_file, + {group,retry_op_switch_to_flush_file}, op_switch_to_flush_tty, limit_burst_disabled, limit_burst_enabled_one, @@ -289,10 +301,17 @@ config_fail(_Config) -> #{logger_std_h => #{restart_type => bad}, filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_levels,{42,42,_}}}} = + {error,{handler_not_added,{invalid_levels,{_,1,_}}}} = + logger:add_handler(?MODULE,logger_std_h, + #{logger_std_h => #{drop_new_reqs_qlen=>1}}), + {error,{handler_not_added,{invalid_levels,{43,42,_}}}} = logger:add_handler(?MODULE,logger_std_h, - #{logger_std_h => #{toggle_sync_qlen=>42, + #{logger_std_h => #{toggle_sync_qlen=>43, drop_new_reqs_qlen=>42}}), + {error,{handler_not_added,{invalid_levels,{_,43,42}}}} = + logger:add_handler(?MODULE,logger_std_h, + #{logger_std_h => #{drop_new_reqs_qlen=>43, + flush_reqs_qlen=>42}}), ok = logger:add_handler(?MODULE,logger_std_h, #{filter_default=>log, @@ -698,14 +717,14 @@ internal_log(Type, Term) -> op_switch_to_sync_file(Config) -> {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 500, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), %% TRecvPid = start_op_trace(), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), NumOfReqs = count_lines(Log), %% true = analyse_trace(TRecvPid, @@ -726,13 +745,13 @@ op_switch_to_sync_file(cleanup, _Config) -> op_switch_to_sync_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), + NumOfReqs = 500, NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), ok. 
op_switch_to_sync_tty(cleanup, _Config) -> @@ -740,20 +759,21 @@ op_switch_to_sync_tty(cleanup, _Config) -> op_switch_to_drop_file(Config) -> {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), %% TRecvPid = start_op_trace(), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages dropped = ~w (~w)", - [NumOfReqs-Logged,NumOfReqs]), - true = (Logged < NumOfReqs), + [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), + true = (Logged < (Procs*NumOfReqs)), + true = (Logged > 0), %% true = analyse_trace(TRecvPid, %% fun(Events) -> find_mode(async,Events) end), %% true = analyse_trace(TRecvPid, @@ -772,14 +792,15 @@ op_switch_to_drop_file(cleanup, _Config) -> op_switch_to_drop_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), ok. op_switch_to_drop_tty(cleanup, _Config) -> ok = stop_handler(?MODULE). @@ -794,21 +815,19 @@ op_switch_to_flush_file(Config) -> NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, - flush_reqs_qlen => 100, + %% disable drop mode + drop_new_reqs_qlen => 500, + flush_reqs_qlen => 500, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 10000, - Procs = 100, + NumOfReqs = 1000, + Procs = 200, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages flushed/dropped = ~w (~w)", [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), true = (Logged < (NumOfReqs*Procs)), - - %%! --- Thu Apr 12 13:46:00 2018 --- peppe was here! - %%! TODO: Verify that handler has switched to flush mode - + true = (Logged > 0), ok = file:delete(Log), ok. op_switch_to_flush_file(cleanup, _Config) -> @@ -822,12 +841,13 @@ op_switch_to_flush_tty(Config) -> NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, + %% disable drop mode + drop_new_reqs_qlen => 100, flush_reqs_qlen => 100, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 10000, - Procs = 10, + NumOfReqs = 1000, + Procs = 100, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), ok. 
op_switch_to_flush_tty(cleanup, _Config) -> -- cgit v1.2.3 From 681bc6ad427fce5f087ccc48fb0172b46c02b52a Mon Sep 17 00:00:00 2001 From: Peter Andersson Date: Fri, 4 May 2018 15:00:19 +0200 Subject: Various logger handler improvements and updated test cases --- lib/kernel/doc/src/logger_chapter.xml | 4 +- lib/kernel/src/logger_disk_log_h.erl | 64 ++++---- lib/kernel/src/logger_std_h.erl | 22 +-- lib/kernel/test/logger_disk_log_h_SUITE.erl | 193 +++++++++++++++-------- lib/kernel/test/logger_std_h_SUITE.erl | 233 +++++++++++++++++----------- 5 files changed, 311 insertions(+), 205 deletions(-) (limited to 'lib/kernel') diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml index 5fb81034dc..0374a0c93a 100644 --- a/lib/kernel/doc/src/logger_chapter.xml +++ b/lib/kernel/doc/src/logger_chapter.xml @@ -697,8 +697,8 @@ my_report_cb(R) -> -

-    For the overload protection algorithm to work properly, it is a
-      requirement that:
+    For the overload protection algorithm to work properly, it is
+      required that:

     toggle_sync_qlen =< drop_new_reqs_qlen =< flush_reqs_qlen
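
A handler whose levels break this requirement is rejected by overload_levels_ok/1 when it is added. The following sketch of the caller-facing result is modelled on the config_fail test cases in this patch; the handler id my_handler is made up for the example.

    %% Sketch only -- mirrors the config_fail test cases.
    %% drop_new_reqs_qlen must be greater than 1:
    {error,{handler_not_added,{invalid_levels,{_,1,_}}}} =
        logger:add_handler(my_handler, logger_std_h,
                           #{logger_std_h => #{drop_new_reqs_qlen => 1}}),
    %% and the three levels must be ordered:
    {error,{handler_not_added,{invalid_levels,{43,42,_}}}} =
        logger:add_handler(my_handler, logger_std_h,
                           #{logger_std_h => #{toggle_sync_qlen => 43,
                                               drop_new_reqs_qlen => 42}}).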

diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl index 3b71f936d8..eaa5cd6f99 100644 --- a/lib/kernel/src/logger_disk_log_h.erl +++ b/lib/kernel/src/logger_disk_log_h.erl @@ -278,10 +278,11 @@ init([Name, Config = #{disk_log_opts := LogOpts}, last_log_ts => T0, burst_win_ts => T0, burst_msg_count => 0, + last_op => sync, prev_log_result => ok, prev_sync_result => ok, prev_disk_log_info => undefined}), - gen_server:cast(self(), {repeated_disk_log_sync,T0}), + gen_server:cast(self(), repeated_disk_log_sync), enter_loop(Config, State1); Error -> logger_h_common:error_notify({open_disk_log,Name,Error}), @@ -316,8 +317,7 @@ handle_call(disk_log_sync, _From, State = #{id := Name}) -> {reply, Result, State1}; handle_call({change_config,_OldConfig,NewConfig}, _From, - State = #{filesync_repeat_interval := FSyncInt0, - last_log_ts := LastLogTS}) -> + State = #{filesync_repeat_interval := FSyncInt0}) -> HConfig = maps:get(?MODULE, NewConfig, #{}), State1 = #{toggle_sync_qlen := TSQL, drop_new_reqs_qlen := DNRQL, @@ -338,9 +338,8 @@ handle_call({change_config,_OldConfig,NewConfig}, _From, _ = logger_h_common:cancel_timer(maps:get(rep_sync_tref, State, undefined)), - _ = gen_server:cast(self(), {repeated_disk_log_sync, - LastLogTS}) - end, + _ = gen_server:cast(self(), repeated_disk_log_sync) + end, {reply, ok, State1}; false -> {reply, {error,{invalid_levels,{TSQL,DNRQL,FRQL}}}, State} @@ -370,24 +369,23 @@ handle_cast({log, Bin}, State) -> %% clause gets called repeatedly by the handler. In order to %% guarantee that a filesync *always* happens after the last log %% request, the repeat operation must be active! -handle_cast({repeated_disk_log_sync,LastLogTS0}, +handle_cast(repeated_disk_log_sync, State = #{id := Name, filesync_repeat_interval := FSyncInt, - last_log_ts := LastLogTS1}) -> + last_op := LastOp}) -> State1 = if is_integer(FSyncInt) -> %% only do filesync if something has been %% written since last time we checked - NewState = if LastLogTS1 == LastLogTS0 -> + NewState = if LastOp == sync -> State; true -> disk_log_sync(Name, State) end, {ok,TRef} = timer:apply_after(FSyncInt, gen_server,cast, - [self(), - {repeated_disk_log_sync,LastLogTS1}]), - NewState#{rep_sync_tref => TRef}; + [self(),repeated_disk_log_sync]), + NewState#{rep_sync_tref => TRef, last_op => sync}; true -> State end, @@ -649,10 +647,9 @@ close_disk_log(Name, _) -> ok. disk_log_write(Name, Bin, State) -> - Result = case ?disk_log_blog(Name, Bin) of ok -> - ok; + State#{prev_log_result => ok, last_op => write}; LogError -> _ = case maps:get(prev_log_result, State) of LogError -> @@ -664,29 +661,26 @@ disk_log_write(Name, Bin, State) -> LogOpts, LogError}) end, - LogError - end, - State#{prev_log_result => Result}. + State#{prev_log_result => LogError} + end. disk_log_sync(Name, State) -> - Result = - case ?disk_log_sync(Name) of - ok -> - ok; - SyncError -> - _ = case maps:get(prev_sync_result, State) of - SyncError -> - %% don't report same error twice - ok; - _ -> - LogOpts = maps:get(log_opts, State), - logger_h_common:error_notify({Name,sync, - LogOpts, - SyncError}) - end, - SyncError - end, - State#{prev_sync_result => Result}. 
+ case ?disk_log_sync(Name) of + ok -> + State#{prev_sync_result => ok, last_op => sync}; + SyncError -> + _ = case maps:get(prev_sync_result, State) of + SyncError -> + %% don't report same error twice + ok; + _ -> + LogOpts = maps:get(log_opts, State), + logger_h_common:error_notify({Name,sync, + LogOpts, + SyncError}) + end, + State#{prev_sync_result => SyncError} + end. error_notify_new(Info,Info, _Term) -> ok; diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl index cbc9db372c..813fbad0ed 100644 --- a/lib/kernel/src/logger_std_h.erl +++ b/lib/kernel/src/logger_std_h.erl @@ -257,10 +257,11 @@ init([Name, Config, file_ctrl_sync => FileCtrlSyncInt, last_qlen => 0, last_log_ts => T0, + last_op => sync, burst_win_ts => T0, burst_msg_count => 0}), proc_lib:init_ack({ok,self()}), - gen_server:cast(self(), {repeated_filesync,T0}), + gen_server:cast(self(), repeated_filesync), enter_loop(Config, State1); Error -> logger_h_common:error_notify({init_handler,Name,Error}), @@ -310,12 +311,11 @@ handle_call(filesync, _From, State = #{type := Type, if is_atom(Type) -> {reply, ok, State}; true -> - {reply, file_ctrl_filesync_sync(FileCtrlPid), State} + {reply, file_ctrl_filesync_sync(FileCtrlPid), State#{last_op=>sync}} end; handle_call({change_config,_OldConfig,NewConfig}, _From, - State = #{filesync_repeat_interval := FSyncInt0, - last_log_ts := LastLogTS}) -> + State = #{filesync_repeat_interval := FSyncInt0}) -> HConfig = maps:get(?MODULE, NewConfig, #{}), State1 = maps:merge(State, HConfig), case logger_h_common:overload_levels_ok(State1) of @@ -334,8 +334,7 @@ handle_call({change_config,_OldConfig,NewConfig}, _From, _ = logger_h_common:cancel_timer(maps:get(rep_sync_tref, State, undefined)), - gen_server:cast(self(), {repeated_filesync, - LastLogTS}) + gen_server:cast(self(), repeated_filesync) end, {reply, ok, State1}; false -> @@ -365,24 +364,24 @@ handle_cast({log, Bin}, State) -> %% clause gets called repeatedly by the handler. In order to %% guarantee that a filesync *always* happens after the last log %% request, the repeat operation must be active! -handle_cast({repeated_filesync,LastLogTS0}, +handle_cast(repeated_filesync, State = #{type := Type, file_ctrl_pid := FileCtrlPid, filesync_repeat_interval := FSyncInt, - last_log_ts := LastLogTS1}) -> + last_op := LastOp}) -> State1 = if not is_atom(Type), is_integer(FSyncInt) -> %% only do filesync if something has been %% written since last time we checked - if LastLogTS1 == LastLogTS0 -> + if LastOp == sync -> ok; true -> file_ctrl_filesync_async(FileCtrlPid) end, {ok,TRef} = timer:apply_after(FSyncInt, gen_server,cast, - [self(),{repeated_filesync,LastLogTS1}]), - State#{rep_sync_tref => TRef}; + [self(),repeated_filesync]), + State#{rep_sync_tref => TRef, last_op => sync}; true -> State end, @@ -600,6 +599,7 @@ write(Name, Mode, T1, Bin, _CallOrCast, State1#{mode => Mode1, last_qlen := LastQLen1, last_log_ts => T1, + last_op => write, burst_win_ts => BurstWinT, burst_msg_count => BurstMsgCount1, file_ctrl_sync => diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl index 6a4ec72d5e..bb88c53f26 100644 --- a/lib/kernel/test/logger_disk_log_h_SUITE.erl +++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl @@ -66,19 +66,7 @@ end_per_testcase(Case, Config) -> ok. 
groups() -> - [ - {retry_op_switch_to_sync, - [{repeat_until_all_ok,10}], - [op_switch_to_sync]}, - - {retry_op_switch_to_drop, - [{repeat_until_all_ok,10}], - [op_switch_to_drop]}, - - {retry_op_switch_to_flush, - [{repeat_until_all_ok,10}], - [op_switch_to_flush]} - ]. + []. all() -> [start_stop_handler, @@ -99,9 +87,9 @@ all() -> disk_log_events, write_failure, sync_failure, - {group,retry_op_switch_to_sync}, - {group,retry_op_switch_to_drop}, - {group,retry_op_switch_to_flush}, + op_switch_to_sync, + op_switch_to_drop, + op_switch_to_flush, limit_burst_disabled, limit_burst_enabled_one, limit_burst_enabled_period, @@ -880,57 +868,107 @@ op_switch_to_sync(Config) -> enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), send_burst({n,NumOfReqs}, seq, {chars,79}, info), - NumOfReqs = count_lines(Log), - ok = file:delete(Log). + Lines = count_lines(Log), + ok = file:delete(Log), + NumOfReqs = Lines, + ok. op_switch_to_sync(cleanup, _Config) -> ok = stop_handler(?MODULE). +op_switch_to_drop() -> + [{timetrap,{seconds,180}}]. op_switch_to_drop(Config) -> - {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - NumOfReqs = 300, - Procs = 2, - NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1, - drop_new_reqs_qlen => 2, - flush_reqs_qlen => Procs*NumOfReqs+1, - enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages dropped = ~w (~w)", - [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), - true = (Logged < (Procs*NumOfReqs)), - true = (Logged > 0), - ok = file:delete(Log). + Test = + fun() -> + {Log,HConfig,DLHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 300, + Procs = 2, + Bursts = 10, + NewHConfig = + HConfig#{logger_disk_log_h => + DLHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs*Bursts, + enable_burst_limit => false}}, + ok = logger:set_handler_config(?MODULE, NewHConfig), + %% It sometimes happens that the handler either gets + %% the requests in a slow enough pace so that dropping + %% never occurs. Therefore, lets generate a number of + %% bursts to increase the chance of message buildup. + [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1, Bursts)], + Logged = count_lines(Log), + ok= stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages dropped = ~w (~w)", + [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]), + true = (Logged < (Procs*NumOfReqs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_drop(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). op_switch_to_flush() -> - [{timetrap,{seconds,60}}]. + [{timetrap,{seconds,180}}]. 
op_switch_to_flush(Config) -> - {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + Test = + fun() -> + {Log,HConfig,DLHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + + %% NOTE: it's important that both async and sync + %% requests have been queued when the flush happens + %% (verify with coverage of flush_log_requests/2) - %% it's important that both async and sync requests have been queued - %% when the flush happens (verify with coverage of flush_log_requests/2) - - NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - %% disable drop mode - drop_new_reqs_qlen => 500, - flush_reqs_qlen => 500, - enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 1000, - Procs = 200, - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages flushed/dropped = ~w (~w)", - [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), - true = (Logged < (NumOfReqs*Procs)), - true = (Logged > 0), - ok = file:delete(Log). + NewHConfig = + HConfig#{logger_disk_log_h => + DLHConfig#{toggle_sync_qlen => 2, + %% disable drop mode + drop_new_reqs_qlen => 300, + flush_reqs_qlen => 300, + enable_burst_limit => false}}, + ok = logger:set_handler_config(?MODULE, NewHConfig), + NumOfReqs = 1500, + Procs = 10, + Bursts = 10, + %% It sometimes happens that the handler either gets + %% the requests in a slow enough pace so that flushing + %% never occurs, or it gets all messages at once, + %% causing all messages to get flushed (no dropping of + %% sync messages gets tested). Therefore, lets + %% generate a number of bursts to increase the chance + %% of message buildup in some random fashion. + [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1,Bursts)], + Logged = count_lines(Log), + ok= stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages flushed/dropped = ~w (~w)", + [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]), + true = (Logged < (NumOfReqs*Procs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_flush(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). 
limit_burst_disabled(Config) -> @@ -1014,7 +1052,7 @@ qlen_kill_new(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{enable_kill_overloaded=>true, @@ -1035,7 +1073,7 @@ qlen_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -1051,7 +1089,7 @@ mem_kill_new(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{enable_kill_overloaded=>true, @@ -1072,7 +1110,7 @@ mem_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter * 2), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -1105,7 +1143,7 @@ restart_after(Config) -> end, {Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig2 = HConfig#{logger_disk_log_h=>DLHConfig#{enable_kill_overloaded=>true, handler_overloaded_qlen=>10, @@ -1117,7 +1155,7 @@ restart_after(Config) -> send_burst({n,100}, {spawn,2,0}, {chars,79}, info), receive {'DOWN', MRef2, _, _, _Info2} -> - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), Pid1 = whereis(?MODULE), true = is_pid(Pid1), false = (Pid1 == Pid0), @@ -1388,6 +1426,29 @@ count_lines1(File) -> file:close(Dev), Lines. +repeat_until_ok(Fun, N) -> + repeat_until_ok(Fun, 0, N, undefined). + +repeat_until_ok(_Fun, Stop, Stop, Reason) -> + {fails,Reason}; + +repeat_until_ok(Fun, C, Stop, FirstReason) -> + if C > 0 -> timer:sleep(5000); + true -> ok + end, + try Fun() of + Result -> + {ok,{C,Result}} + catch + _:Reason:Stack -> + ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]), + if FirstReason == undefined -> + repeat_until_ok(Fun, C+1, Stop, {Reason,Stack}); + true -> + repeat_until_ok(Fun, C+1, Stop, FirstReason) + end + end. + start_tracer(Trace,Expected) -> Pid = self(), dbg:tracer(process,{fun tracer/2,{Pid,Expected}}), @@ -1410,9 +1471,11 @@ tpl([{M,F,A}|Trace]) -> tpl([]) -> ok. -tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, {Pid,[{formatter,Msg}|Expected]}) -> +tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, + {Pid,[{formatter,Msg}|Expected]}) -> maybe_tracer_done(Pid,Expected,{formatter,Msg}); -tracer({trace,_,call,{logger_disk_log_h,handle_cast,[{Op,_}|_]}}, {Pid,[{Mod,Func,Op}|Expected]}) -> +tracer({trace,_,call,{logger_disk_log_h,handle_cast,[Op|_]}}, + {Pid,[{Mod,Func,Op}|Expected]}) -> maybe_tracer_done(Pid,Expected,{Mod,Func,Op}); tracer({trace,_,call,{Mod,Func,_}}, {Pid,[{Mod,Func}|Expected]}) -> maybe_tracer_done(Pid,Expected,{Mod,Func}); diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl index ed5504431e..3ebbbe74ef 100644 --- a/lib/kernel/test/logger_std_h_SUITE.erl +++ b/lib/kernel/test/logger_std_h_SUITE.erl @@ -89,19 +89,7 @@ end_per_testcase(Case, Config) -> ok. 
groups() -> - [ - {retry_op_switch_to_sync_file, - [{repeat_until_all_ok,10}], - [op_switch_to_sync_file]}, - - {retry_op_switch_to_drop_file, - [{repeat_until_all_ok,10}], - [op_switch_to_drop_file]}, - - {retry_op_switch_to_flush_file, - [{repeat_until_all_ok,10}], - [op_switch_to_flush_file]} - ]. + []. all() -> [add_remove_instance_tty, @@ -122,11 +110,11 @@ all() -> filesync, write_failure, sync_failure, - {group,retry_op_switch_to_sync_file}, + op_switch_to_sync_file, op_switch_to_sync_tty, - {group,retry_op_switch_to_drop_file}, + op_switch_to_drop_file, op_switch_to_drop_tty, - {group,retry_op_switch_to_flush_file}, + op_switch_to_flush_file, op_switch_to_flush_tty, limit_burst_disabled, limit_burst_enabled_one, @@ -588,7 +576,7 @@ filesync(Config) -> #{filesync_repeat_interval => no_repeat}), dbg:stop_clear(), - Received2 = lists:map(fun({trace,_M,handle_cast,[{Op,_},_]}) -> {trace,Op}; + Received2 = lists:map(fun({trace,_M,handle_cast,[Op,_]}) -> {trace,Op}; (Other) -> Other end, test_server:messages_get()), ct:pal("Trace #2 =~n~p", [Received2]), @@ -726,7 +714,8 @@ op_switch_to_sync_file(Config) -> ok = logger:set_handler_config(?MODULE, NewHConfig), %% TRecvPid = start_op_trace(), send_burst({n,NumOfReqs}, seq, {chars,79}, info), - NumOfReqs = count_lines(Log), + Lines = count_lines(Log), + ok = file:delete(Log), %% true = analyse_trace(TRecvPid, %% fun(Events) -> find_mode(async,Events) end), %% true = analyse_trace(TRecvPid, @@ -737,8 +726,8 @@ op_switch_to_sync_file(Config) -> %% fun(Events) -> find_mode(drop,Events) end), %% false = analyse_trace(TRecvPid, %% fun(Events) -> find_mode(flush,Events) end), - ok = file:delete(Log), %% stop_op_trace(TRecvPid), + NumOfReqs = Lines, ok. op_switch_to_sync_file(cleanup, _Config) -> ok = stop_handler(?MODULE). @@ -757,38 +746,49 @@ op_switch_to_sync_tty(Config) -> op_switch_to_sync_tty(cleanup, _Config) -> ok = stop_handler(?MODULE). +op_switch_to_drop_file() -> + [{timetrap,{seconds,180}}]. op_switch_to_drop_file(Config) -> - {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - NumOfReqs = 300, - Procs = 2, - NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, + Test = + fun() -> + {Log,HConfig,StdHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 300, + Procs = 2, + Bursts = 10, + NewHConfig = + HConfig#{logger_std_h => + StdHConfig#{toggle_sync_qlen => 1, drop_new_reqs_qlen => 2, - flush_reqs_qlen => Procs*NumOfReqs+1, + flush_reqs_qlen => + Procs*NumOfReqs*Bursts, enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - %% TRecvPid = start_op_trace(), - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages dropped = ~w (~w)", - [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), - true = (Logged < (Procs*NumOfReqs)), - true = (Logged > 0), - %% true = analyse_trace(TRecvPid, - %% fun(Events) -> find_mode(async,Events) end), - %% true = analyse_trace(TRecvPid, - %% fun(Events) -> find_mode(drop,Events) end), - %% false = analyse_trace(TRecvPid, - %% fun(Events) -> find_mode(flush,Events) end), - %% true = analyse_trace(TRecvPid, - %% fun(Events) -> find_switch(async,drop,Events) - %% orelse find_switch(sync,drop,Events) - %% end), - ok = file:delete(Log), - %% stop_op_trace(TRecvPid), - ok. + ok = logger:set_handler_config(?MODULE, NewHConfig), + %% It sometimes happens that the handler gets the + %% requests in a slow enough pace so that dropping + %% never occurs. 
Therefore, lets generate a number of + %% bursts to increase the chance of message buildup. + [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1, Bursts)], + Logged = count_lines(Log), + ok = stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages dropped = ~w (~w)", + [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]), + true = (Logged < (Procs*NumOfReqs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_drop_file(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). op_switch_to_drop_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), @@ -797,7 +797,8 @@ op_switch_to_drop_tty(Config) -> NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, drop_new_reqs_qlen => 2, - flush_reqs_qlen => Procs*NumOfReqs+1, + flush_reqs_qlen => + Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), @@ -806,32 +807,56 @@ op_switch_to_drop_tty(cleanup, _Config) -> ok = stop_handler(?MODULE). op_switch_to_flush_file() -> - [{timetrap,{seconds,60}}]. + [{timetrap,{seconds,180}}]. op_switch_to_flush_file(Config) -> - {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - - %% it's important that both async and sync requests have been queued - %% when the flush happens (verify with coverage of flush_log_requests/2) - - NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, + Test = + fun() -> + {Log,HConfig,StdHConfig} = + start_handler(?MODULE, ?FUNCTION_NAME, Config), + + %% NOTE: it's important that both async and sync + %% requests have been queued when the flush happens + %% (verify with coverage of flush_log_requests/2) + + NewHConfig = + HConfig#{logger_std_h => + StdHConfig#{toggle_sync_qlen => 2, %% disable drop mode - drop_new_reqs_qlen => 500, - flush_reqs_qlen => 500, + drop_new_reqs_qlen => 300, + flush_reqs_qlen => 300, enable_burst_limit => false}}, - ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 1000, - Procs = 200, - send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), - Logged = count_lines(Log), - ct:pal("Number of messages flushed/dropped = ~w (~w)", - [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), - true = (Logged < (NumOfReqs*Procs)), - true = (Logged > 0), - ok = file:delete(Log), - ok. + ok = logger:set_handler_config(?MODULE, NewHConfig), + NumOfReqs = 1500, + Procs = 10, + Bursts = 10, + %% It sometimes happens that the handler either gets + %% the requests in a slow enough pace so that flushing + %% never occurs, or it gets all messages at once, + %% causing all messages to get flushed (no dropping of + %% sync messages gets tested). Therefore, lets + %% generate a number of bursts to increase the chance + %% of message buildup in some random fashion. 
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) || + _ <- lists:seq(1,Bursts)], + Logged = count_lines(Log), + ok = stop_handler(?MODULE), + _ = file:delete(Log), + ct:pal("Number of messages flushed/dropped = ~w (~w)", + [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]), + true = (Logged < (NumOfReqs*Procs*Bursts)), + true = (Logged > 0), + ok + end, + %% As it's tricky to get the timing right in only one go, we perform the + %% test repeatedly, hoping that will generate a successful result. + case repeat_until_ok(Test, 10) of + {ok,{Failures,_Result}} -> + ct:log("Failed ~w times before success!", [Failures]); + {fails,Reason} -> + ct:fail(Reason) + end. op_switch_to_flush_file(cleanup, _Config) -> - ok = stop_handler(?MODULE). + _ = stop_handler(?MODULE). op_switch_to_flush_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), @@ -931,10 +956,10 @@ kill_disabled(cleanup, _Config) -> ok = stop_handler(?MODULE). qlen_kill_new(Config) -> - {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true, handler_overloaded_qlen=>10, @@ -954,7 +979,7 @@ qlen_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -968,7 +993,7 @@ qlen_kill_new(cleanup, _Config) -> %% choke the standard handler on remote node to verify the termination %% works as expected -qlen_kill_std(Config) -> +qlen_kill_std(_Config) -> %%! HERE %% Dir = ?config(priv_dir, Config), %% File = lists:concat([?MODULE,"_",?FUNCTION_NAME,".log"]), @@ -982,10 +1007,10 @@ qlen_kill_std(Config) -> {skip,"Not done yet"}. mem_kill_new(Config) -> - {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), Pid0 = whereis(?MODULE), {_,Mem0} = process_info(Pid0, memory), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig = HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true, handler_overloaded_qlen=>50000, @@ -1005,7 +1030,7 @@ mem_kill_new(Config) -> killed -> ct:pal("Slow shutdown, handler process was killed!", []) end, - timer:sleep(RestartAfter * 2), + timer:sleep(RestartAfter + 2000), true = is_pid(whereis(?MODULE)), ok after @@ -1019,7 +1044,7 @@ mem_kill_new(cleanup, _Config) -> %% choke the standard handler on remote node to verify the termination %% works as expected -mem_kill_std(Config) -> +mem_kill_std(_Config) -> {skip,"Not done yet"}. 
restart_after(Config) -> @@ -1043,7 +1068,7 @@ restart_after(Config) -> end, {Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - RestartAfter = 2000, + RestartAfter = ?HANDLER_RESTART_AFTER, NewHConfig2 = HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true, handler_overloaded_qlen=>10, @@ -1055,7 +1080,7 @@ restart_after(Config) -> send_burst({n,100}, {spawn,2,0}, {chars,79}, info), receive {'DOWN', MRef2, _, _, _Info2} -> - timer:sleep(RestartAfter + 1000), + timer:sleep(RestartAfter + 2000), Pid1 = whereis(?MODULE), true = is_pid(Pid1), false = (Pid1 == Pid0), @@ -1101,7 +1126,7 @@ handler_requests_under_load(Config) -> NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult), ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]), ok = file:delete(Log). -handler_requests_under_load(cleanup, Config) -> +handler_requests_under_load(cleanup, _Config) -> ok = stop_handler(?MODULE). send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) -> @@ -1326,6 +1351,30 @@ try_match_file(_,Pattern,_,Incorrect) -> [Pattern,Incorrect]), erlang:error({error,not_matching_pattern,Pattern,Incorrect}). +repeat_until_ok(Fun, N) -> + repeat_until_ok(Fun, 0, N, undefined). + +repeat_until_ok(_Fun, Stop, Stop, Reason) -> + {fails,Reason}; + +repeat_until_ok(Fun, C, Stop, FirstReason) -> + if C > 0 -> timer:sleep(5000); + true -> ok + end, + try Fun() of + Result -> + {ok,{C,Result}} + catch + _:Reason:Stack -> + ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]), + if FirstReason == undefined -> + repeat_until_ok(Fun, C+1, Stop, {Reason,Stack}); + true -> + repeat_until_ok(Fun, C+1, Stop, FirstReason) + end + end. + + %%%----------------------------------------------------------------- %%% start_op_trace() -> @@ -1366,17 +1415,17 @@ find_mode(flush, Events) -> find_mode(Mode, Events) -> lists:keymember([{mode,Mode}], 3, Events). -find_switch(From, To, Events) -> - try lists:foldl(fun({trace_return,check_load,{To,_,_,_}}, - {trace_call,check_load,[#{mode := From}]}) -> - throw(match); - (Event, _) -> - Event - end, undefined, Events) of - _ -> false - catch - throw:match -> true - end. +%% find_switch(_From, To, Events) -> +%% try lists:foldl(fun({trace_return,check_load,{To,_,_,_}}, +%% {trace_call,check_load,[#{mode := From}]}) -> +%% throw(match); +%% (Event, _) -> +%% Event +%% end, undefined, Events) of +%% _ -> false +%% catch +%% throw:match -> true +%% end. analyse_trace(TRecvPid, TestFun) -> TRecvPid ! {test,self(),TestFun}, -- cgit v1.2.3