aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/kernel/doc/src/logger_chapter.xml16
-rw-r--r--lib/kernel/src/logger_disk_log_h.erl64
-rw-r--r--lib/kernel/src/logger_h_common.erl16
-rw-r--r--lib/kernel/src/logger_h_common.hrl2
-rw-r--r--lib/kernel/src/logger_std_h.erl22
-rw-r--r--lib/kernel/test/logger_disk_log_h_SUITE.erl185
-rw-r--r--lib/kernel/test/logger_std_h_SUITE.erl262
7 files changed, 365 insertions, 202 deletions
diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml
index 519df2ba48..3150c5adb4 100644
--- a/lib/kernel/doc/src/logger_chapter.xml
+++ b/lib/kernel/doc/src/logger_chapter.xml
@@ -661,10 +661,20 @@ do_log(Fd,Log,#{formatter:={FModule,FConfig}}) ->
</item>
</taglist>
- <p>For the overload protection algorithm to work properly, it is a
- requirement that:</p>
+ <p>For the overload protection algorithm to work properly, it is
+ required that:</p>
- <p><c>toggle_sync_qlen &lt; drop_new_reqs_qlen &lt; flush_reqs_qlen</c></p>
+ <p><c>toggle_sync_qlen =&lt; drop_new_reqs_qlen =&lt; flush_reqs_qlen</c></p>
+
+ <p>and that:</p>
+
+ <p><c>drop_new_reqs_qlen &gt; 1</c></p>
+
+ <p>If <c>toggle_sync_qlen</c> is set to <c>0</c>, the handler will handle all
+ requests synchronously. Setting the value of <c>toggle_sync_qlen</c> to the same
+ as <c>drop_new_reqs_qlen</c>, disables the synchronous mode. Likewise, setting
+ the value of <c>drop_new_reqs_qlen</c> to the same as <c>flush_reqs_qlen</c>,
+ disables the drop mode.</p>
<p>During high load scenarios, the length of the handler message queue
rarely grows in a linear and predictable way. Instead, whenever the
diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl
index 0150fa781a..57c54ce27e 100644
--- a/lib/kernel/src/logger_disk_log_h.erl
+++ b/lib/kernel/src/logger_disk_log_h.erl
@@ -278,10 +278,11 @@ init([Name, Config = #{disk_log_opts := LogOpts},
last_log_ts => T0,
burst_win_ts => T0,
burst_msg_count => 0,
+ last_op => sync,
prev_log_result => ok,
prev_sync_result => ok,
prev_disk_log_info => undefined}),
- gen_server:cast(self(), {repeated_disk_log_sync,T0}),
+ gen_server:cast(self(), repeated_disk_log_sync),
enter_loop(Config, State1);
Error ->
logger_h_common:error_notify({open_disk_log,Name,Error}),
@@ -316,8 +317,7 @@ handle_call(disk_log_sync, _From, State = #{id := Name}) ->
{reply, Result, State1};
handle_call({change_config,_OldConfig,NewConfig}, _From,
- State = #{filesync_repeat_interval := FSyncInt0,
- last_log_ts := LastLogTS}) ->
+ State = #{filesync_repeat_interval := FSyncInt0}) ->
HConfig = maps:get(?MODULE, NewConfig, #{}),
State1 = #{toggle_sync_qlen := TSQL,
drop_new_reqs_qlen := DNRQL,
@@ -338,9 +338,8 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
_ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
State,
undefined)),
- _ = gen_server:cast(self(), {repeated_disk_log_sync,
- LastLogTS})
- end,
+ _ = gen_server:cast(self(), repeated_disk_log_sync)
+ end,
{reply, ok, State1};
false ->
{reply, {error,{invalid_levels,{TSQL,DNRQL,FRQL}}}, State}
@@ -370,24 +369,23 @@ handle_cast({log, Bin}, State) ->
%% clause gets called repeatedly by the handler. In order to
%% guarantee that a filesync *always* happens after the last log
%% request, the repeat operation must be active!
-handle_cast({repeated_disk_log_sync,LastLogTS0},
+handle_cast(repeated_disk_log_sync,
State = #{id := Name,
filesync_repeat_interval := FSyncInt,
- last_log_ts := LastLogTS1}) ->
+ last_op := LastOp}) ->
State1 =
if is_integer(FSyncInt) ->
%% only do filesync if something has been
%% written since last time we checked
- NewState = if LastLogTS1 == LastLogTS0 ->
+ NewState = if LastOp == sync ->
State;
true ->
disk_log_sync(Name, State)
end,
{ok,TRef} =
timer:apply_after(FSyncInt, gen_server,cast,
- [self(),
- {repeated_disk_log_sync,LastLogTS1}]),
- NewState#{rep_sync_tref => TRef};
+ [self(),repeated_disk_log_sync]),
+ NewState#{rep_sync_tref => TRef, last_op => sync};
true ->
State
end,
@@ -649,10 +647,9 @@ close_disk_log(Name, _) ->
ok.
disk_log_write(Name, Bin, State) ->
- Result =
case ?disk_log_blog(Name, Bin) of
ok ->
- ok;
+ State#{prev_log_result => ok, last_op => write};
LogError ->
_ = case maps:get(prev_log_result, State) of
LogError ->
@@ -664,29 +661,26 @@ disk_log_write(Name, Bin, State) ->
LogOpts,
LogError})
end,
- LogError
- end,
- State#{prev_log_result => Result}.
+ State#{prev_log_result => LogError}
+ end.
disk_log_sync(Name, State) ->
- Result =
- case ?disk_log_sync(Name) of
- ok ->
- ok;
- SyncError ->
- _ = case maps:get(prev_sync_result, State) of
- SyncError ->
- %% don't report same error twice
- ok;
- _ ->
- LogOpts = maps:get(log_opts, State),
- logger_h_common:error_notify({Name,sync,
- LogOpts,
- SyncError})
- end,
- SyncError
- end,
- State#{prev_sync_result => Result}.
+ case ?disk_log_sync(Name) of
+ ok ->
+ State#{prev_sync_result => ok, last_op => sync};
+ SyncError ->
+ _ = case maps:get(prev_sync_result, State) of
+ SyncError ->
+ %% don't report same error twice
+ ok;
+ _ ->
+ LogOpts = maps:get(log_opts, State),
+ logger_h_common:error_notify({Name,sync,
+ LogOpts,
+ SyncError})
+ end,
+ State#{prev_sync_result => SyncError}
+ end.
error_notify_new(Info,Info, _Term) ->
ok;
diff --git a/lib/kernel/src/logger_h_common.erl b/lib/kernel/src/logger_h_common.erl
index 7caad366ae..901c4c0dad 100644
--- a/lib/kernel/src/logger_h_common.erl
+++ b/lib/kernel/src/logger_h_common.erl
@@ -135,7 +135,8 @@ call_cast_or_drop(Name, Bin) ->
_:{timeout,_} ->
?observe(Name,{dropped,1})
end;
- drop -> ?observe(Name,{dropped,1})
+ drop ->
+ ?observe(Name,{dropped,1})
catch
%% if the ETS table doesn't exist (maybe because of a
%% handler restart), we can only drop the request
@@ -152,12 +153,15 @@ check_load(State = #{id:=Name, mode := Mode,
flush_reqs_qlen := FlushQLen}) ->
{_,Mem} = process_info(self(), memory),
?observe(Name,{max_mem,Mem}),
- %% make sure the handler process doesn't get scheduled
- %% out between the message_queue_len check below and the
- %% action that follows (flush or write).
{_,QLen} = process_info(self(), message_queue_len),
?observe(Name,{max_qlen,QLen}),
-
+ %% When the handler process gets scheduled in, it's impossible
+ %% to predict the QLen. We could jump "up" arbitrarily from say
+ %% async to sync, async to drop, sync to flush, etc. However, when
+ %% the handler process manages the log requests (without flushing),
+ %% one after the other, we will move "down" from drop to sync and
+ %% from sync to async. This way we don't risk getting stuck in
+ %% drop or sync mode with an empty mailbox.
{Mode1,_NewDrops,_NewFlushes} =
if
QLen >= FlushQLen ->
@@ -292,7 +296,7 @@ overload_levels_ok(HandlerConfig) ->
TSQL = maps:get(toggle_sync_qlen, HandlerConfig, ?TOGGLE_SYNC_QLEN),
DNRQL = maps:get(drop_new_reqs_qlen, HandlerConfig, ?DROP_NEW_REQS_QLEN),
FRQL = maps:get(flush_reqs_qlen, HandlerConfig, ?FLUSH_REQS_QLEN),
- (TSQL < DNRQL) andalso (DNRQL < FRQL).
+ (DNRQL > 1) andalso (TSQL =< DNRQL) andalso (DNRQL =< FRQL).
error_notify(Term) ->
?internal_log(error, Term).
diff --git a/lib/kernel/src/logger_h_common.hrl b/lib/kernel/src/logger_h_common.hrl
index 89378dbb10..ed365ce6eb 100644
--- a/lib/kernel/src/logger_h_common.hrl
+++ b/lib/kernel/src/logger_h_common.hrl
@@ -124,7 +124,7 @@
%%% slow down execution and therefore should not be include in code
%%% to be officially released.
-%% -define(TEST_HOOKS, true).
+-define(TEST_HOOKS, true).
-ifdef(TEST_HOOKS).
-define(TEST_HOOKS_TAB, logger_h_test_hooks).
diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl
index 31edcfea8b..e5e0febc88 100644
--- a/lib/kernel/src/logger_std_h.erl
+++ b/lib/kernel/src/logger_std_h.erl
@@ -257,10 +257,11 @@ init([Name, Config,
file_ctrl_sync => FileCtrlSyncInt,
last_qlen => 0,
last_log_ts => T0,
+ last_op => sync,
burst_win_ts => T0,
burst_msg_count => 0}),
proc_lib:init_ack({ok,self()}),
- gen_server:cast(self(), {repeated_filesync,T0}),
+ gen_server:cast(self(), repeated_filesync),
enter_loop(Config, State1);
Error ->
logger_h_common:error_notify({init_handler,Name,Error}),
@@ -310,12 +311,11 @@ handle_call(filesync, _From, State = #{type := Type,
if is_atom(Type) ->
{reply, ok, State};
true ->
- {reply, file_ctrl_filesync_sync(FileCtrlPid), State}
+ {reply, file_ctrl_filesync_sync(FileCtrlPid), State#{last_op=>sync}}
end;
handle_call({change_config,_OldConfig,NewConfig}, _From,
- State = #{filesync_repeat_interval := FSyncInt0,
- last_log_ts := LastLogTS}) ->
+ State = #{filesync_repeat_interval := FSyncInt0}) ->
HConfig = maps:get(?MODULE, NewConfig, #{}),
State1 = maps:merge(State, HConfig),
case logger_h_common:overload_levels_ok(State1) of
@@ -334,8 +334,7 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
_ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
State,
undefined)),
- gen_server:cast(self(), {repeated_filesync,
- LastLogTS})
+ gen_server:cast(self(), repeated_filesync)
end,
{reply, ok, State1};
false ->
@@ -365,24 +364,24 @@ handle_cast({log, Bin}, State) ->
%% clause gets called repeatedly by the handler. In order to
%% guarantee that a filesync *always* happens after the last log
%% request, the repeat operation must be active!
-handle_cast({repeated_filesync,LastLogTS0},
+handle_cast(repeated_filesync,
State = #{type := Type,
file_ctrl_pid := FileCtrlPid,
filesync_repeat_interval := FSyncInt,
- last_log_ts := LastLogTS1}) ->
+ last_op := LastOp}) ->
State1 =
if not is_atom(Type), is_integer(FSyncInt) ->
%% only do filesync if something has been
%% written since last time we checked
- if LastLogTS1 == LastLogTS0 ->
+ if LastOp == sync ->
ok;
true ->
file_ctrl_filesync_async(FileCtrlPid)
end,
{ok,TRef} =
timer:apply_after(FSyncInt, gen_server,cast,
- [self(),{repeated_filesync,LastLogTS1}]),
- State#{rep_sync_tref => TRef};
+ [self(),repeated_filesync]),
+ State#{rep_sync_tref => TRef, last_op => sync};
true ->
State
end,
@@ -600,6 +599,7 @@ write(Name, Mode, T1, Bin, _CallOrCast,
State1#{mode => Mode1,
last_qlen := LastQLen1,
last_log_ts => T1,
+ last_op => write,
burst_win_ts => BurstWinT,
burst_msg_count => BurstMsgCount1,
file_ctrl_sync =>
diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl
index 63e5b56021..7c33c9130c 100644
--- a/lib/kernel/test/logger_disk_log_h_SUITE.erl
+++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl
@@ -369,10 +369,18 @@ config_fail(_Config) ->
#{logger_disk_log_h => #{bad => bad},
filter_default=>log,
formatter=>{?MODULE,self()}}),
- {error,{handler_not_added,{invalid_levels,{42,42,_}}}} =
+
+ {error,{handler_not_added,{invalid_levels,{_,1,_}}}} =
+ logger:add_handler(?MODULE,logger_disk_log_h,
+ #{logger_disk_log_h => #{drop_new_reqs_qlen=>1}}),
+ {error,{handler_not_added,{invalid_levels,{43,42,_}}}} =
logger:add_handler(?MODULE,logger_disk_log_h,
- #{logger_disk_log_h => #{toggle_sync_qlen=>42,
+ #{logger_disk_log_h => #{toggle_sync_qlen=>43,
drop_new_reqs_qlen=>42}}),
+ {error,{handler_not_added,{invalid_levels,{_,43,42}}}} =
+ logger:add_handler(?MODULE,logger_disk_log_h,
+ #{logger_disk_log_h => #{drop_new_reqs_qlen=>43,
+ flush_reqs_qlen=>42}}),
ok = logger:add_handler(?MODULE,logger_disk_log_h,
#{filter_default=>log,
@@ -848,62 +856,115 @@ internal_log(Type, Term) ->
op_switch_to_sync(Config) ->
{Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 500,
NewHConfig =
- HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 3,
- drop_new_reqs_qlen => 501,
- flush_reqs_qlen => 2000,
+ HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2,
+ drop_new_reqs_qlen => NumOfReqs+1,
+ flush_reqs_qlen => 2*NumOfReqs,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 500,
send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- NumOfReqs = count_lines(Log),
- ok = file:delete(Log).
+ Lines = count_lines(Log),
+ ok = file:delete(Log),
+ NumOfReqs = Lines,
+ ok.
op_switch_to_sync(cleanup, _Config) ->
ok = stop_handler(?MODULE).
+op_switch_to_drop() ->
+ [{timetrap,{seconds,180}}].
op_switch_to_drop(Config) ->
- {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
- NewHConfig =
- HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 3,
- flush_reqs_qlen => 600,
- enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 500,
- send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages dropped = ~w (~w)",
- [NumOfReqs-Logged,NumOfReqs]),
- true = (Logged < NumOfReqs),
- ok = file:delete(Log).
+ Test =
+ fun() ->
+ {Log,HConfig,DLHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 300,
+ Procs = 2,
+ Bursts = 10,
+ NewHConfig =
+ HConfig#{logger_disk_log_h =>
+ DLHConfig#{toggle_sync_qlen => 1,
+ drop_new_reqs_qlen => 2,
+ flush_reqs_qlen => Procs*NumOfReqs*Bursts,
+ enable_burst_limit => false}},
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that dropping
+ %% never occurs. Therefore, lets generate a number of
+ %% bursts to increase the chance of message buildup.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1, Bursts)],
+ Logged = count_lines(Log),
+ ok= stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages dropped = ~w (~w)",
+ [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+ true = (Logged < (Procs*NumOfReqs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_drop(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_flush() ->
[{timetrap,{minutes,3}}].
op_switch_to_flush(Config) ->
- {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
- %% it's important that both async and sync requests have been queued
- %% when the flush happens (verify with coverage of flush_log_requests/2)
+ Test =
+ fun() ->
+ {Log,HConfig,DLHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+ %% NOTE: it's important that both async and sync
+ %% requests have been queued when the flush happens
+ %% (verify with coverage of flush_log_requests/2)
- NewHConfig =
- HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 99,
- flush_reqs_qlen => 100,
- enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 1000,
- Procs = 500,
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages flushed/dropped = ~w (~w)",
- [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
- true = (Logged < (NumOfReqs*Procs)),
- ok = file:delete(Log).
+ NewHConfig =
+ HConfig#{logger_disk_log_h =>
+ DLHConfig#{toggle_sync_qlen => 2,
+ %% disable drop mode
+ drop_new_reqs_qlen => 300,
+ flush_reqs_qlen => 300,
+ enable_burst_limit => false}},
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ NumOfReqs = 1500,
+ Procs = 10,
+ Bursts = 10,
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that flushing
+ %% never occurs, or it gets all messages at once,
+ %% causing all messages to get flushed (no dropping of
+ %% sync messages gets tested). Therefore, lets
+ %% generate a number of bursts to increase the chance
+ %% of message buildup in some random fashion.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1,Bursts)],
+ Logged = count_lines(Log),
+ ok= stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages flushed/dropped = ~w (~w)",
+ [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+ true = (Logged < (NumOfReqs*Procs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_flush(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
limit_burst_disabled(Config) ->
@@ -987,7 +1048,7 @@ qlen_kill_new(Config) ->
{_Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_disk_log_h =>
DLHConfig#{enable_kill_overloaded=>true,
@@ -1008,7 +1069,7 @@ qlen_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -1024,7 +1085,7 @@ mem_kill_new(Config) ->
{_Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_disk_log_h =>
DLHConfig#{enable_kill_overloaded=>true,
@@ -1045,7 +1106,7 @@ mem_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter * 2),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -1078,7 +1139,7 @@ restart_after(Config) ->
end,
{Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig2 =
HConfig#{logger_disk_log_h=>DLHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -1090,7 +1151,7 @@ restart_after(Config) ->
send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
receive
{'DOWN', MRef2, _, _, _Info2} ->
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
Pid1 = whereis(?MODULE),
true = is_pid(Pid1),
false = (Pid1 == Pid0),
@@ -1361,6 +1422,29 @@ count_lines1(File) ->
file:close(Dev),
Lines.
+repeat_until_ok(Fun, N) ->
+ repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+ {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+ if C > 0 -> timer:sleep(5000);
+ true -> ok
+ end,
+ try Fun() of
+ Result ->
+ {ok,{C,Result}}
+ catch
+ _:Reason:Stack ->
+ ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+ if FirstReason == undefined ->
+ repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+ true ->
+ repeat_until_ok(Fun, C+1, Stop, FirstReason)
+ end
+ end.
+
start_tracer(Trace,Expected) ->
Pid = self(),
dbg:tracer(process,{fun tracer/2,{Pid,Expected}}),
@@ -1382,7 +1466,8 @@ tpl([{M,F,A}|Trace]) ->
tpl([]) ->
ok.
-tracer({trace,_,call,{logger_disk_log_h,handle_cast,[{Op,_}|_]}}, {Pid,[{Mod,Func,Op}|Expected]}) ->
+tracer({trace,_,call,{logger_disk_log_h,handle_cast,[Op|_]}},
+ {Pid,[{Mod,Func,Op}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{Mod,Func,Op});
tracer({trace,_,call,{Mod=disk_log,Func=blog,[_,Data]}}, {Pid,[{Mod,Func,Data}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{Mod,Func,Data});
diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl
index 7c8d63cbbd..34c3167960 100644
--- a/lib/kernel/test/logger_std_h_SUITE.erl
+++ b/lib/kernel/test/logger_std_h_SUITE.erl
@@ -289,10 +289,17 @@ config_fail(_Config) ->
#{logger_std_h => #{restart_type => bad},
filter_default=>log,
formatter=>{?MODULE,self()}}),
- {error,{handler_not_added,{invalid_levels,{42,42,_}}}} =
+ {error,{handler_not_added,{invalid_levels,{_,1,_}}}} =
logger:add_handler(?MODULE,logger_std_h,
- #{logger_std_h => #{toggle_sync_qlen=>42,
+ #{logger_std_h => #{drop_new_reqs_qlen=>1}}),
+ {error,{handler_not_added,{invalid_levels,{43,42,_}}}} =
+ logger:add_handler(?MODULE,logger_std_h,
+ #{logger_std_h => #{toggle_sync_qlen=>43,
drop_new_reqs_qlen=>42}}),
+ {error,{handler_not_added,{invalid_levels,{_,43,42}}}} =
+ logger:add_handler(?MODULE,logger_std_h,
+ #{logger_std_h => #{drop_new_reqs_qlen=>43,
+ flush_reqs_qlen=>42}}),
ok = logger:add_handler(?MODULE,logger_std_h,
#{filter_default=>log,
@@ -691,16 +698,17 @@ internal_log(Type, Term) ->
op_switch_to_sync_file(Config) ->
{Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 500,
NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3,
- drop_new_reqs_qlen => 501,
- flush_reqs_qlen => 2000,
+ HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
+ drop_new_reqs_qlen => NumOfReqs+1,
+ flush_reqs_qlen => 2*NumOfReqs,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
%% TRecvPid = start_op_trace(),
- NumOfReqs = 500,
send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- NumOfReqs = count_lines(Log),
+ Lines = count_lines(Log),
+ ok = file:delete(Log),
%% true = analyse_trace(TRecvPid,
%% fun(Events) -> find_mode(async,Events) end),
%% true = analyse_trace(TRecvPid,
@@ -711,68 +719,82 @@ op_switch_to_sync_file(Config) ->
%% fun(Events) -> find_mode(drop,Events) end),
%% false = analyse_trace(TRecvPid,
%% fun(Events) -> find_mode(flush,Events) end),
- ok = file:delete(Log),
%% stop_op_trace(TRecvPid),
+ NumOfReqs = Lines,
ok.
op_switch_to_sync_file(cleanup, _Config) ->
ok = stop_handler(?MODULE).
op_switch_to_sync_tty(Config) ->
{HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
+ NumOfReqs = 500,
NewHConfig =
HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3,
- drop_new_reqs_qlen => 501,
- flush_reqs_qlen => 2000,
+ drop_new_reqs_qlen => NumOfReqs+1,
+ flush_reqs_qlen => 2*NumOfReqs,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 500,
send_burst({n,NumOfReqs}, seq, {chars,79}, info),
ok.
op_switch_to_sync_tty(cleanup, _Config) ->
ok = stop_handler(?MODULE).
+op_switch_to_drop_file() ->
+ [{timetrap,{seconds,180}}].
op_switch_to_drop_file(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
- NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 3,
- flush_reqs_qlen => 600,
+ Test =
+ fun() ->
+ {Log,HConfig,StdHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 300,
+ Procs = 2,
+ Bursts = 10,
+ NewHConfig =
+ HConfig#{logger_std_h =>
+ StdHConfig#{toggle_sync_qlen => 1,
+ drop_new_reqs_qlen => 2,
+ flush_reqs_qlen =>
+ Procs*NumOfReqs*Bursts,
enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- %% TRecvPid = start_op_trace(),
- NumOfReqs = 500,
- send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages dropped = ~w (~w)",
- [NumOfReqs-Logged,NumOfReqs]),
- true = (Logged < NumOfReqs),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(async,Events) end),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(drop,Events) end),
- %% false = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(flush,Events) end),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_switch(async,drop,Events)
- %% orelse find_switch(sync,drop,Events)
- %% end),
- ok = file:delete(Log),
- %% stop_op_trace(TRecvPid),
- ok.
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ %% It sometimes happens that the handler gets the
+ %% requests in a slow enough pace so that dropping
+ %% never occurs. Therefore, lets generate a number of
+ %% bursts to increase the chance of message buildup.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1, Bursts)],
+ Logged = count_lines(Log),
+ ok = stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages dropped = ~w (~w)",
+ [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+ true = (Logged < (Procs*NumOfReqs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_drop_file(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_drop_tty(Config) ->
{HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
+ NumOfReqs = 300,
+ Procs = 2,
NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 3,
- flush_reqs_qlen => 600,
+ HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1,
+ drop_new_reqs_qlen => 2,
+ flush_reqs_qlen =>
+ Procs*NumOfReqs+1,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 500,
- send_burst({n,NumOfReqs}, seq, {chars,79}, info),
+ send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
ok.
op_switch_to_drop_tty(cleanup, _Config) ->
ok = stop_handler(?MODULE).
@@ -780,32 +802,54 @@ op_switch_to_drop_tty(cleanup, _Config) ->
op_switch_to_flush_file() ->
[{timetrap,{minutes,3}}].
op_switch_to_flush_file(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
- %% it's important that both async and sync requests have been queued
- %% when the flush happens (verify with coverage of flush_log_requests/2)
-
- NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 99,
- flush_reqs_qlen => 100,
+ Test =
+ fun() ->
+ {Log,HConfig,StdHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+ %% NOTE: it's important that both async and sync
+ %% requests have been queued when the flush happens
+ %% (verify with coverage of flush_log_requests/2)
+
+ NewHConfig =
+ HConfig#{logger_std_h =>
+ StdHConfig#{toggle_sync_qlen => 2,
+ %% disable drop mode
+ drop_new_reqs_qlen => 300,
+ flush_reqs_qlen => 300,
enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 10000,
- Procs = 100,
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages flushed/dropped = ~w (~w)",
- [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
- true = (Logged < (NumOfReqs*Procs)),
-
- %%! --- Thu Apr 12 13:46:00 2018 --- peppe was here!
- %%! TODO: Verify that handler has switched to flush mode
-
- ok = file:delete(Log),
- ok.
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ NumOfReqs = 1500,
+ Procs = 10,
+ Bursts = 10,
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that flushing
+ %% never occurs, or it gets all messages at once,
+ %% causing all messages to get flushed (no dropping of
+ %% sync messages gets tested). Therefore, lets
+ %% generate a number of bursts to increase the chance
+ %% of message buildup in some random fashion.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1,Bursts)],
+ Logged = count_lines(Log),
+ ok = stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages flushed/dropped = ~w (~w)",
+ [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+ true = (Logged < (NumOfReqs*Procs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_flush_file(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_flush_tty(Config) ->
{HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
@@ -815,12 +859,13 @@ op_switch_to_flush_tty(Config) ->
NewHConfig =
HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
- drop_new_reqs_qlen => 99,
+ %% disable drop mode
+ drop_new_reqs_qlen => 100,
flush_reqs_qlen => 100,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 10000,
- Procs = 10,
+ NumOfReqs = 1000,
+ Procs = 100,
send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
ok.
op_switch_to_flush_tty(cleanup, _Config) ->
@@ -904,10 +949,10 @@ kill_disabled(cleanup, _Config) ->
ok = stop_handler(?MODULE).
qlen_kill_new(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -927,7 +972,7 @@ qlen_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -941,7 +986,7 @@ qlen_kill_new(cleanup, _Config) ->
%% choke the standard handler on remote node to verify the termination
%% works as expected
-qlen_kill_std(Config) ->
+qlen_kill_std(_Config) ->
%%! HERE
%% Dir = ?config(priv_dir, Config),
%% File = lists:concat([?MODULE,"_",?FUNCTION_NAME,".log"]),
@@ -955,10 +1000,10 @@ qlen_kill_std(Config) ->
{skip,"Not done yet"}.
mem_kill_new(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>50000,
@@ -978,7 +1023,7 @@ mem_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter * 2),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -992,7 +1037,7 @@ mem_kill_new(cleanup, _Config) ->
%% choke the standard handler on remote node to verify the termination
%% works as expected
-mem_kill_std(Config) ->
+mem_kill_std(_Config) ->
{skip,"Not done yet"}.
restart_after(Config) ->
@@ -1016,7 +1061,7 @@ restart_after(Config) ->
end,
{Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig2 =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -1028,7 +1073,7 @@ restart_after(Config) ->
send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
receive
{'DOWN', MRef2, _, _, _Info2} ->
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
Pid1 = whereis(?MODULE),
true = is_pid(Pid1),
false = (Pid1 == Pid0),
@@ -1074,7 +1119,7 @@ handler_requests_under_load(Config) ->
NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult),
ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]),
ok = file:delete(Log).
-handler_requests_under_load(cleanup, Config) ->
+handler_requests_under_load(cleanup, _Config) ->
ok = stop_handler(?MODULE).
send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) ->
@@ -1126,8 +1171,9 @@ start_handler(Name, FuncName, Config) ->
{Log,HConfig,StdHConfig}.
stop_handler(Name) ->
- ok = logger:remove_handler(Name),
- ct:pal("Handler ~p stopped!", [Name]).
+ R = logger:remove_handler(Name),
+ ct:pal("Handler ~p stopped! Result: ~p", [Name,R]),
+ R.
count_lines(File) ->
wait_until_written(File, -1),
@@ -1299,6 +1345,30 @@ try_match_file(_,Pattern,_,Incorrect) ->
[Pattern,Incorrect]),
erlang:error({error,not_matching_pattern,Pattern,Incorrect}).
+repeat_until_ok(Fun, N) ->
+ repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+ {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+ if C > 0 -> timer:sleep(5000);
+ true -> ok
+ end,
+ try Fun() of
+ Result ->
+ {ok,{C,Result}}
+ catch
+ _:Reason:Stack ->
+ ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+ if FirstReason == undefined ->
+ repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+ true ->
+ repeat_until_ok(Fun, C+1, Stop, FirstReason)
+ end
+ end.
+
+
%%%-----------------------------------------------------------------
%%%
start_op_trace() ->
@@ -1339,17 +1409,17 @@ find_mode(flush, Events) ->
find_mode(Mode, Events) ->
lists:keymember([{mode,Mode}], 3, Events).
-find_switch(From, To, Events) ->
- try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
- {trace_call,check_load,[#{mode := From}]}) ->
- throw(match);
- (Event, _) ->
- Event
- end, undefined, Events) of
- _ -> false
- catch
- throw:match -> true
- end.
+%% find_switch(_From, To, Events) ->
+%% try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
+%% {trace_call,check_load,[#{mode := From}]}) ->
+%% throw(match);
+%% (Event, _) ->
+%% Event
+%% end, undefined, Events) of
+%% _ -> false
+%% catch
+%% throw:match -> true
+%% end.
analyse_trace(TRecvPid, TestFun) ->
TRecvPid ! {test,self(),TestFun},
@@ -1411,7 +1481,7 @@ tpl([{M,F,A}|Trace]) ->
tpl([]) ->
ok.
-tracer({trace,_,call,{logger_std_h,handle_cast,[{Op,_}|_]}},
+tracer({trace,_,call,{logger_std_h,handle_cast,[Op|_]}},
{Pid,[{Mod,Func,Op}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{Mod,Func,Op});
tracer({trace,_,call,{Mod=logger_std_h,Func=write_to_dev,[_,Data,_,_,_]}},