aboutsummaryrefslogtreecommitdiffstats
path: root/lib/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'lib/kernel')
-rw-r--r--lib/kernel/doc/src/logger_chapter.xml4
-rw-r--r--lib/kernel/src/logger_disk_log_h.erl64
-rw-r--r--lib/kernel/src/logger_std_h.erl22
-rw-r--r--lib/kernel/test/logger_disk_log_h_SUITE.erl193
-rw-r--r--lib/kernel/test/logger_std_h_SUITE.erl233
5 files changed, 311 insertions, 205 deletions
diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml
index 5fb81034dc..0374a0c93a 100644
--- a/lib/kernel/doc/src/logger_chapter.xml
+++ b/lib/kernel/doc/src/logger_chapter.xml
@@ -697,8 +697,8 @@ my_report_cb(R) ->
</item>
</taglist>
- <p>For the overload protection algorithm to work properly, it is a
- requirement that:</p>
+ <p>For the overload protection algorithm to work properly, it is
+ required that:</p>
<p><c>toggle_sync_qlen =&lt; drop_new_reqs_qlen =&lt; flush_reqs_qlen</c></p>
diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl
index 3b71f936d8..eaa5cd6f99 100644
--- a/lib/kernel/src/logger_disk_log_h.erl
+++ b/lib/kernel/src/logger_disk_log_h.erl
@@ -278,10 +278,11 @@ init([Name, Config = #{disk_log_opts := LogOpts},
last_log_ts => T0,
burst_win_ts => T0,
burst_msg_count => 0,
+ last_op => sync,
prev_log_result => ok,
prev_sync_result => ok,
prev_disk_log_info => undefined}),
- gen_server:cast(self(), {repeated_disk_log_sync,T0}),
+ gen_server:cast(self(), repeated_disk_log_sync),
enter_loop(Config, State1);
Error ->
logger_h_common:error_notify({open_disk_log,Name,Error}),
@@ -316,8 +317,7 @@ handle_call(disk_log_sync, _From, State = #{id := Name}) ->
{reply, Result, State1};
handle_call({change_config,_OldConfig,NewConfig}, _From,
- State = #{filesync_repeat_interval := FSyncInt0,
- last_log_ts := LastLogTS}) ->
+ State = #{filesync_repeat_interval := FSyncInt0}) ->
HConfig = maps:get(?MODULE, NewConfig, #{}),
State1 = #{toggle_sync_qlen := TSQL,
drop_new_reqs_qlen := DNRQL,
@@ -338,9 +338,8 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
_ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
State,
undefined)),
- _ = gen_server:cast(self(), {repeated_disk_log_sync,
- LastLogTS})
- end,
+ _ = gen_server:cast(self(), repeated_disk_log_sync)
+ end,
{reply, ok, State1};
false ->
{reply, {error,{invalid_levels,{TSQL,DNRQL,FRQL}}}, State}
@@ -370,24 +369,23 @@ handle_cast({log, Bin}, State) ->
%% clause gets called repeatedly by the handler. In order to
%% guarantee that a filesync *always* happens after the last log
%% request, the repeat operation must be active!
-handle_cast({repeated_disk_log_sync,LastLogTS0},
+handle_cast(repeated_disk_log_sync,
State = #{id := Name,
filesync_repeat_interval := FSyncInt,
- last_log_ts := LastLogTS1}) ->
+ last_op := LastOp}) ->
State1 =
if is_integer(FSyncInt) ->
%% only do filesync if something has been
%% written since last time we checked
- NewState = if LastLogTS1 == LastLogTS0 ->
+ NewState = if LastOp == sync ->
State;
true ->
disk_log_sync(Name, State)
end,
{ok,TRef} =
timer:apply_after(FSyncInt, gen_server,cast,
- [self(),
- {repeated_disk_log_sync,LastLogTS1}]),
- NewState#{rep_sync_tref => TRef};
+ [self(),repeated_disk_log_sync]),
+ NewState#{rep_sync_tref => TRef, last_op => sync};
true ->
State
end,
@@ -649,10 +647,9 @@ close_disk_log(Name, _) ->
ok.
disk_log_write(Name, Bin, State) ->
- Result =
case ?disk_log_blog(Name, Bin) of
ok ->
- ok;
+ State#{prev_log_result => ok, last_op => write};
LogError ->
_ = case maps:get(prev_log_result, State) of
LogError ->
@@ -664,29 +661,26 @@ disk_log_write(Name, Bin, State) ->
LogOpts,
LogError})
end,
- LogError
- end,
- State#{prev_log_result => Result}.
+ State#{prev_log_result => LogError}
+ end.
disk_log_sync(Name, State) ->
- Result =
- case ?disk_log_sync(Name) of
- ok ->
- ok;
- SyncError ->
- _ = case maps:get(prev_sync_result, State) of
- SyncError ->
- %% don't report same error twice
- ok;
- _ ->
- LogOpts = maps:get(log_opts, State),
- logger_h_common:error_notify({Name,sync,
- LogOpts,
- SyncError})
- end,
- SyncError
- end,
- State#{prev_sync_result => Result}.
+ case ?disk_log_sync(Name) of
+ ok ->
+ State#{prev_sync_result => ok, last_op => sync};
+ SyncError ->
+ _ = case maps:get(prev_sync_result, State) of
+ SyncError ->
+ %% don't report same error twice
+ ok;
+ _ ->
+ LogOpts = maps:get(log_opts, State),
+ logger_h_common:error_notify({Name,sync,
+ LogOpts,
+ SyncError})
+ end,
+ State#{prev_sync_result => SyncError}
+ end.
error_notify_new(Info,Info, _Term) ->
ok;
diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl
index cbc9db372c..813fbad0ed 100644
--- a/lib/kernel/src/logger_std_h.erl
+++ b/lib/kernel/src/logger_std_h.erl
@@ -257,10 +257,11 @@ init([Name, Config,
file_ctrl_sync => FileCtrlSyncInt,
last_qlen => 0,
last_log_ts => T0,
+ last_op => sync,
burst_win_ts => T0,
burst_msg_count => 0}),
proc_lib:init_ack({ok,self()}),
- gen_server:cast(self(), {repeated_filesync,T0}),
+ gen_server:cast(self(), repeated_filesync),
enter_loop(Config, State1);
Error ->
logger_h_common:error_notify({init_handler,Name,Error}),
@@ -310,12 +311,11 @@ handle_call(filesync, _From, State = #{type := Type,
if is_atom(Type) ->
{reply, ok, State};
true ->
- {reply, file_ctrl_filesync_sync(FileCtrlPid), State}
+ {reply, file_ctrl_filesync_sync(FileCtrlPid), State#{last_op=>sync}}
end;
handle_call({change_config,_OldConfig,NewConfig}, _From,
- State = #{filesync_repeat_interval := FSyncInt0,
- last_log_ts := LastLogTS}) ->
+ State = #{filesync_repeat_interval := FSyncInt0}) ->
HConfig = maps:get(?MODULE, NewConfig, #{}),
State1 = maps:merge(State, HConfig),
case logger_h_common:overload_levels_ok(State1) of
@@ -334,8 +334,7 @@ handle_call({change_config,_OldConfig,NewConfig}, _From,
_ = logger_h_common:cancel_timer(maps:get(rep_sync_tref,
State,
undefined)),
- gen_server:cast(self(), {repeated_filesync,
- LastLogTS})
+ gen_server:cast(self(), repeated_filesync)
end,
{reply, ok, State1};
false ->
@@ -365,24 +364,24 @@ handle_cast({log, Bin}, State) ->
%% clause gets called repeatedly by the handler. In order to
%% guarantee that a filesync *always* happens after the last log
%% request, the repeat operation must be active!
-handle_cast({repeated_filesync,LastLogTS0},
+handle_cast(repeated_filesync,
State = #{type := Type,
file_ctrl_pid := FileCtrlPid,
filesync_repeat_interval := FSyncInt,
- last_log_ts := LastLogTS1}) ->
+ last_op := LastOp}) ->
State1 =
if not is_atom(Type), is_integer(FSyncInt) ->
%% only do filesync if something has been
%% written since last time we checked
- if LastLogTS1 == LastLogTS0 ->
+ if LastOp == sync ->
ok;
true ->
file_ctrl_filesync_async(FileCtrlPid)
end,
{ok,TRef} =
timer:apply_after(FSyncInt, gen_server,cast,
- [self(),{repeated_filesync,LastLogTS1}]),
- State#{rep_sync_tref => TRef};
+ [self(),repeated_filesync]),
+ State#{rep_sync_tref => TRef, last_op => sync};
true ->
State
end,
@@ -600,6 +599,7 @@ write(Name, Mode, T1, Bin, _CallOrCast,
State1#{mode => Mode1,
last_qlen := LastQLen1,
last_log_ts => T1,
+ last_op => write,
burst_win_ts => BurstWinT,
burst_msg_count => BurstMsgCount1,
file_ctrl_sync =>
diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl
index 6a4ec72d5e..bb88c53f26 100644
--- a/lib/kernel/test/logger_disk_log_h_SUITE.erl
+++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl
@@ -66,19 +66,7 @@ end_per_testcase(Case, Config) ->
ok.
groups() ->
- [
- {retry_op_switch_to_sync,
- [{repeat_until_all_ok,10}],
- [op_switch_to_sync]},
-
- {retry_op_switch_to_drop,
- [{repeat_until_all_ok,10}],
- [op_switch_to_drop]},
-
- {retry_op_switch_to_flush,
- [{repeat_until_all_ok,10}],
- [op_switch_to_flush]}
- ].
+ [].
all() ->
[start_stop_handler,
@@ -99,9 +87,9 @@ all() ->
disk_log_events,
write_failure,
sync_failure,
- {group,retry_op_switch_to_sync},
- {group,retry_op_switch_to_drop},
- {group,retry_op_switch_to_flush},
+ op_switch_to_sync,
+ op_switch_to_drop,
+ op_switch_to_flush,
limit_burst_disabled,
limit_burst_enabled_one,
limit_burst_enabled_period,
@@ -880,57 +868,107 @@ op_switch_to_sync(Config) ->
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- NumOfReqs = count_lines(Log),
- ok = file:delete(Log).
+ Lines = count_lines(Log),
+ ok = file:delete(Log),
+ NumOfReqs = Lines,
+ ok.
op_switch_to_sync(cleanup, _Config) ->
ok = stop_handler(?MODULE).
+op_switch_to_drop() ->
+ [{timetrap,{seconds,180}}].
op_switch_to_drop(Config) ->
- {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- NumOfReqs = 300,
- Procs = 2,
- NewHConfig =
- HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1,
- drop_new_reqs_qlen => 2,
- flush_reqs_qlen => Procs*NumOfReqs+1,
- enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages dropped = ~w (~w)",
- [Procs*NumOfReqs-Logged,Procs*NumOfReqs]),
- true = (Logged < (Procs*NumOfReqs)),
- true = (Logged > 0),
- ok = file:delete(Log).
+ Test =
+ fun() ->
+ {Log,HConfig,DLHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 300,
+ Procs = 2,
+ Bursts = 10,
+ NewHConfig =
+ HConfig#{logger_disk_log_h =>
+ DLHConfig#{toggle_sync_qlen => 1,
+ drop_new_reqs_qlen => 2,
+ flush_reqs_qlen => Procs*NumOfReqs*Bursts,
+ enable_burst_limit => false}},
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that dropping
+ %% never occurs. Therefore, lets generate a number of
+ %% bursts to increase the chance of message buildup.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1, Bursts)],
+ Logged = count_lines(Log),
+ ok= stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages dropped = ~w (~w)",
+ [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+ true = (Logged < (Procs*NumOfReqs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_drop(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_flush() ->
- [{timetrap,{seconds,60}}].
+ [{timetrap,{seconds,180}}].
op_switch_to_flush(Config) ->
- {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ Test =
+ fun() ->
+ {Log,HConfig,DLHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+ %% NOTE: it's important that both async and sync
+ %% requests have been queued when the flush happens
+ %% (verify with coverage of flush_log_requests/2)
- %% it's important that both async and sync requests have been queued
- %% when the flush happens (verify with coverage of flush_log_requests/2)
-
- NewHConfig =
- HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2,
- %% disable drop mode
- drop_new_reqs_qlen => 500,
- flush_reqs_qlen => 500,
- enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 1000,
- Procs = 200,
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages flushed/dropped = ~w (~w)",
- [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
- true = (Logged < (NumOfReqs*Procs)),
- true = (Logged > 0),
- ok = file:delete(Log).
+ NewHConfig =
+ HConfig#{logger_disk_log_h =>
+ DLHConfig#{toggle_sync_qlen => 2,
+ %% disable drop mode
+ drop_new_reqs_qlen => 300,
+ flush_reqs_qlen => 300,
+ enable_burst_limit => false}},
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ NumOfReqs = 1500,
+ Procs = 10,
+ Bursts = 10,
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that flushing
+ %% never occurs, or it gets all messages at once,
+ %% causing all messages to get flushed (no dropping of
+ %% sync messages gets tested). Therefore, lets
+ %% generate a number of bursts to increase the chance
+ %% of message buildup in some random fashion.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1,Bursts)],
+ Logged = count_lines(Log),
+ ok= stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages flushed/dropped = ~w (~w)",
+ [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+ true = (Logged < (NumOfReqs*Procs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_flush(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
limit_burst_disabled(Config) ->
@@ -1014,7 +1052,7 @@ qlen_kill_new(Config) ->
{Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_disk_log_h =>
DLHConfig#{enable_kill_overloaded=>true,
@@ -1035,7 +1073,7 @@ qlen_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -1051,7 +1089,7 @@ mem_kill_new(Config) ->
{Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_disk_log_h =>
DLHConfig#{enable_kill_overloaded=>true,
@@ -1072,7 +1110,7 @@ mem_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter * 2),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -1105,7 +1143,7 @@ restart_after(Config) ->
end,
{Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig2 =
HConfig#{logger_disk_log_h=>DLHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -1117,7 +1155,7 @@ restart_after(Config) ->
send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
receive
{'DOWN', MRef2, _, _, _Info2} ->
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
Pid1 = whereis(?MODULE),
true = is_pid(Pid1),
false = (Pid1 == Pid0),
@@ -1388,6 +1426,29 @@ count_lines1(File) ->
file:close(Dev),
Lines.
+repeat_until_ok(Fun, N) ->
+ repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+ {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+ if C > 0 -> timer:sleep(5000);
+ true -> ok
+ end,
+ try Fun() of
+ Result ->
+ {ok,{C,Result}}
+ catch
+ _:Reason:Stack ->
+ ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+ if FirstReason == undefined ->
+ repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+ true ->
+ repeat_until_ok(Fun, C+1, Stop, FirstReason)
+ end
+ end.
+
start_tracer(Trace,Expected) ->
Pid = self(),
dbg:tracer(process,{fun tracer/2,{Pid,Expected}}),
@@ -1410,9 +1471,11 @@ tpl([{M,F,A}|Trace]) ->
tpl([]) ->
ok.
-tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}}, {Pid,[{formatter,Msg}|Expected]}) ->
+tracer({trace,_,call,{?MODULE,format,[#{msg:={string,Msg}}|_]}},
+ {Pid,[{formatter,Msg}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{formatter,Msg});
-tracer({trace,_,call,{logger_disk_log_h,handle_cast,[{Op,_}|_]}}, {Pid,[{Mod,Func,Op}|Expected]}) ->
+tracer({trace,_,call,{logger_disk_log_h,handle_cast,[Op|_]}},
+ {Pid,[{Mod,Func,Op}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{Mod,Func,Op});
tracer({trace,_,call,{Mod,Func,_}}, {Pid,[{Mod,Func}|Expected]}) ->
maybe_tracer_done(Pid,Expected,{Mod,Func});
diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl
index ed5504431e..3ebbbe74ef 100644
--- a/lib/kernel/test/logger_std_h_SUITE.erl
+++ b/lib/kernel/test/logger_std_h_SUITE.erl
@@ -89,19 +89,7 @@ end_per_testcase(Case, Config) ->
ok.
groups() ->
- [
- {retry_op_switch_to_sync_file,
- [{repeat_until_all_ok,10}],
- [op_switch_to_sync_file]},
-
- {retry_op_switch_to_drop_file,
- [{repeat_until_all_ok,10}],
- [op_switch_to_drop_file]},
-
- {retry_op_switch_to_flush_file,
- [{repeat_until_all_ok,10}],
- [op_switch_to_flush_file]}
- ].
+ [].
all() ->
[add_remove_instance_tty,
@@ -122,11 +110,11 @@ all() ->
filesync,
write_failure,
sync_failure,
- {group,retry_op_switch_to_sync_file},
+ op_switch_to_sync_file,
op_switch_to_sync_tty,
- {group,retry_op_switch_to_drop_file},
+ op_switch_to_drop_file,
op_switch_to_drop_tty,
- {group,retry_op_switch_to_flush_file},
+ op_switch_to_flush_file,
op_switch_to_flush_tty,
limit_burst_disabled,
limit_burst_enabled_one,
@@ -588,7 +576,7 @@ filesync(Config) ->
#{filesync_repeat_interval => no_repeat}),
dbg:stop_clear(),
- Received2 = lists:map(fun({trace,_M,handle_cast,[{Op,_},_]}) -> {trace,Op};
+ Received2 = lists:map(fun({trace,_M,handle_cast,[Op,_]}) -> {trace,Op};
(Other) -> Other
end, test_server:messages_get()),
ct:pal("Trace #2 =~n~p", [Received2]),
@@ -726,7 +714,8 @@ op_switch_to_sync_file(Config) ->
ok = logger:set_handler_config(?MODULE, NewHConfig),
%% TRecvPid = start_op_trace(),
send_burst({n,NumOfReqs}, seq, {chars,79}, info),
- NumOfReqs = count_lines(Log),
+ Lines = count_lines(Log),
+ ok = file:delete(Log),
%% true = analyse_trace(TRecvPid,
%% fun(Events) -> find_mode(async,Events) end),
%% true = analyse_trace(TRecvPid,
@@ -737,8 +726,8 @@ op_switch_to_sync_file(Config) ->
%% fun(Events) -> find_mode(drop,Events) end),
%% false = analyse_trace(TRecvPid,
%% fun(Events) -> find_mode(flush,Events) end),
- ok = file:delete(Log),
%% stop_op_trace(TRecvPid),
+ NumOfReqs = Lines,
ok.
op_switch_to_sync_file(cleanup, _Config) ->
ok = stop_handler(?MODULE).
@@ -757,38 +746,49 @@ op_switch_to_sync_tty(Config) ->
op_switch_to_sync_tty(cleanup, _Config) ->
ok = stop_handler(?MODULE).
+op_switch_to_drop_file() ->
+ [{timetrap,{seconds,180}}].
op_switch_to_drop_file(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- NumOfReqs = 300,
- Procs = 2,
- NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1,
+ Test =
+ fun() ->
+ {Log,HConfig,StdHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ NumOfReqs = 300,
+ Procs = 2,
+ Bursts = 10,
+ NewHConfig =
+ HConfig#{logger_std_h =>
+ StdHConfig#{toggle_sync_qlen => 1,
drop_new_reqs_qlen => 2,
- flush_reqs_qlen => Procs*NumOfReqs+1,
+ flush_reqs_qlen =>
+ Procs*NumOfReqs*Bursts,
enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- %% TRecvPid = start_op_trace(),
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages dropped = ~w (~w)",
- [Procs*NumOfReqs-Logged,Procs*NumOfReqs]),
- true = (Logged < (Procs*NumOfReqs)),
- true = (Logged > 0),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(async,Events) end),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(drop,Events) end),
- %% false = analyse_trace(TRecvPid,
- %% fun(Events) -> find_mode(flush,Events) end),
- %% true = analyse_trace(TRecvPid,
- %% fun(Events) -> find_switch(async,drop,Events)
- %% orelse find_switch(sync,drop,Events)
- %% end),
- ok = file:delete(Log),
- %% stop_op_trace(TRecvPid),
- ok.
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ %% It sometimes happens that the handler gets the
+ %% requests in a slow enough pace so that dropping
+ %% never occurs. Therefore, lets generate a number of
+ %% bursts to increase the chance of message buildup.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1, Bursts)],
+ Logged = count_lines(Log),
+ ok = stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages dropped = ~w (~w)",
+ [Procs*NumOfReqs*Bursts-Logged,Procs*NumOfReqs*Bursts]),
+ true = (Logged < (Procs*NumOfReqs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_drop_file(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_drop_tty(Config) ->
{HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
@@ -797,7 +797,8 @@ op_switch_to_drop_tty(Config) ->
NewHConfig =
HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1,
drop_new_reqs_qlen => 2,
- flush_reqs_qlen => Procs*NumOfReqs+1,
+ flush_reqs_qlen =>
+ Procs*NumOfReqs+1,
enable_burst_limit => false}},
ok = logger:set_handler_config(?MODULE, NewHConfig),
send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
@@ -806,32 +807,56 @@ op_switch_to_drop_tty(cleanup, _Config) ->
ok = stop_handler(?MODULE).
op_switch_to_flush_file() ->
- [{timetrap,{seconds,60}}].
+ [{timetrap,{seconds,180}}].
op_switch_to_flush_file(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
-
- %% it's important that both async and sync requests have been queued
- %% when the flush happens (verify with coverage of flush_log_requests/2)
-
- NewHConfig =
- HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2,
+ Test =
+ fun() ->
+ {Log,HConfig,StdHConfig} =
+ start_handler(?MODULE, ?FUNCTION_NAME, Config),
+
+ %% NOTE: it's important that both async and sync
+ %% requests have been queued when the flush happens
+ %% (verify with coverage of flush_log_requests/2)
+
+ NewHConfig =
+ HConfig#{logger_std_h =>
+ StdHConfig#{toggle_sync_qlen => 2,
%% disable drop mode
- drop_new_reqs_qlen => 500,
- flush_reqs_qlen => 500,
+ drop_new_reqs_qlen => 300,
+ flush_reqs_qlen => 300,
enable_burst_limit => false}},
- ok = logger:set_handler_config(?MODULE, NewHConfig),
- NumOfReqs = 1000,
- Procs = 200,
- send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info),
- Logged = count_lines(Log),
- ct:pal("Number of messages flushed/dropped = ~w (~w)",
- [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]),
- true = (Logged < (NumOfReqs*Procs)),
- true = (Logged > 0),
- ok = file:delete(Log),
- ok.
+ ok = logger:set_handler_config(?MODULE, NewHConfig),
+ NumOfReqs = 1500,
+ Procs = 10,
+ Bursts = 10,
+ %% It sometimes happens that the handler either gets
+ %% the requests in a slow enough pace so that flushing
+ %% never occurs, or it gets all messages at once,
+ %% causing all messages to get flushed (no dropping of
+ %% sync messages gets tested). Therefore, lets
+ %% generate a number of bursts to increase the chance
+ %% of message buildup in some random fashion.
+ [send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info) ||
+ _ <- lists:seq(1,Bursts)],
+ Logged = count_lines(Log),
+ ok = stop_handler(?MODULE),
+ _ = file:delete(Log),
+ ct:pal("Number of messages flushed/dropped = ~w (~w)",
+ [NumOfReqs*Procs*Bursts-Logged,NumOfReqs*Procs*Bursts]),
+ true = (Logged < (NumOfReqs*Procs*Bursts)),
+ true = (Logged > 0),
+ ok
+ end,
+ %% As it's tricky to get the timing right in only one go, we perform the
+ %% test repeatedly, hoping that will generate a successful result.
+ case repeat_until_ok(Test, 10) of
+ {ok,{Failures,_Result}} ->
+ ct:log("Failed ~w times before success!", [Failures]);
+ {fails,Reason} ->
+ ct:fail(Reason)
+ end.
op_switch_to_flush_file(cleanup, _Config) ->
- ok = stop_handler(?MODULE).
+ _ = stop_handler(?MODULE).
op_switch_to_flush_tty(Config) ->
{HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config),
@@ -931,10 +956,10 @@ kill_disabled(cleanup, _Config) ->
ok = stop_handler(?MODULE).
qlen_kill_new(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -954,7 +979,7 @@ qlen_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -968,7 +993,7 @@ qlen_kill_new(cleanup, _Config) ->
%% choke the standard handler on remote node to verify the termination
%% works as expected
-qlen_kill_std(Config) ->
+qlen_kill_std(_Config) ->
%%! HERE
%% Dir = ?config(priv_dir, Config),
%% File = lists:concat([?MODULE,"_",?FUNCTION_NAME,".log"]),
@@ -982,10 +1007,10 @@ qlen_kill_std(Config) ->
{skip,"Not done yet"}.
mem_kill_new(Config) ->
- {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
+ {_Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
Pid0 = whereis(?MODULE),
{_,Mem0} = process_info(Pid0, memory),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>50000,
@@ -1005,7 +1030,7 @@ mem_kill_new(Config) ->
killed ->
ct:pal("Slow shutdown, handler process was killed!", [])
end,
- timer:sleep(RestartAfter * 2),
+ timer:sleep(RestartAfter + 2000),
true = is_pid(whereis(?MODULE)),
ok
after
@@ -1019,7 +1044,7 @@ mem_kill_new(cleanup, _Config) ->
%% choke the standard handler on remote node to verify the termination
%% works as expected
-mem_kill_std(Config) ->
+mem_kill_std(_Config) ->
{skip,"Not done yet"}.
restart_after(Config) ->
@@ -1043,7 +1068,7 @@ restart_after(Config) ->
end,
{Log,_,_} = start_handler(?MODULE, ?FUNCTION_NAME, Config),
- RestartAfter = 2000,
+ RestartAfter = ?HANDLER_RESTART_AFTER,
NewHConfig2 =
HConfig#{logger_std_h=>StdHConfig#{enable_kill_overloaded=>true,
handler_overloaded_qlen=>10,
@@ -1055,7 +1080,7 @@ restart_after(Config) ->
send_burst({n,100}, {spawn,2,0}, {chars,79}, info),
receive
{'DOWN', MRef2, _, _, _Info2} ->
- timer:sleep(RestartAfter + 1000),
+ timer:sleep(RestartAfter + 2000),
Pid1 = whereis(?MODULE),
true = is_pid(Pid1),
false = (Pid1 == Pid0),
@@ -1101,7 +1126,7 @@ handler_requests_under_load(Config) ->
NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult),
ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]),
ok = file:delete(Log).
-handler_requests_under_load(cleanup, Config) ->
+handler_requests_under_load(cleanup, _Config) ->
ok = stop_handler(?MODULE).
send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) ->
@@ -1326,6 +1351,30 @@ try_match_file(_,Pattern,_,Incorrect) ->
[Pattern,Incorrect]),
erlang:error({error,not_matching_pattern,Pattern,Incorrect}).
+repeat_until_ok(Fun, N) ->
+ repeat_until_ok(Fun, 0, N, undefined).
+
+repeat_until_ok(_Fun, Stop, Stop, Reason) ->
+ {fails,Reason};
+
+repeat_until_ok(Fun, C, Stop, FirstReason) ->
+ if C > 0 -> timer:sleep(5000);
+ true -> ok
+ end,
+ try Fun() of
+ Result ->
+ {ok,{C,Result}}
+ catch
+ _:Reason:Stack ->
+ ct:pal("Test fails: ~p (~p)~n", [Reason,hd(Stack)]),
+ if FirstReason == undefined ->
+ repeat_until_ok(Fun, C+1, Stop, {Reason,Stack});
+ true ->
+ repeat_until_ok(Fun, C+1, Stop, FirstReason)
+ end
+ end.
+
+
%%%-----------------------------------------------------------------
%%%
start_op_trace() ->
@@ -1366,17 +1415,17 @@ find_mode(flush, Events) ->
find_mode(Mode, Events) ->
lists:keymember([{mode,Mode}], 3, Events).
-find_switch(From, To, Events) ->
- try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
- {trace_call,check_load,[#{mode := From}]}) ->
- throw(match);
- (Event, _) ->
- Event
- end, undefined, Events) of
- _ -> false
- catch
- throw:match -> true
- end.
+%% find_switch(_From, To, Events) ->
+%% try lists:foldl(fun({trace_return,check_load,{To,_,_,_}},
+%% {trace_call,check_load,[#{mode := From}]}) ->
+%% throw(match);
+%% (Event, _) ->
+%% Event
+%% end, undefined, Events) of
+%% _ -> false
+%% catch
+%% throw:match -> true
+%% end.
analyse_trace(TRecvPid, TestFun) ->
TRecvPid ! {test,self(),TestFun},