From 5a93c690267d0a3ca12304c34b0e4538549fabb1 Mon Sep 17 00:00:00 2001 From: Peter Andersson Date: Thu, 3 May 2018 17:23:16 +0200 Subject: Make it possible to disable sync and drop mode Also add tests and update failing test cases --- lib/kernel/doc/src/logger_chapter.xml | 12 +++- lib/kernel/src/logger_h_common.erl | 16 +++-- lib/kernel/src/logger_h_common.hrl | 2 +- lib/kernel/test/logger_disk_log_h_SUITE.erl | 65 +++++++++++++------- lib/kernel/test/logger_std_h_SUITE.erl | 94 +++++++++++++++++------------ 5 files changed, 123 insertions(+), 66 deletions(-) (limited to 'lib') diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml index 0bc3b37476..5fb81034dc 100644 --- a/lib/kernel/doc/src/logger_chapter.xml +++ b/lib/kernel/doc/src/logger_chapter.xml @@ -700,7 +700,17 @@ my_report_cb(R) ->

For the overload protection algorithm to work properly, it is a requirement that:

-

toggle_sync_qlen < drop_new_reqs_qlen < flush_reqs_qlen

+

toggle_sync_qlen =< drop_new_reqs_qlen =< flush_reqs_qlen

+ +

and that:

+ +

drop_new_reqs_qlen > 1

+ +

If toggle_sync_qlen is set to 0, the handler will handle all + requests synchronously. Setting the value of toggle_sync_qlen to the same + as drop_new_reqs_qlen, disables the synchronous mode. Likewise, setting + the value of drop_new_reqs_qlen to the same as flush_reqs_qlen, + disables the drop mode.

During high load scenarios, the length of the handler message queue rarely grows in a linear and predictable way. Instead, whenever the diff --git a/lib/kernel/src/logger_h_common.erl b/lib/kernel/src/logger_h_common.erl index 7caad366ae..901c4c0dad 100644 --- a/lib/kernel/src/logger_h_common.erl +++ b/lib/kernel/src/logger_h_common.erl @@ -135,7 +135,8 @@ call_cast_or_drop(Name, Bin) -> _:{timeout,_} -> ?observe(Name,{dropped,1}) end; - drop -> ?observe(Name,{dropped,1}) + drop -> + ?observe(Name,{dropped,1}) catch %% if the ETS table doesn't exist (maybe because of a %% handler restart), we can only drop the request @@ -152,12 +153,15 @@ check_load(State = #{id:=Name, mode := Mode, flush_reqs_qlen := FlushQLen}) -> {_,Mem} = process_info(self(), memory), ?observe(Name,{max_mem,Mem}), - %% make sure the handler process doesn't get scheduled - %% out between the message_queue_len check below and the - %% action that follows (flush or write). {_,QLen} = process_info(self(), message_queue_len), ?observe(Name,{max_qlen,QLen}), - + %% When the handler process gets scheduled in, it's impossible + %% to predict the QLen. We could jump "up" arbitrarily from say + %% async to sync, async to drop, sync to flush, etc. However, when + %% the handler process manages the log requests (without flushing), + %% one after the other, we will move "down" from drop to sync and + %% from sync to async. This way we don't risk getting stuck in + %% drop or sync mode with an empty mailbox. {Mode1,_NewDrops,_NewFlushes} = if QLen >= FlushQLen -> @@ -292,7 +296,7 @@ overload_levels_ok(HandlerConfig) -> TSQL = maps:get(toggle_sync_qlen, HandlerConfig, ?TOGGLE_SYNC_QLEN), DNRQL = maps:get(drop_new_reqs_qlen, HandlerConfig, ?DROP_NEW_REQS_QLEN), FRQL = maps:get(flush_reqs_qlen, HandlerConfig, ?FLUSH_REQS_QLEN), - (TSQL < DNRQL) andalso (DNRQL < FRQL). + (DNRQL > 1) andalso (TSQL =< DNRQL) andalso (DNRQL =< FRQL). error_notify(Term) -> ?internal_log(error, Term). diff --git a/lib/kernel/src/logger_h_common.hrl b/lib/kernel/src/logger_h_common.hrl index 89378dbb10..ed365ce6eb 100644 --- a/lib/kernel/src/logger_h_common.hrl +++ b/lib/kernel/src/logger_h_common.hrl @@ -124,7 +124,7 @@ %%% slow down execution and therefore should not be include in code %%% to be officially released. -%% -define(TEST_HOOKS, true). +-define(TEST_HOOKS, true). -ifdef(TEST_HOOKS). -define(TEST_HOOKS_TAB, logger_h_test_hooks). diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl index c7c6137380..6a4ec72d5e 100644 --- a/lib/kernel/test/logger_disk_log_h_SUITE.erl +++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl @@ -66,7 +66,19 @@ end_per_testcase(Case, Config) -> ok. groups() -> - []. + [ + {retry_op_switch_to_sync, + [{repeat_until_all_ok,10}], + [op_switch_to_sync]}, + + {retry_op_switch_to_drop, + [{repeat_until_all_ok,10}], + [op_switch_to_drop]}, + + {retry_op_switch_to_flush, + [{repeat_until_all_ok,10}], + [op_switch_to_flush]} + ]. all() -> [start_stop_handler, @@ -87,9 +99,9 @@ all() -> disk_log_events, write_failure, sync_failure, - op_switch_to_sync, - op_switch_to_drop, - op_switch_to_flush, + {group,retry_op_switch_to_sync}, + {group,retry_op_switch_to_drop}, + {group,retry_op_switch_to_flush}, limit_burst_disabled, limit_burst_enabled_one, limit_burst_enabled_period, @@ -369,10 +381,18 @@ config_fail(_Config) -> #{logger_disk_log_h => #{bad => bad}, filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_levels,{42,42,_}}}} = + + {error,{handler_not_added,{invalid_levels,{_,1,_}}}} = logger:add_handler(?MODULE,logger_disk_log_h, - #{logger_disk_log_h => #{toggle_sync_qlen=>42, + #{logger_disk_log_h => #{drop_new_reqs_qlen=>1}}), + {error,{handler_not_added,{invalid_levels,{43,42,_}}}} = + logger:add_handler(?MODULE,logger_disk_log_h, + #{logger_disk_log_h => #{toggle_sync_qlen=>43, drop_new_reqs_qlen=>42}}), + {error,{handler_not_added,{invalid_levels,{_,43,42}}}} = + logger:add_handler(?MODULE,logger_disk_log_h, + #{logger_disk_log_h => #{drop_new_reqs_qlen=>43, + flush_reqs_qlen=>42}}), ok = logger:add_handler(?MODULE,logger_disk_log_h, #{filter_default=>log, @@ -852,13 +872,13 @@ internal_log(Type, Term) -> op_switch_to_sync(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 500, NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), NumOfReqs = count_lines(Log), ok = file:delete(Log). @@ -867,19 +887,20 @@ op_switch_to_sync(cleanup, _Config) -> op_switch_to_drop(Config) -> {Log,HConfig,DLHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages dropped = ~w (~w)", - [NumOfReqs-Logged,NumOfReqs]), - true = (Logged < NumOfReqs), + [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), + true = (Logged < (Procs*NumOfReqs)), + true = (Logged > 0), ok = file:delete(Log). op_switch_to_drop(cleanup, _Config) -> ok = stop_handler(?MODULE). @@ -894,17 +915,19 @@ op_switch_to_flush(Config) -> NewHConfig = HConfig#{logger_disk_log_h => DLHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, - flush_reqs_qlen => 100, + %% disable drop mode + drop_new_reqs_qlen => 500, + flush_reqs_qlen => 500, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), NumOfReqs = 1000, - Procs = 500, + Procs = 200, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages flushed/dropped = ~w (~w)", [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), true = (Logged < (NumOfReqs*Procs)), + true = (Logged > 0), ok = file:delete(Log). op_switch_to_flush(cleanup, _Config) -> ok = stop_handler(?MODULE). diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl index e940e0a026..ed5504431e 100644 --- a/lib/kernel/test/logger_std_h_SUITE.erl +++ b/lib/kernel/test/logger_std_h_SUITE.erl @@ -89,7 +89,19 @@ end_per_testcase(Case, Config) -> ok. groups() -> - []. + [ + {retry_op_switch_to_sync_file, + [{repeat_until_all_ok,10}], + [op_switch_to_sync_file]}, + + {retry_op_switch_to_drop_file, + [{repeat_until_all_ok,10}], + [op_switch_to_drop_file]}, + + {retry_op_switch_to_flush_file, + [{repeat_until_all_ok,10}], + [op_switch_to_flush_file]} + ]. all() -> [add_remove_instance_tty, @@ -110,11 +122,11 @@ all() -> filesync, write_failure, sync_failure, - op_switch_to_sync_file, + {group,retry_op_switch_to_sync_file}, op_switch_to_sync_tty, - op_switch_to_drop_file, + {group,retry_op_switch_to_drop_file}, op_switch_to_drop_tty, - op_switch_to_flush_file, + {group,retry_op_switch_to_flush_file}, op_switch_to_flush_tty, limit_burst_disabled, limit_burst_enabled_one, @@ -289,10 +301,17 @@ config_fail(_Config) -> #{logger_std_h => #{restart_type => bad}, filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_levels,{42,42,_}}}} = + {error,{handler_not_added,{invalid_levels,{_,1,_}}}} = + logger:add_handler(?MODULE,logger_std_h, + #{logger_std_h => #{drop_new_reqs_qlen=>1}}), + {error,{handler_not_added,{invalid_levels,{43,42,_}}}} = logger:add_handler(?MODULE,logger_std_h, - #{logger_std_h => #{toggle_sync_qlen=>42, + #{logger_std_h => #{toggle_sync_qlen=>43, drop_new_reqs_qlen=>42}}), + {error,{handler_not_added,{invalid_levels,{_,43,42}}}} = + logger:add_handler(?MODULE,logger_std_h, + #{logger_std_h => #{drop_new_reqs_qlen=>43, + flush_reqs_qlen=>42}}), ok = logger:add_handler(?MODULE,logger_std_h, #{filter_default=>log, @@ -698,14 +717,14 @@ internal_log(Type, Term) -> op_switch_to_sync_file(Config) -> {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), + NumOfReqs = 500, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), %% TRecvPid = start_op_trace(), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), NumOfReqs = count_lines(Log), %% true = analyse_trace(TRecvPid, @@ -726,13 +745,13 @@ op_switch_to_sync_file(cleanup, _Config) -> op_switch_to_sync_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), + NumOfReqs = 500, NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 3, - drop_new_reqs_qlen => 501, - flush_reqs_qlen => 2000, + drop_new_reqs_qlen => NumOfReqs+1, + flush_reqs_qlen => 2*NumOfReqs, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, send_burst({n,NumOfReqs}, seq, {chars,79}, info), ok. op_switch_to_sync_tty(cleanup, _Config) -> @@ -740,20 +759,21 @@ op_switch_to_sync_tty(cleanup, _Config) -> op_switch_to_drop_file(Config) -> {Log,HConfig,StdHConfig} = start_handler(?MODULE, ?FUNCTION_NAME, Config), - + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), %% TRecvPid = start_op_trace(), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages dropped = ~w (~w)", - [NumOfReqs-Logged,NumOfReqs]), - true = (Logged < NumOfReqs), + [Procs*NumOfReqs-Logged,Procs*NumOfReqs]), + true = (Logged < (Procs*NumOfReqs)), + true = (Logged > 0), %% true = analyse_trace(TRecvPid, %% fun(Events) -> find_mode(async,Events) end), %% true = analyse_trace(TRecvPid, @@ -772,14 +792,15 @@ op_switch_to_drop_file(cleanup, _Config) -> op_switch_to_drop_tty(Config) -> {HConfig,StdHConfig} = start_handler(?MODULE, standard_io, Config), + NumOfReqs = 300, + Procs = 2, NewHConfig = - HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 3, - flush_reqs_qlen => 600, + HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 1, + drop_new_reqs_qlen => 2, + flush_reqs_qlen => Procs*NumOfReqs+1, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 500, - send_burst({n,NumOfReqs}, seq, {chars,79}, info), + send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), ok. op_switch_to_drop_tty(cleanup, _Config) -> ok = stop_handler(?MODULE). @@ -794,21 +815,19 @@ op_switch_to_flush_file(Config) -> NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, - flush_reqs_qlen => 100, + %% disable drop mode + drop_new_reqs_qlen => 500, + flush_reqs_qlen => 500, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 10000, - Procs = 100, + NumOfReqs = 1000, + Procs = 200, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), Logged = count_lines(Log), ct:pal("Number of messages flushed/dropped = ~w (~w)", [(NumOfReqs*Procs)-Logged,NumOfReqs*Procs]), true = (Logged < (NumOfReqs*Procs)), - - %%! --- Thu Apr 12 13:46:00 2018 --- peppe was here! - %%! TODO: Verify that handler has switched to flush mode - + true = (Logged > 0), ok = file:delete(Log), ok. op_switch_to_flush_file(cleanup, _Config) -> @@ -822,12 +841,13 @@ op_switch_to_flush_tty(Config) -> NewHConfig = HConfig#{logger_std_h => StdHConfig#{toggle_sync_qlen => 2, - drop_new_reqs_qlen => 99, + %% disable drop mode + drop_new_reqs_qlen => 100, flush_reqs_qlen => 100, enable_burst_limit => false}}, ok = logger:set_handler_config(?MODULE, NewHConfig), - NumOfReqs = 10000, - Procs = 10, + NumOfReqs = 1000, + Procs = 100, send_burst({n,NumOfReqs}, {spawn,Procs,0}, {chars,79}, info), ok. op_switch_to_flush_tty(cleanup, _Config) -> -- cgit v1.2.3