diff options
author | Siri Hansen <[email protected]> | 2019-01-21 14:26:07 +0100 |
---|---|---|
committer | Siri Hansen <[email protected]> | 2019-01-21 14:26:07 +0100 |
commit | a8364425a7190b65cd33e58ac3c155a77c462b74 (patch) | |
tree | c659252e6397483d1f1771485cfee80b198a3fbb /lib | |
parent | a5682ac818347a5091ba7dbc88928e8a634f15ec (diff) | |
parent | f7123f64f6fa1727b49648e4335720910d8f10bf (diff) | |
download | otp-a8364425a7190b65cd33e58ac3c155a77c462b74.tar.gz otp-a8364425a7190b65cd33e58ac3c155a77c462b74.tar.bz2 otp-a8364425a7190b65cd33e58ac3c155a77c462b74.zip |
Merge branch 'siri/logger/erts-and-remote-log-olp' into maint
* siri/logger/erts-and-remote-log-olp:
[logger] Update bench marks to report percent instead of 0.XX
[logger] Minor fix in logger_olp_SUITE to avoid error in end_per_testcase
[logger] Remove info and reset functions from handler modules
[logger] Remove some unused variable warnings
[logger] Store proxy config in logger ets table
[logger] Log mode change and flushes in logger_proxy
[logger] Move out overload protection macros from logger_h_common.hrl
[logger] Allow logger_olp callbacks to return {stop,...}
Add logger_stress_SUITE to benchmarks spec
[logger] Add idle timer in logger_olp
[logger] Use persistent_term for storing proxy reference
Update preloaded
Use system_time instead of monotonic_time in log events
[logger] Add test for restart of logger proxy
[logger] Add API function for configuring logger proxy
[logger] Add logger_stress_SUITE
[logger] Add tests for logger_proxy
[logger] Overload protect logging from erts and remote nodes
[logger] Split overload protection functionality to own module
Diffstat (limited to 'lib')
27 files changed, 2382 insertions, 907 deletions
diff --git a/lib/kernel/doc/src/logger.xml b/lib/kernel/doc/src/logger.xml index e09c5db5e3..0668676096 100644 --- a/lib/kernel/doc/src/logger.xml +++ b/lib/kernel/doc/src/logger.xml @@ -245,6 +245,12 @@ logger:error("error happened because: ~p", [Reason]). % Without macro </desc> </datatype> <datatype> + <name name="olp_config"/> + <desc> + <p></p> + </desc> + </datatype> + <datatype> <name name="primary_config"/> <desc> <p>Primary configuration data for Logger. The following @@ -597,8 +603,8 @@ start(_, []) -> <name name="get_config" arity="0" since="OTP 21.0"/> <fsummary>Look up the current Logger configuration</fsummary> <desc> - <p>Look up all current Logger configuration, including primary - and handler configuration, and module level settings.</p> + <p>Look up all current Logger configuration, including primary, + handler, and proxy configuration, and module level settings.</p> </desc> </func> @@ -636,6 +642,17 @@ start(_, []) -> </func> <func> + <name name="get_proxy_config" arity="0" since="OTP 21.3"/> + <fsummary>Look up the current configuration for the Logger proxy.</fsummary> + <desc> + <p>Look up the current configuration for the Logger proxy.</p> + <p>For more information about the proxy, see + section <seealso marker="logger_chapter#proxy">Logger + Proxy</seealso> in the Kernel User's Guide.</p> + </desc> + </func> + + <func> <name name="get_module_level" arity="0" since="OTP 21.0"/> <fsummary>Look up all current module levels.</fsummary> <desc> @@ -801,6 +818,27 @@ start(_, []) -> </func> <func> + <name name="set_proxy_config" arity="1" since="OTP 21.3"/> + <fsummary>Set configuration data for the Logger proxy.</fsummary> + <desc> + <p>Set configuration data for the Logger proxy. This + overwrites the current proxy configuration. Keys that are not + specified in the <c><anno>Config</anno></c> map gets default + values.</p> + <p>To modify the existing configuration, + use <seealso marker="#update_proxy_config-1"> + <c>update_proxy_config/1</c></seealso>, or, if a more + complex merge is needed, read the current configuration + with <seealso marker="#get_proxy_config-0"><c>get_proxy_config/0</c> + </seealso>, then do the merge before writing the new + configuration back with this function.</p> + <p>For more information about the proxy, see + section <seealso marker="logger_chapter#proxy">Logger + Proxy</seealso> in the Kernel User's Guide.</p> + </desc> + </func> + + <func> <name name="set_module_level" arity="2" since="OTP 21.0"/> <fsummary>Set the log level for the specified modules.</fsummary> <desc> @@ -1013,6 +1051,25 @@ logger:set_process_metadata(maps:merge(logger:get_process_metadata(), Meta)). </seealso>.</p> </desc> </func> + + <func> + <name name="update_proxy_config" arity="1" since="OTP 21.3"/> + <fsummary>Update configuration data for the Logger proxy.</fsummary> + <desc> + <p>Update configuration data for the Logger proxy. This function + behaves as if it was implemented as follows:</p> + <code type="erl"> +Old = logger:get_proxy_config(), +logger:set_proxy_config(maps:merge(Old, Config)). + </code> + <p>To overwrite the existing configuration without any merge, + use <seealso marker="#set_proxy_config-1"><c>set_proxy_config/1</c> + </seealso>.</p> + <p>For more information about the proxy, see + section <seealso marker="logger_chapter#proxy">Logger + Proxy</seealso> in the Kernel User's Guide.</p> + </desc> + </func> </funcs> <section> diff --git a/lib/kernel/doc/src/logger_chapter.xml b/lib/kernel/doc/src/logger_chapter.xml index 458e61cef5..5ed7397135 100644 --- a/lib/kernel/doc/src/logger_chapter.xml +++ b/lib/kernel/doc/src/logger_chapter.xml @@ -693,8 +693,10 @@ logger:debug(#{got => connection_request, id => Id, state => State}, with <seealso marker="#logger_sasl_compatible"> <c>logger_sasl_compatible</c></seealso>.</p> <p>With this parameter, you can modify or disable the default - handler, add custom handlers and primary logger filters, and - set log levels per module.</p> + handler, add custom handlers and primary logger filters, set + log levels per module, and modify + the <seealso marker="#proxy">proxy</seealso> + configuration.</p> <p><c>Config</c> is any (zero or more) of the following:</p> <taglist> <tag><c>{handler, default, undefined}</c></tag> @@ -746,6 +748,14 @@ logger:debug(#{got => connection_request, id => Id, state => State}, <p>for each <c>Module</c>.</p> <p>Multiple entries of this type are allowed.</p> </item> + <tag><c>{proxy, ProxyConfig}</c></tag> + <item> + <p>Sets the proxy configuration, equivalent to calling</p> + <pre><seealso marker="logger#set_proxy_config/1"> + logger:set_proxy_config(ProxyConfig) + </seealso></pre> + <p>Only one entry of this type is allowed.</p> + </item> </taglist> <p>See section <seealso marker="#config_examples">Configuration @@ -1334,9 +1344,50 @@ logger:add_handler(my_disk_log_h, logger_disk_log_h, </section> <section> + <marker id="proxy"/> + <title>Logger Proxy</title> + <p>The Logger proxy is an Erlang process which is part of the + Kernel application's supervision tree. During startup, the proxy + process registers itself as the <c>system_logger</c>, meaning + that log events produced by the emulator are sent to this + process.</p> + <p>When a log event is issued on a process which has its group + leader on a remote node, Logger automatically forwards the log + event to the group leader's node. To achieve this, it first + sends the log event as an Erlang message from the original + client process to the proxy on the local node, and the proxy in + turn forwards the event to the proxy on the remote node.</p> + <p>When receiving a log event, either from the emulator or from a + remote node, the proxy calls the Logger API to log the event.</p> + <p>The proxy process is overload protected in the same way as + described in + section <seealso marker="#overload_protection">Protecting the + Handler from Overload</seealso>, but with the following default + values:</p> + <code> + #{sync_mode_qlen => 500, + drop_mode_qlen => 1000, + flush_qlen => 5000, + burst_limit_enable => false, + overload_kill_enable => false}</code> + <p>For log events from the emulator, synchronous message passing + mode is not applicable, since all messages are passed + asynchronously by the emulator. Drop mode is achieved by setting + the <c>system_logger</c> to <c>undefined</c>, forcing the + emulator to drop events until it is set back to the proxy pid + again.</p> + <p>The proxy uses <seealso marker="erts:erlang#send_nosuspend/2"> + <c>erlang:send_nosuspend/2</c></seealso> when sending log + events to a remote node. If the message could not be sent + without suspending the sender, it is dropped. This is to avoid + blocking the proxy process.</p> + </section> + + <section> <title>See Also</title> <p> <seealso marker="disk_log"><c>disk_log(3)</c></seealso>, + <seealso marker="erts:erlang"><c>erlang(3)</c></seealso>, <seealso marker="error_logger"><c>error_logger(3)</c></seealso>, <seealso marker="logger"><c>logger(3)</c></seealso>, <seealso marker="logger_disk_log_h"><c>logger_disk_log_h(3)</c></seealso>, diff --git a/lib/kernel/src/Makefile b/lib/kernel/src/Makefile index 57f17defc8..3d1506ea08 100644 --- a/lib/kernel/src/Makefile +++ b/lib/kernel/src/Makefile @@ -118,6 +118,8 @@ MODULES = \ logger_h_common \ logger_filters \ logger_formatter \ + logger_olp \ + logger_proxy \ logger_server \ logger_simple_h \ logger_sup \ @@ -151,7 +153,7 @@ INTERNAL_HRL_FILES= application_master.hrl disk_log.hrl \ inet_dns.hrl inet_res.hrl \ inet_boot.hrl inet_config.hrl inet_int.hrl \ inet_dns_record_adts.hrl \ - logger_internal.hrl logger_h_common.hrl + logger_internal.hrl logger_olp.hrl logger_h_common.hrl ERL_FILES= $(MODULES:%=%.erl) @@ -279,6 +281,8 @@ $(EBIN)/logger_config.beam: logger_internal.hrl ../include/logger.hrl $(EBIN)/logger_disk_log_h.beam: logger_h_common.hrl logger_internal.hrl ../include/logger.hrl ../include/file.hrl $(EBIN)/logger_filters.beam: logger_internal.hrl ../include/logger.hrl $(EBIN)/logger_formatter.beam: logger_internal.hrl ../include/logger.hrl +$(EBIN)/logger_olp.beam: logger_olp.hrl logger_internal.hrl +$(EBIN)/logger_proxy.beam: logger_internal.hrl $(EBIN)/logger_server.beam: logger_internal.hrl ../include/logger.hrl $(EBIN)/logger_simple_h.beam: logger_internal.hrl ../include/logger.hrl $(EBIN)/logger_std_h.beam: logger_h_common.hrl logger_internal.hrl ../include/logger.hrl ../include/file.hrl diff --git a/lib/kernel/src/kernel.app.src b/lib/kernel/src/kernel.app.src index fe073621c8..a1d9e8e215 100644 --- a/lib/kernel/src/kernel.app.src +++ b/lib/kernel/src/kernel.app.src @@ -68,6 +68,8 @@ logger_formatter, logger_h_common, logger_handler_watcher, + logger_olp, + logger_proxy, logger_server, logger_simple_h, logger_std_h, diff --git a/lib/kernel/src/logger.erl b/lib/kernel/src/logger.erl index 6762998d4f..abdd9a9ceb 100644 --- a/lib/kernel/src/logger.erl +++ b/lib/kernel/src/logger.erl @@ -43,11 +43,14 @@ get_module_level/0, get_module_level/1, set_primary_config/1, set_primary_config/2, set_handler_config/2, set_handler_config/3, + set_proxy_config/1, update_primary_config/1, update_handler_config/2, update_handler_config/3, + update_proxy_config/1, update_formatter_config/2, update_formatter_config/3, get_primary_config/0, get_handler_config/1, get_handler_config/0, get_handler_ids/0, get_config/0, + get_proxy_config/0, add_handlers/1]). %% Private configuration @@ -122,6 +125,18 @@ {filters,log | stop,[{filter_id(),filter()}]} | {module_level,level(),[module()]}]. +-type olp_config() :: #{sync_mode_qlen => non_neg_integer(), + drop_mode_qlen => pos_integer(), + flush_qlen => pos_integer(), + burst_limit_enable => boolean(), + burst_limit_max_count => pos_integer(), + burst_limit_window_time => pos_integer(), + overload_kill_enable => boolean(), + overload_kill_qlen => pos_integer(), + overload_kill_mem_size => pos_integer(), + overload_kill_restart_after => + non_neg_integer() | infinity}. + -export_type([log_event/0, level/0, report/0, @@ -137,7 +152,8 @@ filter_arg/0, filter_return/0, config_handler/0, - formatter_config/0]). + formatter_config/0, + olp_config/0]). %%%----------------------------------------------------------------- %%% API @@ -390,6 +406,7 @@ set_primary_config(Key,Value) -> set_primary_config(Config) -> logger_server:set_config(primary,Config). + -spec set_handler_config(HandlerId,level,Level) -> Return when HandlerId :: handler_id(), Level :: level() | all | none, @@ -419,6 +436,11 @@ set_handler_config(HandlerId,Key,Value) -> set_handler_config(HandlerId,Config) -> logger_server:set_config(HandlerId,Config). +-spec set_proxy_config(Config) -> ok | {error,term()} when + Config :: olp_config(). +set_proxy_config(Config) -> + logger_server:set_config(proxy,Config). + -spec update_primary_config(Config) -> ok | {error,term()} when Config :: primary_config(). update_primary_config(Config) -> @@ -453,6 +475,11 @@ update_handler_config(HandlerId,Key,Value) -> update_handler_config(HandlerId,Config) -> logger_server:update_config(HandlerId,Config). +-spec update_proxy_config(Config) -> ok | {error,term()} when + Config :: olp_config(). +update_proxy_config(Config) -> + logger_server:update_config(proxy,Config). + -spec get_primary_config() -> Config when Config :: primary_config(). get_primary_config() -> @@ -486,6 +513,12 @@ get_handler_ids() -> {ok,#{handlers:=HandlerIds}} = logger_config:get(?LOGGER_TABLE,primary), HandlerIds. +-spec get_proxy_config() -> Config when + Config :: olp_config(). +get_proxy_config() -> + {ok,Config} = logger_config:get(?LOGGER_TABLE,proxy), + Config. + -spec update_formatter_config(HandlerId,FormatterConfig) -> ok | {error,term()} when HandlerId :: handler_id(), @@ -606,10 +639,12 @@ unset_process_metadata() -> -spec get_config() -> #{primary=>primary_config(), handlers=>[handler_config()], + proxy=>olp_config(), module_levels=>[{module(),level() | all | none}]}. get_config() -> #{primary=>get_primary_config(), handlers=>get_handler_config(), + proxy=>get_proxy_config(), module_levels=>lists:keysort(1,get_module_level())}. -spec internal_init_logger() -> ok | {error,term()}. @@ -672,6 +707,17 @@ init_kernel_handlers(Env) -> %% This function is responsible for resolving the handler config %% and then starting the correct handlers. This is done after the %% kernel supervisor tree has been started as it needs the logger_sup. +add_handlers(kernel) -> + Env = get_logger_env(kernel), + case get_proxy_opts(Env) of + undefined -> + add_handlers(kernel,Env); + Opts -> + case set_proxy_config(Opts) of + ok -> add_handlers(kernel,Env); + {error, Reason} -> {error,{bad_proxy_config,Reason}} + end + end; add_handlers(App) when is_atom(App) -> add_handlers(App,get_logger_env(App)); add_handlers(HandlerConfig) -> @@ -729,6 +775,8 @@ check_logger_config(kernel,[{filters,_,_}|Env]) -> check_logger_config(kernel,Env); check_logger_config(kernel,[{module_level,_,_}|Env]) -> check_logger_config(kernel,Env); +check_logger_config(kernel,[{proxy,_}|Env]) -> + check_logger_config(kernel,Env); check_logger_config(_,Bad) -> throw(Bad). @@ -784,6 +832,13 @@ get_primary_filters(Env) -> _ -> throw({multiple_filters,Env}) end. +get_proxy_opts(Env) -> + case [P || P={proxy,_} <- Env] of + [{proxy,Opts}] -> Opts; + [] -> undefined; + _ -> throw({multiple_proxies,Env}) + end. + %% This function looks at the kernel logger environment %% and updates it so that the correct logger is configured init_default_config(Type,Env) when Type==standard_io; @@ -880,30 +935,30 @@ log_allowed(Location,Level,Msg,Meta0) when is_map(Meta0) -> maps:merge(Location,maps:merge(proc_meta(),Meta0))), case node(maps:get(gl,Meta)) of Node when Node=/=node() -> - log_remote(Node,Level,Msg,Meta), - do_log_allowed(Level,Msg,Meta); + log_remote(Node,Level,Msg,Meta); _ -> - do_log_allowed(Level,Msg,Meta) - end. + ok + end, + do_log_allowed(Level,Msg,Meta,tid()). -do_log_allowed(Level,{Format,Args}=Msg,Meta) +do_log_allowed(Level,{Format,Args}=Msg,Meta,Tid) when ?IS_LEVEL(Level), is_list(Format), is_list(Args), is_map(Meta) -> - logger_backend:log_allowed(#{level=>Level,msg=>Msg,meta=>Meta},tid()); -do_log_allowed(Level,Report,Meta) + logger_backend:log_allowed(#{level=>Level,msg=>Msg,meta=>Meta},Tid); +do_log_allowed(Level,Report,Meta,Tid) when ?IS_LEVEL(Level), ?IS_REPORT(Report), is_map(Meta) -> logger_backend:log_allowed(#{level=>Level,msg=>{report,Report},meta=>Meta}, - tid()); -do_log_allowed(Level,String,Meta) + Tid); +do_log_allowed(Level,String,Meta,Tid) when ?IS_LEVEL(Level), ?IS_STRING(String), is_map(Meta) -> logger_backend:log_allowed(#{level=>Level,msg=>{string,String},meta=>Meta}, - tid()). + Tid). tid() -> ets:whereis(?LOGGER_TABLE). @@ -913,7 +968,7 @@ log_remote(Node,Level,Msg,Meta) -> log_remote(Node,{log,Level,Msg,Meta}). log_remote(Node,Request) -> - {logger,Node} ! Request, + logger_proxy:log({remote,Node,Request}), ok. add_default_metadata(Meta) -> diff --git a/lib/kernel/src/logger_config.erl b/lib/kernel/src/logger_config.erl index 5e9faf332c..5024d20cfe 100644 --- a/lib/kernel/src/logger_config.erl +++ b/lib/kernel/src/logger_config.erl @@ -66,6 +66,8 @@ get(Tid,What) -> case ets:lookup(Tid,table_key(What)) of [{_,_,Config}] -> {ok,Config}; + [{_,Config}] when What=:=proxy -> + {ok,Config}; [] -> {error,{not_found,What}} end. @@ -79,10 +81,15 @@ get(Tid,What,Level) -> [Data] -> {ok,Data} end. +create(Tid,proxy,Config) -> + ets:insert(Tid,{table_key(proxy),Config}); create(Tid,What,Config) -> LevelInt = level_to_int(maps:get(level,Config)), ets:insert(Tid,{table_key(What),LevelInt,Config}). +set(Tid,proxy,Config) -> + ets:insert(Tid,{table_key(proxy),Config}), + ok; set(Tid,What,Config) -> LevelInt = level_to_int(maps:get(level,Config)), %% Should do this only if the level has actually changed. Possibly @@ -148,5 +155,6 @@ int_to_level(?LOG_ALL) -> all. %%%----------------------------------------------------------------- %%% Internal +table_key(proxy) -> ?PROXY_KEY; table_key(primary) -> ?PRIMARY_KEY; table_key(HandlerId) -> {?HANDLER_KEY,HandlerId}. diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl index 41e0d51a9d..47b39da900 100644 --- a/lib/kernel/src/logger_disk_log_h.erl +++ b/lib/kernel/src/logger_disk_log_h.erl @@ -24,7 +24,7 @@ -include("logger_h_common.hrl"). %%% API --export([info/1, filesync/1, reset/1]). +-export([filesync/1]). %% logger_h_common callbacks -export([init/2, check_config/4, reset_state/2, @@ -47,25 +47,6 @@ filesync(Name) -> logger_h_common:filesync(?MODULE,Name). -%%%----------------------------------------------------------------- -%%% --spec info(Name) -> Info | {error,Reason} when - Name :: atom(), - Info :: term(), - Reason :: handler_busy | {badarg,term()}. - -info(Name) -> - logger_h_common:info(?MODULE,Name). - -%%%----------------------------------------------------------------- -%%% --spec reset(Name) -> ok | {error,Reason} when - Name :: atom(), - Reason :: handler_busy | {badarg,term()}. - -reset(Name) -> - logger_h_common:reset(?MODULE,Name). - %%%=================================================================== %%% logger callbacks %%%=================================================================== diff --git a/lib/kernel/src/logger_h_common.erl b/lib/kernel/src/logger_h_common.erl index 74a2d158fc..e69f6de38d 100644 --- a/lib/kernel/src/logger_h_common.erl +++ b/lib/kernel/src/logger_h_common.erl @@ -24,11 +24,11 @@ -include("logger_internal.hrl"). %% API --export([start_link/1, info/2, filesync/2, reset/2]). +-export([filesync/2]). -%% gen_server and proc_lib callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, - terminate/2, code_change/3]). +%% logger_olp callbacks +-export([init/1, handle_load/2, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3, notify/2, reset_state/1]). %% logger callbacks -export([log/2, adding_handler/1, removing_handler/1, changing_config/3, @@ -37,52 +37,45 @@ %% Library functions for handlers -export([error_notify/1]). -%%%----------------------------------------------------------------- --define(CONFIG_KEYS,[sync_mode_qlen, - drop_mode_qlen, - flush_qlen, - burst_limit_enable, - burst_limit_max_count, - burst_limit_window_time, - overload_kill_enable, - overload_kill_qlen, - overload_kill_mem_size, - overload_kill_restart_after, - filesync_repeat_interval]). --define(READ_ONLY_KEYS,[handler_pid,mode_tab]). +-define(OLP_KEYS,[sync_mode_qlen, + drop_mode_qlen, + flush_qlen, + burst_limit_enable, + burst_limit_max_count, + burst_limit_window_time, + overload_kill_enable, + overload_kill_qlen, + overload_kill_mem_size, + overload_kill_restart_after]). + +-define(COMMON_KEYS,[filesync_repeat_interval]). + +-define(READ_ONLY_KEYS,[olp]). %%%----------------------------------------------------------------- %%% API %% This function is called by the logger_sup supervisor -start_link(Args) -> - proc_lib:start_link(?MODULE,init,[Args]). - filesync(Module, Name) -> call(Module, Name, filesync). -info(Module, Name) -> - call(Module, Name, info). - -reset(Module, Name) -> - call(Module, Name, reset). - %%%----------------------------------------------------------------- %%% Handler being added adding_handler(#{id:=Name,module:=Module}=Config) -> HConfig0 = maps:get(config, Config, #{}), - HandlerConfig0 = maps:without(?CONFIG_KEYS,HConfig0), + HandlerConfig0 = maps:without(?OLP_KEYS++?COMMON_KEYS,HConfig0), case Module:check_config(Name,set,undefined,HandlerConfig0) of {ok,HandlerConfig} -> - ModifiedCommon = maps:with(?CONFIG_KEYS,HandlerConfig), - CommonConfig0 = maps:with(?CONFIG_KEYS,HConfig0), + ModifiedCommon = maps:with(?COMMON_KEYS,HandlerConfig), + CommonConfig0 = maps:with(?COMMON_KEYS,HConfig0), CommonConfig = maps:merge( maps:merge(get_default_config(), CommonConfig0), ModifiedCommon), case check_config(CommonConfig) of ok -> HConfig = maps:merge(CommonConfig,HandlerConfig), - start(Config#{config => HConfig}); + OlpOpts = maps:with(?OLP_KEYS,HConfig0), + start(OlpOpts, Config#{config => HConfig}); {error,Faulty} -> {error,{invalid_config,Module,Faulty}} end; @@ -92,11 +85,11 @@ adding_handler(#{id:=Name,module:=Module}=Config) -> %%%----------------------------------------------------------------- %%% Handler being removed -removing_handler(#{id:=Name, module:=Module}) -> +removing_handler(#{id:=Name, module:=Module, config:=#{olp:=Olp}}) -> case whereis(?name_to_reg_name(Module,Name)) of undefined -> ok; - Pid -> + _Pid -> %% We don't want to do supervisor:terminate_child here %% since we need to distinguish this explicit stop from a %% system termination in order to avoid circular attempts @@ -106,7 +99,7 @@ removing_handler(#{id:=Name, module:=Module}) -> %% the restart type is temporary, which means that the %% child specification is automatically removed from the %% supervisor when the process dies. - _ = gen_server:call(Pid, stop), + _ = logger_olp:stop(Olp), ok end. @@ -116,34 +109,52 @@ changing_config(SetOrUpdate, #{id:=Name,config:=OldHConfig,module:=Module}, NewConfig0) -> NewHConfig0 = maps:get(config, NewConfig0, #{}), - OldHandlerConfig = maps:without(?CONFIG_KEYS++?READ_ONLY_KEYS,OldHConfig), - NewHandlerConfig0 = maps:without(?CONFIG_KEYS++?READ_ONLY_KEYS,NewHConfig0), + NoHandlerKeys = ?OLP_KEYS++?COMMON_KEYS++?READ_ONLY_KEYS, + OldHandlerConfig = maps:without(NoHandlerKeys,OldHConfig), + NewHandlerConfig0 = maps:without(NoHandlerKeys,NewHConfig0), case Module:check_config(Name, SetOrUpdate, OldHandlerConfig,NewHandlerConfig0) of {ok, NewHandlerConfig} -> - ModifiedCommon = maps:with(?CONFIG_KEYS,NewHandlerConfig), - NewCommonConfig0 = maps:with(?CONFIG_KEYS,NewHConfig0), + ModifiedCommon = maps:with(?COMMON_KEYS,NewHandlerConfig), + NewCommonConfig0 = maps:with(?COMMON_KEYS,NewHConfig0), + OldCommonConfig = maps:with(?COMMON_KEYS,OldHConfig), CommonDefault = case SetOrUpdate of set -> get_default_config(); update -> - maps:with(?CONFIG_KEYS,OldHConfig) + OldCommonConfig end, NewCommonConfig = maps:merge( maps:merge(CommonDefault,NewCommonConfig0), ModifiedCommon), case check_config(NewCommonConfig) of ok -> - ReadOnly = maps:with(?READ_ONLY_KEYS,OldHConfig), - NewHConfig = maps:merge( - maps:merge(NewCommonConfig,NewHandlerConfig), - ReadOnly), - NewConfig = NewConfig0#{config=>NewHConfig}, - HPid = maps:get(handler_pid,OldHConfig), - case call(HPid, {change_config,NewConfig}) of - ok -> {ok,NewConfig}; - Error -> Error + OlpDefault = + case SetOrUpdate of + set -> + logger_olp:get_default_opts(); + update -> + maps:with(?OLP_KEYS,OldHConfig) + end, + Olp = maps:get(olp,OldHConfig), + NewOlpOpts = maps:merge(OlpDefault, + maps:with(?OLP_KEYS,NewHConfig0)), + case logger_olp:set_opts(Olp,NewOlpOpts) of + ok -> + maybe_set_repeated_filesync(Olp,OldCommonConfig, + NewCommonConfig), + ReadOnly = maps:with(?READ_ONLY_KEYS,OldHConfig), + NewHConfig = + maps:merge( + maps:merge( + maps:merge(NewCommonConfig,NewHandlerConfig), + ReadOnly), + NewOlpOpts), + NewConfig = NewConfig0#{config=>NewHConfig}, + {ok,NewConfig}; + Error -> + Error end; {error,Faulty} -> {error,{invalid_config,Module,Faulty}} @@ -158,14 +169,12 @@ changing_config(SetOrUpdate, LogEvent :: logger:log_event(), Config :: logger:handler_config(). -log(LogEvent, Config = #{id := Name, - config := #{handler_pid := HPid, - mode_tab := ModeTab}}) -> +log(LogEvent, Config = #{config := #{olp:=Olp}}) -> %% if the handler has crashed, we must drop this event %% and hope the handler restarts so we can try again - true = is_process_alive(HPid), + true = is_process_alive(logger_olp:get_pid(Olp)), Bin = log_to_binary(LogEvent, Config), - call_cast_or_drop(Name, HPid, ModeTab, Bin). + logger_olp:load(Olp,Bin). %%%----------------------------------------------------------------- %%% Remove internal fields from configuration @@ -180,18 +189,23 @@ filter_config(#{config:=HConfig}=Config) -> %%% %%% The handler process is linked to logger_sup, which is part of the %%% kernel application's supervision tree. -start(#{id := Name} = Config0) -> +start(OlpOpts0, #{id := Name, module:=Module, config:=HConfig} = Config0) -> + RegName = ?name_to_reg_name(Module,Name), ChildSpec = #{id => Name, - start => {?MODULE, start_link, [Config0]}, + start => {logger_olp, start_link, [RegName,?MODULE, + Config0, OlpOpts0]}, restart => temporary, shutdown => 2000, type => worker, modules => [?MODULE]}, case supervisor:start_child(logger_sup, ChildSpec) of - {ok,Pid,Config} -> + {ok,Pid,Olp} -> ok = logger_handler_watcher:register_handler(Name,Pid), - {ok,Config}; + OlpOpts = logger_olp:get_opts(Olp), + {ok,Config0#{config=>(maps:merge(HConfig,OlpOpts))#{olp=>Olp}}}; + {error,{Reason,Ch}} when is_tuple(Ch), element(1,Ch)==child -> + {error,Reason}; Error -> Error end. @@ -200,103 +214,50 @@ start(#{id := Name} = Config0) -> %%% gen_server callbacks %%%=================================================================== -init(#{id := Name, module := Module, - formatter := Formatter, config := HConfig0} = Config0) -> - RegName = ?name_to_reg_name(Module,Name), - register(RegName, self()), +init(#{id := Name, module := Module, config := HConfig}) -> process_flag(trap_exit, true), - process_flag(message_queue_data, off_heap), ?init_test_hooks(), - ?start_observation(Name), - case Module:init(Name, HConfig0) of + case Module:init(Name, HConfig) of {ok,HState} -> - try ets:new(Name, [public]) of - ModeTab -> - ?set_mode(ModeTab, async), - T0 = ?timestamp(), - HConfig = HConfig0#{handler_pid => self(), - mode_tab => ModeTab}, - Config = Config0#{config => HConfig}, - proc_lib:init_ack({ok,self(),Config}), - %% Storing common config in state to avoid copying - %% (sending) the config data for each log message - CommonConfig = maps:with(?CONFIG_KEYS,HConfig), - State = - ?merge_with_stats( - CommonConfig#{id => Name, - module => Module, - mode_tab => ModeTab, - mode => async, - ctrl_sync_count => - ?CONTROLLER_SYNC_INTERVAL, - last_qlen => 0, - last_log_ts => T0, - last_op => sync, - burst_win_ts => T0, - burst_msg_count => 0, - formatter => Formatter, - handler_state => HState}), - State1 = set_repeated_filesync(State), - unset_restart_flag(State1), - gen_server:enter_loop(?MODULE, [], State1) - catch - _:Error -> - unregister(RegName), - error_notify({init_handler,Name,Error}), - proc_lib:init_ack(Error) - end; + %% Storing common config in state to avoid copying + %% (sending) the config data for each log message + CommonConfig = maps:with(?COMMON_KEYS,HConfig), + State = CommonConfig#{id => Name, + module => Module, + ctrl_sync_count => + ?CONTROLLER_SYNC_INTERVAL, + last_op => sync, + handler_state => HState}, + State1 = set_repeated_filesync(State), + {ok,State1}; Error -> - unregister(RegName), - error_notify({init_handler,Name,Error}), - proc_lib:init_ack(Error) + Error end. -%% This is the synchronous log event. -handle_call({log, Bin}, _From, State) -> - {Result,State1} = do_log(Bin, call, State), - %% Result == ok | dropped - {reply,Result, State1}; +%% This is the log event. +handle_load(Bin, #{id:=Name, + module:=Module, + handler_state:=HandlerState, + ctrl_sync_count := CtrlSync}=State) -> + if CtrlSync==0 -> + {_,HS1} = Module:write(Name, sync, Bin, HandlerState), + State#{handler_state => HS1, + ctrl_sync_count => ?CONTROLLER_SYNC_INTERVAL, + last_op=>write}; + true -> + {_,HS1} = Module:write(Name, async, Bin, HandlerState), + State#{handler_state => HS1, + ctrl_sync_count => CtrlSync-1, + last_op=>write} + end. handle_call(filesync, _From, State = #{id := Name, module := Module, handler_state := HandlerState}) -> {Result,HandlerState1} = Module:filesync(Name,sync,HandlerState), - {reply, Result, State#{handler_state=>HandlerState1, last_op=>sync}}; - -handle_call({change_config, #{formatter:=Formatter, config:=NewHConfig}}, _From, - State = #{filesync_repeat_interval := FSyncInt0}) -> - %% In the future, if handler_state must be updated due to config - %% change, then we need to add a callback to Module here. - CommonConfig = maps:with(?CONFIG_KEYS,NewHConfig), - State1 = maps:merge(State, CommonConfig), - State2 = - case maps:get(filesync_repeat_interval, NewHConfig) of - FSyncInt0 -> - State1; - _FSyncInt1 -> - set_repeated_filesync(cancel_repeated_filesync(State1)) - end, - {reply, ok, State2#{formatter:=Formatter}}; - -handle_call(info, _From, State) -> - {reply, State, State}; - -handle_call(reset, _From, - #{id:=Name,module:=Module,handler_state:=HandlerState}=State) -> - State1 = ?merge_with_stats(State), - {reply, ok, State1#{last_qlen => 0, - last_log_ts => ?timestamp(), - handler_state => Module:reset_state(Name,HandlerState)}}; - -handle_call(stop, _From, State) -> - {stop, {shutdown,stopped}, ok, State}. - -%% This is the asynchronous log event. -handle_cast({log, Bin}, State) -> - {_,State1} = do_log(Bin, cast, State), - {noreply, State1}; + {reply, Result, State#{handler_state=>HandlerState1, last_op=>sync}}. %% If FILESYNC_REPEAT_INTERVAL is set to a millisec value, this %% clause gets called repeatedly by the handler. In order to @@ -319,168 +280,83 @@ handle_cast(repeated_filesync, {_,HS} = Module:filesync(Name, async, HandlerState), State#{handler_state => HS, last_op => sync} end, - {noreply,set_repeated_filesync(State1)}. + {noreply,set_repeated_filesync(State1)}; + +handle_cast({set_repeated_filesync,FSyncInt},State) -> + State1 = State#{filesync_repeat_interval=>FSyncInt}, + State2 = set_repeated_filesync(cancel_repeated_filesync(State1)), + {noreply, State2}. handle_info(Info, #{id := Name, module := Module, handler_state := HandlerState} = State) -> {noreply,State#{handler_state => Module:handle_info(Name,Info,HandlerState)}}. -terminate(Reason, State = #{id := Name, - module := Module, - handler_state := HandlerState}) -> +terminate(overloaded=Reason, #{id:=Name}=State) -> + _ = log_handler_info(Name,"Handler ~p overloaded and stopping",[Name],State), + do_terminate(Reason,State), + ConfigResult = logger:get_handler_config(Name), + case ConfigResult of + {ok,#{module:=Module}=HConfig0} -> + spawn(fun() -> logger:remove_handler(Name) end), + HConfig = try Module:filter_config(HConfig0) + catch _:_ -> HConfig0 + end, + {ok,fun() -> logger:add_handler(Name,Module,HConfig) end}; + Error -> + error_notify({Name,restart_impossible,Error}), + Error + end; +terminate(Reason, State) -> + do_terminate(Reason, State). + +do_terminate(Reason, State = #{id := Name, + module := Module, + handler_state := HandlerState}) -> _ = cancel_repeated_filesync(State), _ = Module:terminate(Name, Reason, HandlerState), - ok = stop_or_restart(Name, Reason, State), - unregister(?name_to_reg_name(Module, Name)), ok. code_change(_OldVsn, State, _Extra) -> {ok, State}. +reset_state(#{id:=Name, module:=Module, handler_state:=HandlerState} = State) -> + State#{handler_state=>Module:reset_state(Name, HandlerState)}. %%%----------------------------------------------------------------- %%% Internal functions call(Module, Name, Op) when is_atom(Name) -> - call(?name_to_reg_name(Module,Name), Op); + case logger_olp:call(?name_to_reg_name(Module,Name), Op) of + {error,busy} -> {error,handler_busy}; + Other -> Other + end; call(_, Name, Op) -> {error,{badarg,{Op,[Name]}}}. -call(Server, Msg) -> - try - gen_server:call(Server, Msg, ?DEFAULT_CALL_TIMEOUT) - catch - _:{timeout,_} -> {error,handler_busy} - end. - -%% check for overload between every event (and set Mode to async, -%% sync or drop accordingly), but never flush the whole mailbox -%% before LogWindowSize events have been handled -do_log(Bin, CallOrCast, State = #{id:=Name, mode:=Mode0}) -> - T1 = ?timestamp(), - - %% check if the handler is getting overloaded, or if it's - %% recovering from overload (the check must be done for each - %% event to react quickly to large bursts of events and - %% to ensure that the handler can never end up in drop mode - %% with an empty mailbox, which would stop operation) - {Mode1,QLen,Mem,State1} = check_load(State), - - if (Mode1 == drop) andalso (Mode0 =/= drop) -> - log_handler_info(Name, "Handler ~p switched to drop mode", - [Name], State); - (Mode0 == drop) andalso ((Mode1 == async) orelse (Mode1 == sync)) -> - log_handler_info(Name, "Handler ~p switched to ~w mode", - [Name,Mode1], State); - true -> - ok - end, - - %% kill the handler if it can't keep up with the load - kill_if_choked(Name, QLen, Mem, State), - - if Mode1 == flush -> - flush(Name, QLen, T1, State1); - true -> - write(Name, Mode1, T1, Bin, CallOrCast, State1) - end. - -%% this clause is called by do_log/3 after an overload check -%% has been performed, where QLen > FlushQLen -flush(Name, _QLen0, T1, State=#{last_log_ts := _T0, mode_tab := ModeTab}) -> - %% flush messages in the mailbox (a limited number in - %% order to not cause long delays) - NewFlushed = flush_log_events(?FLUSH_MAX_N), - - %% write info in log about flushed messages +notify({mode_change,Mode0,Mode1},#{id:=Name}=State) -> + log_handler_info(Name,"Handler ~p switched from ~p to ~p mode", + [Name,Mode0,Mode1], State); +notify({flushed,Flushed},#{id:=Name}=State) -> log_handler_info(Name, "Handler ~p flushed ~w log events", - [Name,NewFlushed], State), - - %% because of the receive loop when flushing messages, the - %% handler will be scheduled out often and the mailbox could - %% grow very large, so we'd better check the queue again here - {_,_QLen1} = process_info(self(), message_queue_len), - ?observe(Name,{max_qlen,_QLen1}), - - %% Add 1 for the current log event - ?observe(Name,{flushed,NewFlushed+1}), - - State1 = ?update_max_time(?diff_time(T1,_T0),State), - State2 = ?update_max_qlen(_QLen1,State1), - {dropped,?update_other(flushed,FLUSHED,NewFlushed, - State2#{mode => ?set_mode(ModeTab,async), - last_qlen => 0, - last_log_ts => T1})}. - -%% this clause is called to write to file -write(Name, Mode, T1, Bin, _CallOrCast, - State = #{module := Module, - handler_state := HandlerState, - mode_tab := ModeTab, - ctrl_sync_count := CtrlSync, - last_qlen := LastQLen, - last_log_ts := T0}) -> - %% check if we need to limit the number of writes - %% during a burst of log events - {DoWrite,State1} = limit_burst(State), - - %% only log synhrounously every ?CONTROLLER_SYNC_INTERVAL time, to - %% give the handler time between writes so it can keep up with - %% incoming messages - {Result,LastQLen1,HandlerState1} = - if DoWrite, CtrlSync == 0 -> - ?observe(Name,{_CallOrCast,1}), - {_,HS1} = Module:write(Name, sync, Bin, HandlerState), - {ok,element(2, process_info(self(), message_queue_len)),HS1}; - DoWrite -> - ?observe(Name,{_CallOrCast,1}), - {_,HS1} = Module:write(Name, async, Bin, HandlerState), - {ok,LastQLen,HS1}; - not DoWrite -> - ?observe(Name,{flushed,1}), - {dropped,LastQLen,HandlerState} - end, - - %% Check if the time since the previous log event is long enough - - %% and the queue length small enough - to assume the mailbox has - %% been emptied, and if so, do filesync operation and reset mode to - %% async. Note that this is the best we can do to detect an idle - %% handler without setting a timer after each log call/cast. If the - %% time between two consecutive log events is fast and no new - %% event comes in after the last one, idle state won't be detected! - Time = ?diff_time(T1,T0), - State2 = - if (LastQLen1 < ?FILESYNC_OK_QLEN) andalso - (Time > ?IDLE_DETECT_TIME_USEC) -> - {_,HS2} = Module:filesync(Name,async,HandlerState), - State1#{mode => ?change_mode(ModeTab, Mode, async), - burst_msg_count => 0, - handler_state => HS2}; - true -> - State1#{mode => Mode, handler_state => HandlerState1} - end, - State3 = ?update_calls_or_casts(_CallOrCast,1,State2), - State4 = ?update_max_qlen(LastQLen1,State3), - State5 = - ?update_max_time(Time, - State4#{last_qlen := LastQLen1, - last_log_ts => T1, - last_op => write, - ctrl_sync_count => - if CtrlSync==0 -> ?CONTROLLER_SYNC_INTERVAL; - true -> CtrlSync-1 - end}), - {Result,State5}. + [Name,Flushed], State); +notify(restart,#{id:=Name}=State) -> + log_handler_info(Name, "Handler ~p restarted", [Name], State); +notify(idle,#{id:=Name,module:=Module,handler_state:=HandlerState}=State) -> + {_,HS} = Module:filesync(Name,async,HandlerState), + State#{handler_state=>HS, last_op=>sync}. log_handler_info(Name, Format, Args, #{module:=Module, - formatter:=Formatter, - handler_state:=HandlerState}) -> - Config = #{formatter=>Formatter}, + handler_state:=HandlerState}=State) -> + Config = + case logger:get_handler_config(Name) of + {ok,Conf} -> Conf; + _ -> #{formatter=>{?DEFAULT_FORMATTER,?DEFAULT_FORMAT_CONFIG}} + end, Meta = #{time=>erlang:system_time(microsecond)}, Bin = log_to_binary(#{level => notice, msg => {Format,Args}, meta => Meta}, Config), - _ = Module:write(Name, async, Bin, HandlerState), - ok. + {_,HS} = Module:write(Name, async, Bin, HandlerState), + State#{handler_state=>HS, last_op=>write}. %%%----------------------------------------------------------------- %%% Convert log data on any form to binary @@ -540,42 +416,8 @@ string_to_binary(String) -> %%%----------------------------------------------------------------- %%% Check that the configuration term is valid check_config(Config) when is_map(Config) -> - case check_common_config(maps:to_list(Config)) of - ok -> - case overload_levels_ok(Config) of - true -> - ok; - false -> - Faulty = maps:with([sync_mode_qlen, - drop_mode_qlen, - flush_qlen],Config), - {error,{invalid_levels,Faulty}} - end; - Error -> - Error - end. + check_common_config(maps:to_list(Config)). -check_common_config([{sync_mode_qlen,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{drop_mode_qlen,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{flush_qlen,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{burst_limit_enable,Bool}|Config]) when is_boolean(Bool) -> - check_common_config(Config); -check_common_config([{burst_limit_max_count,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{burst_limit_window_time,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{overload_kill_enable,Bool}|Config]) when is_boolean(Bool) -> - check_common_config(Config); -check_common_config([{overload_kill_qlen,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{overload_kill_mem_size,N}|Config]) when is_integer(N) -> - check_common_config(Config); -check_common_config([{overload_kill_restart_after,NorA}|Config]) - when is_integer(NorA); NorA == infinity -> - check_common_config(Config); check_common_config([{filesync_repeat_interval,NorA}|Config]) when is_integer(NorA); NorA == no_repeat -> check_common_config(Config); @@ -585,156 +427,7 @@ check_common_config([]) -> ok. get_default_config() -> - #{sync_mode_qlen => ?SYNC_MODE_QLEN, - drop_mode_qlen => ?DROP_MODE_QLEN, - flush_qlen => ?FLUSH_QLEN, - burst_limit_enable => ?BURST_LIMIT_ENABLE, - burst_limit_max_count => ?BURST_LIMIT_MAX_COUNT, - burst_limit_window_time => ?BURST_LIMIT_WINDOW_TIME, - overload_kill_enable => ?OVERLOAD_KILL_ENABLE, - overload_kill_qlen => ?OVERLOAD_KILL_QLEN, - overload_kill_mem_size => ?OVERLOAD_KILL_MEM_SIZE, - overload_kill_restart_after => ?OVERLOAD_KILL_RESTART_AFTER, - filesync_repeat_interval => ?FILESYNC_REPEAT_INTERVAL}. - -%%%----------------------------------------------------------------- -%%% Overload Protection -call_cast_or_drop(_Name, HandlerPid, ModeTab, Bin) -> - %% If the handler process is getting overloaded, the log event - %% will be synchronous instead of asynchronous (slows down the - %% logging tempo of a process doing lots of logging. If the - %% handler is choked, drop mode is set and no event will be sent. - try ?get_mode(ModeTab) of - async -> - gen_server:cast(HandlerPid, {log,Bin}); - sync -> - case call(HandlerPid, {log,Bin}) of - ok -> - ok; - _Other -> - %% dropped or {error,handler_busy} - ?observe(_Name,{dropped,1}), - ok - end; - drop -> - ?observe(_Name,{dropped,1}) - catch - %% if the ETS table doesn't exist (maybe because of a - %% handler restart), we can only drop the event - _:_ -> ?observe(_Name,{dropped,1}) - end, - ok. - -set_restart_flag(#{id := Name, module := Module} = State) -> - log_handler_info(Name, "Handler ~p overloaded and stopping", [Name], State), - Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])), - spawn(fun() -> - register(Flag, self()), - timer:sleep(infinity) - end), - ok. - -unset_restart_flag(#{id := Name, module := Module} = State) -> - Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])), - case whereis(Flag) of - undefined -> - ok; - Pid -> - exit(Pid, kill), - log_handler_info(Name, "Handler ~p restarted", [Name], State) - end. - -check_load(State = #{id:=_Name, mode_tab := ModeTab, mode := Mode, - sync_mode_qlen := SyncModeQLen, - drop_mode_qlen := DropModeQLen, - flush_qlen := FlushQLen}) -> - {_,Mem} = process_info(self(), memory), - ?observe(_Name,{max_mem,Mem}), - {_,QLen} = process_info(self(), message_queue_len), - ?observe(_Name,{max_qlen,QLen}), - %% When the handler process gets scheduled in, it's impossible - %% to predict the QLen. We could jump "up" arbitrarily from say - %% async to sync, async to drop, sync to flush, etc. However, when - %% the handler process manages the log events (without flushing), - %% one after the other, we will move "down" from drop to sync and - %% from sync to async. This way we don't risk getting stuck in - %% drop or sync mode with an empty mailbox. - {Mode1,_NewDrops,_NewFlushes} = - if - QLen >= FlushQLen -> - {flush, 0,1}; - QLen >= DropModeQLen -> - %% Note that drop mode will force log events to - %% be dropped on the client side (never sent get to - %% the handler). - IncDrops = if Mode == drop -> 0; true -> 1 end, - {?change_mode(ModeTab, Mode, drop), IncDrops,0}; - QLen >= SyncModeQLen -> - {?change_mode(ModeTab, Mode, sync), 0,0}; - true -> - {?change_mode(ModeTab, Mode, async), 0,0} - end, - State1 = ?update_other(drops,DROPS,_NewDrops,State), - {Mode1, QLen, Mem, - ?update_other(flushes,FLUSHES,_NewFlushes, - State1#{last_qlen => QLen})}. - -limit_burst(#{burst_limit_enable := false}=State) -> - {true,State}; -limit_burst(#{burst_win_ts := BurstWinT0, - burst_msg_count := BurstMsgCount, - burst_limit_window_time := BurstLimitWinTime, - burst_limit_max_count := BurstLimitMaxCnt} = State) -> - if (BurstMsgCount >= BurstLimitMaxCnt) -> - %% the limit for allowed messages has been reached - BurstWinT1 = ?timestamp(), - case ?diff_time(BurstWinT1,BurstWinT0) of - BurstCheckTime when BurstCheckTime < (BurstLimitWinTime*1000) -> - %% we're still within the burst time frame - {false,?update_other(burst_drops,BURSTS,1,State)}; - _BurstCheckTime -> - %% burst time frame passed, reset counters - {true,State#{burst_win_ts => BurstWinT1, - burst_msg_count => 0}} - end; - true -> - %% the limit for allowed messages not yet reached - {true,State#{burst_win_ts => BurstWinT0, - burst_msg_count => BurstMsgCount+1}} - end. - -kill_if_choked(Name, QLen, Mem, State = #{overload_kill_enable := KillIfOL, - overload_kill_qlen := OLKillQLen, - overload_kill_mem_size := OLKillMem}) -> - if KillIfOL andalso - ((QLen > OLKillQLen) orelse (Mem > OLKillMem)) -> - set_restart_flag(State), - exit({shutdown,{overloaded,Name,QLen,Mem}}); - true -> - ok - end. - -flush_log_events(Limit) -> - process_flag(priority, high), - Flushed = flush_log_events(0, Limit), - process_flag(priority, normal), - Flushed. - -flush_log_events(Limit, Limit) -> - Limit; -flush_log_events(N, Limit) -> - %% flush log events but leave other events, such as - %% filesync, info and change_config, so that these - %% have a chance to be processed even under heavy load - receive - {'$gen_cast',{log,_}} -> - flush_log_events(N+1, Limit); - {'$gen_call',{Pid,MRef},{log,_}} -> - Pid ! {MRef, dropped}, - flush_log_events(N+1, Limit) - after - 0 -> N - end. + #{filesync_repeat_interval => ?FILESYNC_REPEAT_INTERVAL}. set_repeated_filesync(#{filesync_repeat_interval:=FSyncInt} = State) when is_integer(FSyncInt) -> @@ -752,51 +445,12 @@ cancel_repeated_filesync(State) -> error -> State end. - -stop_or_restart(Name, {shutdown,Reason={overloaded,_Name,_QLen,_Mem}}, - #{overload_kill_restart_after := RestartAfter}) -> - %% If we're terminating because of an overload situation (see - %% kill_if_choked/4), we need to remove the handler and set a - %% restart timer. A separate process must perform this in order to - %% avoid deadlock. - HandlerPid = self(), - ConfigResult = logger:get_handler_config(Name), - RemoveAndRestart = - fun() -> - MRef = erlang:monitor(process, HandlerPid), - receive - {'DOWN',MRef,_,_,_} -> - ok - after 30000 -> - error_notify(Reason), - exit(HandlerPid, kill) - end, - case ConfigResult of - {ok,#{module:=HMod}=HConfig0} when is_integer(RestartAfter) -> - _ = logger:remove_handler(Name), - HConfig = try HMod:filter_config(HConfig0) - catch _:_ -> HConfig0 - end, - _ = timer:apply_after(RestartAfter, logger, add_handler, - [Name,HMod,HConfig]); - {ok,_} -> - _ = logger:remove_handler(Name); - {error,CfgReason} when is_integer(RestartAfter) -> - error_notify({Name,restart_impossible,CfgReason}); - {error,_} -> - ok - end - end, - spawn(RemoveAndRestart), - ok; -stop_or_restart(_Name, _Reason, _State) -> - ok. - -overload_levels_ok(HandlerConfig) -> - SMQL = maps:get(sync_mode_qlen, HandlerConfig, ?SYNC_MODE_QLEN), - DMQL = maps:get(drop_mode_qlen, HandlerConfig, ?DROP_MODE_QLEN), - FQL = maps:get(flush_qlen, HandlerConfig, ?FLUSH_QLEN), - (DMQL > 1) andalso (SMQL =< DMQL) andalso (DMQL =< FQL). - error_notify(Term) -> ?internal_log(error, Term). + +maybe_set_repeated_filesync(_Olp, + #{filesync_repeat_interval:=FSyncInt}, + #{filesync_repeat_interval:=FSyncInt}) -> + ok; +maybe_set_repeated_filesync(Olp,_,#{filesync_repeat_interval:=FSyncInt}) -> + logger_olp:cast(Olp,{set_repeated_filesync,FSyncInt}). diff --git a/lib/kernel/src/logger_h_common.hrl b/lib/kernel/src/logger_h_common.hrl index 261b0a6246..004a61d9d9 100644 --- a/lib/kernel/src/logger_h_common.hrl +++ b/lib/kernel/src/logger_h_common.hrl @@ -1,50 +1,22 @@ - -%%%----------------------------------------------------------------- -%%% Overload protection configuration - -%%! *** NOTE *** -%%! It's important that: -%%! SYNC_MODE_QLEN =< DROP_MODE_QLEN =< FLUSH_QLEN -%%! and that DROP_MODE_QLEN >= 2. -%%! Otherwise the handler could end up in drop mode with no new -%%! log requests to process. This would cause all future requests -%%! to be dropped (no switch to async mode would ever take place). - -%% This specifies the message_queue_len value where the log -%% requests switch from asynchronous casts to synchronous calls. --define(SYNC_MODE_QLEN, 10). -%% Above this message_queue_len, log requests will be dropped, -%% i.e. no log requests get sent to the handler process. --define(DROP_MODE_QLEN, 200). -%% Above this message_queue_len, the handler process will flush -%% its mailbox and only leave this number of messages in it. --define(FLUSH_QLEN, 1000). - -%% Never flush more than this number of messages in one go, -%% or the handler will be unresponsive for seconds (keep this -%% number as large as possible or the mailbox could grow large). --define(FLUSH_MAX_N, 5000). - -%% BURST_LIMIT_MAX_COUNT is the max number of log requests allowed -%% to be written within a BURST_LIMIT_WINDOW_TIME time frame. --define(BURST_LIMIT_ENABLE, true). --define(BURST_LIMIT_MAX_COUNT, 500). --define(BURST_LIMIT_WINDOW_TIME, 1000). - -%% This enables/disables the feature to automatically get the -%% handler terminated if it gets too loaded (and can't keep up). --define(OVERLOAD_KILL_ENABLE, false). -%% If the message_queue_len goes above this size even after -%% flushing has been performed, the handler is terminated. --define(OVERLOAD_KILL_QLEN, 20000). -%% If the memory usage exceeds this level --define(OVERLOAD_KILL_MEM_SIZE, 3000000). - -%% This is the default time that the handler will wait before -%% restarting and accepting new requests. The value 'infinity' -%% disables restarts. --define(OVERLOAD_KILL_RESTART_AFTER, 5000). -%%-define(OVERLOAD_KILL_RESTART_AFTER, infinity). +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2015. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% %% The handler sends asynchronous write requests to the process %% controlling the i/o device, but every once in this interval @@ -65,12 +37,6 @@ -define(FILESYNC_REPEAT_INTERVAL, 5000). %%-define(FILESYNC_REPEAT_INTERVAL, no_repeat). -%% This is the time after last message received that we think/hope -%% that the handler has an empty mailbox (no new log request has -%% come in). --define(IDLE_DETECT_TIME_MSEC, 100). --define(IDLE_DETECT_TIME_USEC, 100000). - %% Default disk log option values -define(DISK_LOG_TYPE, wrap). -define(DISK_LOG_MAX_NO_FILES, 10). @@ -83,43 +49,6 @@ list_to_atom(lists:concat([MODULE,"_",Name]))). %%%----------------------------------------------------------------- -%%% Overload protection macros - --define(timestamp(), erlang:monotonic_time(microsecond)). - --define(get_mode(Tid), - case ets:lookup(Tid, mode) of - [{mode,M}] -> M; - _ -> async - end). - --define(set_mode(Tid, M), - begin ets:insert(Tid, {mode,M}), M end). - --define(change_mode(Tid, M0, M1), - if M0 == M1 -> - M0; - true -> - ets:insert(Tid, {mode,M1}), - M1 - end). - --define(min(X1, X2), - if X2 == undefined -> X1; - X2 < X1 -> X2; - true -> X1 - end). - --define(max(X1, X2), - if - X2 == undefined -> X1; - X2 > X1 -> X2; - true -> X1 - end). - --define(diff_time(OS_T1, OS_T0), OS_T1-OS_T0). - -%%%----------------------------------------------------------------- %%% The test hook macros make it possible to observe and manipulate %%% internal handler functionality. When enabled, these macros will %%% slow down execution and therefore should not be include in code @@ -183,7 +112,6 @@ [{_,ERROR}] -> ERROR catch _:_ -> disk_log:sync(LOG) end). - -define(DEFAULT_CALL_TIMEOUT, 5000). -else. % DEFAULTS! -define(TEST_HOOKS_TAB, undefined). @@ -196,68 +124,4 @@ -define(file_datasync(DEVICE), file:datasync(DEVICE)). -define(disk_log_write(LOG, MODE, DATA), disk_log_write(LOG, MODE, DATA)). -define(disk_log_sync(LOG), disk_log:sync(LOG)). - -define(DEFAULT_CALL_TIMEOUT, 10000). --endif. - -%%%----------------------------------------------------------------- -%%% These macros enable statistics counters in the state of the -%%% handler which is useful for analysing the overload protection -%%% behaviour. These counters should not be included in code to be -%%% officially released (as some counters will grow very large -%%% over time). - -%%-define(SAVE_STATS, true). --ifdef(SAVE_STATS). - -define(merge_with_stats(STATE), - STATE#{flushes => 0, flushed => 0, drops => 0, - burst_drops => 0, casts => 0, calls => 0, - max_qlen => 0, max_time => 0}). - - -define(update_max_qlen(QLEN, STATE), - begin #{max_qlen := QLEN0} = STATE, - STATE#{max_qlen => ?max(QLEN0,QLEN)} end). - - -define(update_calls_or_casts(CALL_OR_CAST, INC, STATE), - case CALL_OR_CAST of - cast -> - #{casts := CASTS0} = STATE, - STATE#{casts => CASTS0+INC}; - call -> - #{calls := CALLS0} = STATE, - STATE#{calls => CALLS0+INC} - end). - - -define(update_max_time(TIME, STATE), - begin #{max_time := TIME0} = STATE, - STATE#{max_time => ?max(TIME0,TIME)} end). - - -define(update_other(OTHER, VAR, INCVAL, STATE), - begin #{OTHER := VAR} = STATE, - STATE#{OTHER => VAR+INCVAL} end). - --else. % DEFAULT! - -define(merge_with_stats(STATE), STATE). - -define(update_max_qlen(_QLEN, STATE), STATE). - -define(update_calls_or_casts(_CALL_OR_CAST, _INC, STATE), STATE). - -define(update_max_time(_TIME, STATE), STATE). - -define(update_other(_OTHER, _VAR, _INCVAL, STATE), STATE). --endif. - -%%%----------------------------------------------------------------- -%%% These macros enable callbacks that make it possible to analyse -%%% the overload protection behaviour from outside the handler -%%% process (including dropped requests on the client side). -%%% An external callback module (?OBSERVER_MOD) is required which -%%% is not part of the kernel application. For this reason, these -%%% callbacks should not be included in code to be officially released. - -%%-define(OBSERVER_MOD, logger_test). --ifdef(OBSERVER_MOD). - -define(start_observation(NAME), ?OBSERVER:start_observation(NAME)). - -define(observe(NAME,EVENT), ?OBSERVER:observe(NAME,EVENT)). - --else. % DEFAULT! - -define(start_observation(_NAME), ok). - -define(observe(_NAME,_EVENT), ok). -endif. -%%! <--- diff --git a/lib/kernel/src/logger_internal.hrl b/lib/kernel/src/logger_internal.hrl index d96a4ac78b..e53922e5d3 100644 --- a/lib/kernel/src/logger_internal.hrl +++ b/lib/kernel/src/logger_internal.hrl @@ -19,6 +19,7 @@ %% -include_lib("kernel/include/logger.hrl"). -define(LOGGER_TABLE,logger). +-define(PROXY_KEY,'$proxy_config$'). -define(PRIMARY_KEY,'$primary_config$'). -define(HANDLER_KEY,'$handler_config$'). -define(LOGGER_META_KEY,'$logger_metadata$'). @@ -40,12 +41,14 @@ -define(DEFAULT_LOGGER_CALL_TIMEOUT, infinity). --define(LOG_INTERNAL(Level,Report), +-define(LOG_INTERNAL(Level,Report),?DO_LOG_INTERNAL(Level,[Report])). +-define(LOG_INTERNAL(Level,Format,Args),?DO_LOG_INTERNAL(Level,[Format,Args])). +-define(DO_LOG_INTERNAL(Level,Data), case logger:allow(Level,?MODULE) of true -> %% Spawn this to avoid deadlocks - _ = spawn(logger,macro_log,[?LOCATION,Level,Report, - logger:add_default_metadata(#{})]), + _ = spawn(logger,macro_log,[?LOCATION,Level|Data]++ + [logger:add_default_metadata(#{})]), ok; false -> ok diff --git a/lib/kernel/src/logger_olp.erl b/lib/kernel/src/logger_olp.erl new file mode 100644 index 0000000000..009280a9c9 --- /dev/null +++ b/lib/kernel/src/logger_olp.erl @@ -0,0 +1,626 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017-2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_olp). +-behaviour(gen_server). + +-include("logger_olp.hrl"). +-include("logger_internal.hrl"). + +%% API +-export([start_link/4, load/2, info/1, reset/1, stop/1, restart/1, + set_opts/2, get_opts/1, get_default_opts/0, get_pid/1, + call/2, cast/2, get_ref/0, get_ref/1]). + +%% gen_server and proc_lib callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +-define(OPT_KEYS,[sync_mode_qlen, + drop_mode_qlen, + flush_qlen, + burst_limit_enable, + burst_limit_max_count, + burst_limit_window_time, + overload_kill_enable, + overload_kill_qlen, + overload_kill_mem_size, + overload_kill_restart_after]). + +-export_type([olp_ref/0, options/0]). + +-opaque olp_ref() :: {atom(),pid(),ets:tid()}. + +-type options() :: logger:olp_config(). + +%%%----------------------------------------------------------------- +%%% API + +-spec start_link(Name,Module,Args,Options) -> {ok,Pid,Olp} | {error,Reason} when + Name :: atom(), + Module :: module(), + Args :: term(), + Options :: options(), + Pid :: pid(), + Olp :: olp_ref(), + Reason :: term(). +start_link(Name,Module,Args,Options0) when is_map(Options0) -> + Options = maps:merge(get_default_opts(),Options0), + case check_opts(Options) of + ok -> + proc_lib:start_link(?MODULE,init,[[Name,Module,Args,Options]]); + Error -> + Error + end. + +-spec load(Olp, Msg) -> ok when + Olp :: olp_ref(), + Msg :: term(). +load({_Name,Pid,ModeRef},Msg) -> + %% If the process is getting overloaded, the message will be + %% synchronous instead of asynchronous (slows down the tempo of a + %% process causing much load). If the process is choked, drop mode + %% is set and no message is sent. + try ?get_mode(ModeRef) of + async -> + gen_server:cast(Pid, {'$olp_load',Msg}); + sync -> + case call(Pid, {'$olp_load',Msg}) of + ok -> + ok; + _Other -> + %% dropped or {error,busy} + ?observe(_Name,{dropped,1}), + ok + end; + drop -> + ?observe(_Name,{dropped,1}) + catch + %% if the ETS table doesn't exist (maybe because of a + %% process restart), we can only drop the event + _:_ -> ?observe(_Name,{dropped,1}) + end, + ok. + +-spec info(Olp) -> map() | {error, busy} when + Olp :: atom() | pid() | olp_ref(). +info(Olp) -> + call(Olp, info). + +-spec reset(Olp) -> ok | {error, busy} when + Olp :: atom() | pid() | olp_ref(). +reset(Olp) -> + call(Olp, reset). + +-spec stop(Olp) -> ok when + Olp :: atom() | pid() | olp_ref(). +stop({_Name,Pid,_ModRef}) -> + stop(Pid); +stop(Pid) -> + _ = gen_server:call(Pid, stop), + ok. + +-spec set_opts(Olp, Opts) -> ok | {error,term()} | {error, busy} when + Olp :: atom() | pid() | olp_ref(), + Opts :: options(). +set_opts(Olp, Opts) -> + call(Olp, {set_opts,Opts}). + +-spec get_opts(Olp) -> options() | {error, busy} when + Olp :: atom() | pid() | olp_ref(). +get_opts(Olp) -> + call(Olp, get_opts). + +-spec get_default_opts() -> options(). +get_default_opts() -> + #{sync_mode_qlen => ?SYNC_MODE_QLEN, + drop_mode_qlen => ?DROP_MODE_QLEN, + flush_qlen => ?FLUSH_QLEN, + burst_limit_enable => ?BURST_LIMIT_ENABLE, + burst_limit_max_count => ?BURST_LIMIT_MAX_COUNT, + burst_limit_window_time => ?BURST_LIMIT_WINDOW_TIME, + overload_kill_enable => ?OVERLOAD_KILL_ENABLE, + overload_kill_qlen => ?OVERLOAD_KILL_QLEN, + overload_kill_mem_size => ?OVERLOAD_KILL_MEM_SIZE, + overload_kill_restart_after => ?OVERLOAD_KILL_RESTART_AFTER}. + +-spec restart(fun(() -> any())) -> ok. +restart(Fun) -> + Result = + try Fun() + catch C:R:S -> + {error,{restart_failed,Fun,C,R,S}} + end, + ?LOG_INTERNAL(debug,[{logger_olp,restart}, + {result,Result}]), + ok. + +-spec get_ref() -> olp_ref(). +get_ref() -> + get(olp_ref). + +-spec get_ref(PidOrName) -> olp_ref() | {error, busy} when + PidOrName :: pid() | atom(). +get_ref(PidOrName) -> + call(PidOrName,get_ref). + +-spec get_pid(olp_ref()) -> pid(). +get_pid({_Name,Pid,_ModeRef}) -> + Pid. + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +init([Name,Module,Args,Options]) -> + register(Name, self()), + process_flag(message_queue_data, off_heap), + + ?start_observation(Name), + + try ets:new(Name, [public]) of + ModeRef -> + OlpRef = {Name,self(),ModeRef}, + put(olp_ref,OlpRef), + try Module:init(Args) of + {ok,CBState} -> + ?set_mode(ModeRef, async), + T0 = ?timestamp(), + proc_lib:init_ack({ok,self(),OlpRef}), + %% Storing options in state to avoid copying + %% (sending) the option data with each message + State0 = ?merge_with_stats( + Options#{id => Name, + idle=> true, + module => Module, + mode_ref => ModeRef, + mode => async, + last_qlen => 0, + last_load_ts => T0, + burst_win_ts => T0, + burst_msg_count => 0, + cb_state => CBState}), + State = reset_restart_flag(State0), + gen_server:enter_loop(?MODULE, [], State); + Error -> + _ = ets:delete(ModeRef), + unregister(Name), + proc_lib:init_ack(Error) + catch + _:Error -> + _ = ets:delete(ModeRef), + unregister(Name), + proc_lib:init_ack(Error) + end + catch + _:Error -> + unregister(Name), + proc_lib:init_ack(Error) + end. + +%% This is the synchronous load event. +handle_call({'$olp_load', Msg}, _From, State) -> + {Result,State1} = do_load(Msg, call, State#{idle=>false}), + %% Result == ok | dropped + reply_return(Result,State1); + +handle_call(get_ref,_From,#{id:=Name,mode_ref:=ModeRef}=State) -> + reply_return({Name,self(),ModeRef},State); + +handle_call({set_opts,Opts0},_From,State) -> + Opts = maps:merge(maps:with(?OPT_KEYS,State),Opts0), + case check_opts(Opts) of + ok -> + reply_return(ok, maps:merge(State,Opts)); + Error -> + reply_return(Error, State) + end; + +handle_call(get_opts,_From,State) -> + reply_return(maps:with(?OPT_KEYS,State), State); + +handle_call(info, _From, State) -> + reply_return(State, State); + +handle_call(reset, _From, #{module:=Module,cb_state:=CBState}=State) -> + State1 = ?merge_with_stats(State), + CBState1 = try_callback_call(Module,reset_state,[CBState],CBState), + reply_return(ok, State1#{idle => true, + last_qlen => 0, + last_load_ts => ?timestamp(), + cb_state => CBState1}); + +handle_call(stop, _From, State) -> + {stop, {shutdown,stopped}, ok, State}; + +handle_call(Msg, From, #{module:=Module,cb_state:=CBState}=State) -> + case try_callback_call(Module,handle_call,[Msg, From, CBState]) of + {reply,Reply,CBState1} -> + reply_return(Reply,State#{cb_state=>CBState1}); + {noreply,CBState1} -> + noreply_return(State#{cb_state=>CBState1}); + {stop, Reason, Reply, CBState1} -> + {stop, Reason, Reply, State#{cb_state=>CBState1}}; + {stop, Reason, CBState1} -> + {stop, Reason, State#{cb_state=>CBState1}} + end. + +%% This is the asynchronous load event. +handle_cast({'$olp_load', Msg}, State) -> + {_Result,State1} = do_load(Msg, cast, State#{idle=>false}), + noreply_return(State1); + +handle_cast(Msg, #{module:=Module, cb_state:=CBState} = State) -> + case try_callback_call(Module,handle_cast,[Msg, CBState]) of + {noreply,CBState1} -> + noreply_return(State#{cb_state=>CBState1}); + {stop, Reason, CBState1} -> + {stop, Reason, State#{cb_state=>CBState1}} + end. + +handle_info(timeout, #{mode_ref:=_ModeRef, mode:=Mode} = State) -> + State1 = notify(idle,State), + State2 = maybe_notify_mode_change(async,State1), + {noreply, State2#{idle => true, + mode => ?change_mode(_ModeRef, Mode, async), + burst_msg_count => 0}}; +handle_info(Msg, #{module := Module, cb_state := CBState} = State) -> + case try_callback_call(Module,handle_info,[Msg, CBState]) of + {noreply,CBState1} -> + noreply_return(State#{cb_state=>CBState1}); + {stop, Reason, CBState1} -> + {stop, Reason, State#{cb_state=>CBState1}}; + {load,CBState1} -> + {_,State1} = do_load(Msg, cast, State#{idle=>false, + cb_state=>CBState1}), + noreply_return(State1) + end. + +terminate({shutdown,{overloaded,_QLen,_Mem}}, + #{id:=Name, module := Module, cb_state := CBState, + overload_kill_restart_after := RestartAfter} = State) -> + %% We're terminating because of an overload situation (see + %% kill_if_choked/3). + unregister(Name), %%!!!! to avoid error printout of callback crashed on stop + case try_callback_call(Module,terminate,[overloaded,CBState],ok) of + {ok,Fun} when is_function(Fun,0), is_integer(RestartAfter) -> + set_restart_flag(State), + _ = timer:apply_after(RestartAfter,?MODULE,restart,[Fun]), + ok; + _ -> + ok + end; +terminate(Reason, #{id:=Name, module:=Module, cb_state:=CBState}) -> + _ = try_callback_call(Module,terminate,[Reason,CBState],ok), + unregister(Name), + ok. + +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + + +%%%----------------------------------------------------------------- +%%% Internal functions +-spec call(Olp, term()) -> term() | {error,busy} when + Olp :: atom() | pid() | olp_ref(). +call({_Name, Pid, _ModeRef},Msg) -> + call(Pid, Msg); +call(Server, Msg) -> + try + gen_server:call(Server, Msg) + catch + _:{timeout,_} -> {error,busy} + end. + +-spec cast(olp_ref(),term()) -> ok. +cast({_Name, Pid, _ModeRef},Msg) -> + gen_server:cast(Pid, Msg). + +%% check for overload between every event (and set Mode to async, +%% sync or drop accordingly), but never flush the whole mailbox +%% before LogWindowSize events have been handled +do_load(Msg, CallOrCast, State) -> + T1 = ?timestamp(), + State1 = ?update_time(T1,State), + + %% check if the process is getting overloaded, or if it's + %% recovering from overload (the check must be done for each + %% event to react quickly to large bursts of events and + %% to ensure that the handler can never end up in drop mode + %% with an empty mailbox, which would stop operation) + {Mode1,QLen,Mem,State2} = check_load(State1), + + %% kill the handler if it can't keep up with the load + kill_if_choked(QLen, Mem, State2), + + if Mode1 == flush -> + flush(T1, State2); + true -> + handle_load(Mode1, T1, Msg, CallOrCast, State2) + end. + +%% this function is called by do_load/3 after an overload check +%% has been performed, where QLen > FlushQLen +flush(T1, State=#{id := _Name, mode := Mode, last_load_ts := _T0, mode_ref := ModeRef}) -> + %% flush load messages in the mailbox (a limited number in order + %% to not cause long delays) + NewFlushed = flush_load(?FLUSH_MAX_N), + + %% write info in log about flushed messages + State1=notify({flushed,NewFlushed},State), + + %% because of the receive loop when flushing messages, the + %% handler will be scheduled out often and the mailbox could + %% grow very large, so we'd better check the queue again here + {_,QLen1} = process_info(self(), message_queue_len), + ?observe(_Name,{max_qlen,QLen1}), + + %% Add 1 for the current log event + ?observe(_Name,{flushed,NewFlushed+1}), + + State2 = ?update_max_time(?diff_time(T1,_T0),State1), + State3 = ?update_max_qlen(QLen1,State2), + State4 = maybe_notify_mode_change(async,State3), + {dropped,?update_other(flushed,FLUSHED,NewFlushed, + State4#{mode => ?change_mode(ModeRef,Mode,async), + last_qlen => QLen1, + last_load_ts => T1})}. + +%% this function is called to actually handle the message +handle_load(Mode, T1, Msg, _CallOrCast, + State = #{id := _Name, + module := Module, + cb_state := CBState, + last_qlen := LastQLen, + last_load_ts := _T0}) -> + %% check if we need to limit the number of writes + %% during a burst of log events + {DoWrite,State1} = limit_burst(State), + + {Result,LastQLen1,CBState1} = + if DoWrite -> + ?observe(_Name,{_CallOrCast,1}), + CBS = try_callback_call(Module,handle_load,[Msg,CBState]), + {ok,element(2, process_info(self(), message_queue_len)),CBS}; + true -> + ?observe(_Name,{flushed,1}), + {dropped,LastQLen,CBState} + end, + State2 = State1#{cb_state=>CBState1}, + + State3 = State2#{mode => Mode}, + State4 = ?update_calls_or_casts(_CallOrCast,1,State3), + State5 = ?update_max_qlen(LastQLen1,State4), + State6 = + ?update_max_time(?diff_time(T1,_T0), + State5#{last_qlen := LastQLen1, + last_load_ts => T1}), + State7 = case Result of + ok -> + S = ?update_freq(T1,State6), + ?update_other(writes,WRITES,1,S); + _ -> + State6 + end, + {Result,State7}. + + +%%%----------------------------------------------------------------- +%%% Check that the options are valid +check_opts(Options) when is_map(Options) -> + case do_check_opts(maps:to_list(Options)) of + ok -> + case overload_levels_ok(Options) of + true -> + ok; + false -> + Faulty = maps:with([sync_mode_qlen, + drop_mode_qlen, + flush_qlen],Options), + {error,{invalid_olp_levels,Faulty}} + end; + {error,Key,Value} -> + {error,{invalid_olp_config,#{Key=>Value}}} + end. + +do_check_opts([{sync_mode_qlen,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{drop_mode_qlen,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{flush_qlen,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{burst_limit_enable,Bool}|Options]) when is_boolean(Bool) -> + do_check_opts(Options); +do_check_opts([{burst_limit_max_count,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{burst_limit_window_time,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{overload_kill_enable,Bool}|Options]) when is_boolean(Bool) -> + do_check_opts(Options); +do_check_opts([{overload_kill_qlen,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{overload_kill_mem_size,N}|Options]) when is_integer(N) -> + do_check_opts(Options); +do_check_opts([{overload_kill_restart_after,NorA}|Options]) + when is_integer(NorA); NorA == infinity -> + do_check_opts(Options); +do_check_opts([{Key,Value}|_]) -> + {error,Key,Value}; +do_check_opts([]) -> + ok. + +set_restart_flag(#{id := Name, module := Module}) -> + Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])), + spawn(fun() -> + register(Flag, self()), + timer:sleep(infinity) + end), + ok. + +reset_restart_flag(#{id := Name, module := Module} = State) -> + Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])), + case whereis(Flag) of + undefined -> + State; + Pid -> + exit(Pid, kill), + notify(restart,State) + end. + +check_load(State = #{id:=_Name, mode_ref := ModeRef, mode := Mode, + sync_mode_qlen := SyncModeQLen, + drop_mode_qlen := DropModeQLen, + flush_qlen := FlushQLen}) -> + {_,Mem} = process_info(self(), memory), + ?observe(_Name,{max_mem,Mem}), + {_,QLen} = process_info(self(), message_queue_len), + ?observe(_Name,{max_qlen,QLen}), + %% When the handler process gets scheduled in, it's impossible + %% to predict the QLen. We could jump "up" arbitrarily from say + %% async to sync, async to drop, sync to flush, etc. However, when + %% the handler process manages the log events (without flushing), + %% one after the other, we will move "down" from drop to sync and + %% from sync to async. This way we don't risk getting stuck in + %% drop or sync mode with an empty mailbox. + {Mode1,_NewDrops,_NewFlushes} = + if + QLen >= FlushQLen -> + {flush, 0,1}; + QLen >= DropModeQLen -> + %% Note that drop mode will force load messages to + %% be dropped on the client side (never sent to + %% the olp process). + IncDrops = if Mode == drop -> 0; true -> 1 end, + {?change_mode(ModeRef, Mode, drop), IncDrops,0}; + QLen >= SyncModeQLen -> + {?change_mode(ModeRef, Mode, sync), 0,0}; + true -> + {?change_mode(ModeRef, Mode, async), 0,0} + end, + State1 = ?update_other(drops,DROPS,_NewDrops,State), + State2 = ?update_max_qlen(QLen,State1), + State3 = maybe_notify_mode_change(Mode1,State2), + {Mode1, QLen, Mem, + ?update_other(flushes,FLUSHES,_NewFlushes, + State3#{last_qlen => QLen})}. + +limit_burst(#{burst_limit_enable := false}=State) -> + {true,State}; +limit_burst(#{burst_win_ts := BurstWinT0, + burst_msg_count := BurstMsgCount, + burst_limit_window_time := BurstLimitWinTime, + burst_limit_max_count := BurstLimitMaxCnt} = State) -> + if (BurstMsgCount >= BurstLimitMaxCnt) -> + %% the limit for allowed messages has been reached + BurstWinT1 = ?timestamp(), + case ?diff_time(BurstWinT1,BurstWinT0) of + BurstCheckTime when BurstCheckTime < (BurstLimitWinTime*1000) -> + %% we're still within the burst time frame + {false,?update_other(burst_drops,BURSTS,1,State)}; + _BurstCheckTime -> + %% burst time frame passed, reset counters + {true,State#{burst_win_ts => BurstWinT1, + burst_msg_count => 0}} + end; + true -> + %% the limit for allowed messages not yet reached + {true,State#{burst_win_ts => BurstWinT0, + burst_msg_count => BurstMsgCount+1}} + end. + +kill_if_choked(QLen, Mem, #{overload_kill_enable := KillIfOL, + overload_kill_qlen := OLKillQLen, + overload_kill_mem_size := OLKillMem}) -> + if KillIfOL andalso + ((QLen > OLKillQLen) orelse (Mem > OLKillMem)) -> + exit({shutdown,{overloaded,QLen,Mem}}); + true -> + ok + end. + +flush_load(Limit) -> + process_flag(priority, high), + Flushed = flush_load(0, Limit), + process_flag(priority, normal), + Flushed. + +flush_load(Limit, Limit) -> + Limit; +flush_load(N, Limit) -> + %% flush log events but leave other events, such as info, reset + %% and stop, so that these have a chance to be processed even + %% under heavy load + receive + {'$gen_cast',{'$olp_load',_}} -> + flush_load(N+1, Limit); + {'$gen_call',{Pid,MRef},{'$olp_load',_}} -> + Pid ! {MRef, dropped}, + flush_load(N+1, Limit); + {log,_,_,_,_} -> + flush_load(N+1, Limit); + {log,_,_,_} -> + flush_load(N+1, Limit) + after + 0 -> N + end. + +overload_levels_ok(Options) -> + SMQL = maps:get(sync_mode_qlen, Options, ?SYNC_MODE_QLEN), + DMQL = maps:get(drop_mode_qlen, Options, ?DROP_MODE_QLEN), + FQL = maps:get(flush_qlen, Options, ?FLUSH_QLEN), + (DMQL > 1) andalso (SMQL =< DMQL) andalso (DMQL =< FQL). + +maybe_notify_mode_change(drop,#{mode:=Mode0}=State) + when Mode0=/=drop -> + notify({mode_change,Mode0,drop},State); +maybe_notify_mode_change(Mode1,#{mode:=drop}=State) + when Mode1==async; Mode1==sync -> + notify({mode_change,drop,Mode1},State); +maybe_notify_mode_change(_,State) -> + State. + +notify(Note,#{module:=Module,cb_state:=CBState}=State) -> + CBState1 = try_callback_call(Module,notify,[Note,CBState],CBState), + State#{cb_state=>CBState1}. + +try_callback_call(Module, Function, Args) -> + try_callback_call(Module, Function, Args, '$no_default_return'). + +try_callback_call(Module, Function, Args, DefRet) -> + try apply(Module, Function, Args) + catch + throw:R -> R; + error:undef:S when DefRet=/='$no_default_return' -> + case S of + [{Module,Function,Args,_}|_] -> + DefRet; + _ -> + erlang:raise(error,undef,S) + end + end. + +noreply_return(#{idle:=true}=State) -> + {noreply,State}; +noreply_return(#{idle:=false}=State) -> + {noreply,State,?IDLE_DETECT_TIME}. + +reply_return(Reply,#{idle:=true}=State) -> + {reply,Reply,State}; +reply_return(Reply,#{idle:=false}=State) -> + {reply,Reply,State,?IDLE_DETECT_TIME}. diff --git a/lib/kernel/src/logger_olp.hrl b/lib/kernel/src/logger_olp.hrl new file mode 100644 index 0000000000..9b4f5ebf27 --- /dev/null +++ b/lib/kernel/src/logger_olp.hrl @@ -0,0 +1,180 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2015. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +%%%----------------------------------------------------------------- +%%% Overload protection configuration + +%%! *** NOTE *** +%%! It's important that: +%%! SYNC_MODE_QLEN =< DROP_MODE_QLEN =< FLUSH_QLEN +%%! and that DROP_MODE_QLEN >= 2. +%%! Otherwise the process could end up in drop mode with no new +%%! log requests to process. This would cause all future requests +%%! to be dropped (no switch to async mode would ever take place). + +%% This specifies the message_queue_len value where the log +%% requests switch from asynchronous casts to synchronous calls. +-define(SYNC_MODE_QLEN, 10). +%% Above this message_queue_len, log requests will be dropped, +%% i.e. no log requests get sent to the process. +-define(DROP_MODE_QLEN, 200). +%% Above this message_queue_len, the process will flush its mailbox +%% and only leave this number of messages in it. +-define(FLUSH_QLEN, 1000). + +%% Never flush more than this number of messages in one go, or the +%% process will be unresponsive for seconds (keep this number as large +%% as possible or the mailbox could grow large). +-define(FLUSH_MAX_N, 5000). + +%% BURST_LIMIT_MAX_COUNT is the max number of log requests allowed +%% to be written within a BURST_LIMIT_WINDOW_TIME time frame. +-define(BURST_LIMIT_ENABLE, true). +-define(BURST_LIMIT_MAX_COUNT, 500). +-define(BURST_LIMIT_WINDOW_TIME, 1000). + +%% This enables/disables the feature to automatically terminate the +%% process if it gets too loaded (and can't keep up). +-define(OVERLOAD_KILL_ENABLE, false). +%% If the message_queue_len goes above this size even after +%% flushing has been performed, the process is terminated. +-define(OVERLOAD_KILL_QLEN, 20000). +%% If the memory usage exceeds this level, the process is terminated. +-define(OVERLOAD_KILL_MEM_SIZE, 3000000). + +%% This is the default time to wait before restarting and accepting +%% new requests. The value 'infinity' disables restarts. +-define(OVERLOAD_KILL_RESTART_AFTER, 5000). + +%% This is the time in milliseconds after last load message received +%% that we notify the callback about being idle. +-define(IDLE_DETECT_TIME, 100). + +%%%----------------------------------------------------------------- +%%% Overload protection macros + +-define(timestamp(), erlang:monotonic_time(microsecond)). + +-define(get_mode(Tid), + case ets:lookup(Tid, mode) of + [{mode,M}] -> M; + _ -> async + end). + +-define(set_mode(Tid, M), + begin ets:insert(Tid, {mode,M}), M end). + +-define(change_mode(Tid, M0, M1), + if M0 == M1 -> + M0; + true -> + ets:insert(Tid, {mode,M1}), + M1 + end). + +-define(max(X1, X2), + if + X2 == undefined -> X1; + X2 > X1 -> X2; + true -> X1 + end). + +-define(diff_time(OS_T1, OS_T0), OS_T1-OS_T0). + +%%%----------------------------------------------------------------- +%%% These macros enable statistics counters in the state of the +%%% process, which is useful for analysing the overload protection +%%% behaviour. These counters should not be included in code to be +%%% officially released (as some counters will grow very large over +%%% time). + +%% -define(SAVE_STATS, true). +-ifdef(SAVE_STATS). + -define(merge_with_stats(STATE), + begin + TIME = ?timestamp(), + STATE#{start => TIME, time => {TIME,0}, + flushes => 0, flushed => 0, drops => 0, + burst_drops => 0, casts => 0, calls => 0, + writes => 0, max_qlen => 0, max_time => 0, + freq => {TIME,0,0}} end). + + -define(update_max_qlen(QLEN, STATE), + begin #{max_qlen := QLEN0} = STATE, + STATE#{max_qlen => ?max(QLEN0,QLEN)} end). + + -define(update_calls_or_casts(CALL_OR_CAST, INC, STATE), + case CALL_OR_CAST of + cast -> + #{casts := CASTS0} = STATE, + STATE#{casts => CASTS0+INC}; + call -> + #{calls := CALLS0} = STATE, + STATE#{calls => CALLS0+INC} + end). + + -define(update_max_time(TIME, STATE), + begin #{max_time := TIME0} = STATE, + STATE#{max_time => ?max(TIME0,TIME)} end). + + -define(update_other(OTHER, VAR, INCVAL, STATE), + begin #{OTHER := VAR} = STATE, + STATE#{OTHER => VAR+INCVAL} end). + + -define(update_freq(TIME,STATE), + begin + case STATE of + #{freq := {START, 49, _}} -> + STATE#{freq => {TIME, 0, trunc(1000000*50/(?diff_time(TIME,START)))}}; + #{freq := {START, N, FREQ}} -> + STATE#{freq => {START, N+1, FREQ}} + end end). + + -define(update_time(TIME,STATE), + begin #{start := START} = STATE, + STATE#{time => {TIME,trunc((?diff_time(TIME,START))/1000000)}} end). + +-else. % DEFAULT! + -define(merge_with_stats(STATE), STATE). + -define(update_max_qlen(_QLEN, STATE), STATE). + -define(update_calls_or_casts(_CALL_OR_CAST, _INC, STATE), STATE). + -define(update_max_time(_TIME, STATE), STATE). + -define(update_other(_OTHER, _VAR, _INCVAL, STATE), STATE). + -define(update_freq(_TIME, STATE), STATE). + -define(update_time(_TIME, STATE), STATE). +-endif. + +%%%----------------------------------------------------------------- +%%% These macros enable callbacks that make it possible to analyse the +%%% overload protection behaviour from outside the process (including +%%% dropped requests on the client side). An external callback module +%%% (?OBSERVER_MOD) is required which is not part of the kernel +%%% application. For this reason, these callbacks should not be +%%% included in code to be officially released. + +%%-define(OBSERVER_MOD, logger_test). +-ifdef(OBSERVER_MOD). + -define(start_observation(NAME), ?OBSERVER:start_observation(NAME)). + -define(observe(NAME,EVENT), ?OBSERVER:observe(NAME,EVENT)). + +-else. % DEFAULT! + -define(start_observation(_NAME), ok). + -define(observe(_NAME,_EVENT), ok). +-endif. diff --git a/lib/kernel/src/logger_proxy.erl b/lib/kernel/src/logger_proxy.erl new file mode 100644 index 0000000000..24b293805c --- /dev/null +++ b/lib/kernel/src/logger_proxy.erl @@ -0,0 +1,165 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2017-2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_proxy). + +%% API +-export([start_link/0, restart/0, log/1, child_spec/0, get_default_config/0]). + +%% logger_olp callbacks +-export([init/1, handle_load/2, handle_info/2, terminate/2, + notify/2]). + +-include("logger_internal.hrl"). + +-define(SERVER,?MODULE). + +%%%----------------------------------------------------------------- +%%% API +-spec log(RemoteLog) -> ok when + RemoteLog :: {remote,node(),LogEvent}, + LogEvent :: {log,Level,Format,Args,Meta} | + {log,Level,StringOrReport,Meta}, + Level :: logger:level(), + Format :: io:format(), + Args :: list(term()), + StringOrReport :: unicode:chardata() | logger:report(), + Meta :: logger:metadata(). +log(RemoteLog) -> + Olp = persistent_term:get(?MODULE), + case logger_olp:get_pid(Olp) =:= self() of + true -> + %% This happens when the log event comes from the + %% emulator, and the group leader is on a remote node. + _ = handle_load(RemoteLog, no_state), + ok; + false -> + logger_olp:load(Olp, RemoteLog) + end. + +%% Called by supervisor +-spec start_link() -> {ok,pid(),logger_olp:olp_ref()} | {error,term()}. +start_link() -> + %% Notice that sync_mode is only used when logging to remote node, + %% i.e. when the log/2 API function is called. + %% + %% When receiving log events from the emulator or from a remote + %% node, the log event is sent as a message to this process, and + %% thus received directly in handle_info/2. This means that the + %% mode (async/sync/drop) is not read before the message is + %% sent. Thus sync mode is never entered, and drop mode is + %% implemented by setting the system_logger flag to undefined (see + %% notify/2) + %% + %% Burst limit is disabled, since this is only a proxy and we + %% don't want to limit bursts twice (here and in the handler). + logger_olp:start_link(?SERVER,?MODULE,[],logger:get_proxy_config()). + +%% Fun used for restarting this process after it has been killed due +%% to overload (must set overload_kill_enable=>true in opts) +restart() -> + case supervisor:start_child(logger_sup, child_spec()) of + {ok,_Pid,Olp} -> + {ok,Olp}; + {error,{Reason,Ch}} when is_tuple(Ch), element(1,Ch)==child -> + {error,Reason}; + Error -> + Error + end. + +%% Called internally and by logger_sup +child_spec() -> + Name = ?SERVER, + #{id => Name, + start => {?MODULE, start_link, []}, + restart => temporary, + shutdown => 2000, + type => worker, + modules => [?MODULE]}. + +get_default_config() -> + OlpDefault = logger_olp:get_default_opts(), + OlpDefault#{sync_mode_qlen=>500, + drop_mode_qlen=>1000, + flush_qlen=>5000, + burst_limit_enable=>false}. + +%%%=================================================================== +%%% gen_server callbacks +%%%=================================================================== + +init([]) -> + process_flag(trap_exit, true), + _ = erlang:system_flag(system_logger,self()), + persistent_term:put(?MODULE,logger_olp:get_ref()), + {ok,no_state}. + +%% Log event to send to the node where the group leader of it's client resides +handle_load({remote,Node,Log},State) -> + %% If the connection is overloaded (send_nosuspend returns false), + %% we drop the message. + _ = erlang:send_nosuspend({?SERVER,Node},Log), + State; +%% Log event to log on this node +handle_load({log,Level,Format,Args,Meta},State) -> + try_log([Level,Format,Args,Meta]), + State; +handle_load({log,Level,Report,Meta},State) -> + try_log([Level,Report,Meta]), + State. + +%% Log event sent to this process e.g. from the emulator - it is really load +handle_info(Log,State) when is_tuple(Log), element(1,Log)==log -> + {load,State}. + +terminate(overloaded, _State) -> + _ = erlang:system_flag(system_logger,undefined), + {ok,fun ?MODULE:restart/0}; +terminate(_Reason, _State) -> + _ = erlang:system_flag(system_logger,whereis(logger)), + ok. + +notify({mode_change,Mode0,Mode1},State) -> + _ = if Mode1=:=drop -> % entering drop mode + erlang:system_flag(system_logger,undefined); + Mode0=:=drop -> % leaving drop mode + erlang:system_flag(system_logger,self()); + true -> + ok + end, + ?LOG_INTERNAL(notice,"~w switched from ~w to ~w mode",[?MODULE,Mode0,Mode1]), + State; +notify({flushed,Flushed},State) -> + ?LOG_INTERNAL(notice, "~w flushed ~w log events",[?MODULE,Flushed]), + State; +notify(restart,State) -> + ?LOG_INTERNAL(notice, "~w restarted", [?MODULE]), + State; +notify(_Note,State) -> + State. + +%%%----------------------------------------------------------------- +%%% Internal functions +try_log(Args) -> + try apply(logger,log,Args) + catch C:R:S -> + ?LOG_INTERNAL(debug,[{?MODULE,log_failed}, + {log,Args}, + {reason,{C,R,S}}]) + end. diff --git a/lib/kernel/src/logger_server.erl b/lib/kernel/src/logger_server.erl index b7735dbcf7..722246e82c 100644 --- a/lib/kernel/src/logger_server.erl +++ b/lib/kernel/src/logger_server.erl @@ -22,8 +22,7 @@ -behaviour(gen_server). %% API --export([start_link/0, - add_handler/3, remove_handler/1, +-export([start_link/0, add_handler/3, remove_handler/1, add_filter/2, remove_filter/2, set_module_level/2, unset_module_level/0, unset_module_level/1, cache_module_level/1, @@ -43,7 +42,7 @@ -define(SERVER, logger). -define(LOGGER_SERVER_TAG, '$logger_cb_process'). --record(state, {tid, async_req, async_req_queue}). +-record(state, {tid, async_req, async_req_queue, remote_logger}). %%%=================================================================== %%% API @@ -155,6 +154,8 @@ init([]) -> process_flag(trap_exit, true), put(?LOGGER_SERVER_TAG,true), Tid = logger_config:new(?LOGGER_TABLE), + %% Store initial proxy config. logger_proxy reads config from here at startup. + logger_config:create(Tid,proxy,logger_proxy:get_default_config()), PrimaryConfig = maps:merge(default_config(primary), #{handlers=>[simple]}), logger_config:create(Tid,primary,PrimaryConfig), @@ -221,6 +222,24 @@ handle_call({add_filter,Id,Filter}, _From,#state{tid=Tid}=State) -> handle_call({remove_filter,Id,FilterId}, _From, #state{tid=Tid}=State) -> Reply = do_remove_filter(Tid,Id,FilterId), {reply,Reply,State}; +handle_call({change_config,SetOrUpd,proxy,Config0},_From,#state{tid=Tid}=State) -> + Default = + case SetOrUpd of + set -> + logger_proxy:get_default_config(); + update -> + {ok,OldConfig} = logger_config:get(Tid,proxy), + OldConfig + end, + Config = maps:merge(Default,Config0), + Reply = + case logger_olp:set_opts(logger_proxy,Config) of + ok -> + logger_config:set(Tid,proxy,Config); + Error -> + Error + end, + {reply,Reply,State}; handle_call({change_config,SetOrUpd,primary,Config0}, _From, #state{tid=Tid}=State) -> {ok,#{handlers:=Handlers}=OldConfig} = logger_config:get(Tid,primary), @@ -357,7 +376,7 @@ terminate(_Reason, _State) -> %%%=================================================================== %%% Internal functions %%%=================================================================== -call(Request) -> +call(Request) when is_tuple(Request) -> Action = element(1,Request), case get(?LOGGER_SERVER_TAG) of true when @@ -369,6 +388,7 @@ call(Request) -> gen_server:call(?SERVER,Request,?DEFAULT_LOGGER_CALL_TIMEOUT) end. + do_add_filter(Tid,Id,{FId,_} = Filter) -> case logger_config:get(Tid,Id) of {ok,Config} -> @@ -413,11 +433,13 @@ default_config(Id,Module) -> sanity_check(Owner,Key,Value) -> sanity_check_1(Owner,[{Key,Value}]). -sanity_check(HandlerId,Config) when is_map(Config) -> - sanity_check_1(HandlerId,maps:to_list(Config)); +sanity_check(Owner,Config) when is_map(Config) -> + sanity_check_1(Owner,maps:to_list(Config)); sanity_check(_,Config) -> {error,{invalid_config,Config}}. +sanity_check_1(proxy,_Config) -> + ok; % Details are checked by logger_olp:set_opts/2 sanity_check_1(Owner,Config) when is_list(Config) -> try Type = get_type(Owner), diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl index 63d1dbaba2..0669164bb6 100644 --- a/lib/kernel/src/logger_std_h.erl +++ b/lib/kernel/src/logger_std_h.erl @@ -26,7 +26,7 @@ -include_lib("kernel/include/file.hrl"). %% API --export([info/1, filesync/1, reset/1]). +-export([filesync/1]). %% logger_h_common callbacks -export([init/2, check_config/4, reset_state/2, @@ -36,6 +36,8 @@ -export([log/2, adding_handler/1, removing_handler/1, changing_config/3, filter_config/1]). +-define(DEFAULT_CALL_TIMEOUT, 5000). + %%%=================================================================== %%% API %%%=================================================================== @@ -49,25 +51,6 @@ filesync(Name) -> logger_h_common:filesync(?MODULE,Name). -%%%----------------------------------------------------------------- -%%% --spec info(Name) -> Info | {error,Reason} when - Name :: atom(), - Info :: term(), - Reason :: handler_busy | {badarg,term()}. - -info(Name) -> - logger_h_common:info(?MODULE,Name). - -%%%----------------------------------------------------------------- -%%% --spec reset(Name) -> ok | {error,Reason} when - Name :: atom(), - Reason :: handler_busy | {badarg,term()}. - -reset(Name) -> - logger_h_common:reset(?MODULE,Name). - %%%=================================================================== %%% logger callbacks - just forward to logger_h_common %%%=================================================================== diff --git a/lib/kernel/src/logger_sup.erl b/lib/kernel/src/logger_sup.erl index 3d6f482e20..9ea8558a16 100644 --- a/lib/kernel/src/logger_sup.erl +++ b/lib/kernel/src/logger_sup.erl @@ -50,7 +50,9 @@ init([]) -> start => {logger_handler_watcher, start_link, []}, shutdown => brutal_kill}, - {ok, {SupFlags, [Watcher]}}. + Proxy = logger_proxy:child_spec(), + + {ok, {SupFlags, [Watcher,Proxy]}}. %%%=================================================================== %%% Internal functions diff --git a/lib/kernel/test/Makefile b/lib/kernel/test/Makefile index 4a86265a4a..8a6ffe7e72 100644 --- a/lib/kernel/test/Makefile +++ b/lib/kernel/test/Makefile @@ -76,8 +76,11 @@ MODULES= \ logger_filters_SUITE \ logger_formatter_SUITE \ logger_legacy_SUITE \ + logger_olp_SUITE \ + logger_proxy_SUITE \ logger_simple_h_SUITE \ logger_std_h_SUITE \ + logger_stress_SUITE \ logger_test_lib \ os_SUITE \ pg2_SUITE \ diff --git a/lib/kernel/test/kernel_bench.spec b/lib/kernel/test/kernel_bench.spec index 4de133f21b..898ceb59e0 100644 --- a/lib/kernel/test/kernel_bench.spec +++ b/lib/kernel/test/kernel_bench.spec @@ -1,2 +1,3 @@ {groups,"../kernel_test",zlib_SUITE,[bench]}. {groups,"../kernel_test",file_SUITE,[bench]}. +{suites,"../kernel_test",[logger_stress_SUITE]}. diff --git a/lib/kernel/test/logger.cover b/lib/kernel/test/logger.cover index 960bc0abff..9691aa295e 100644 --- a/lib/kernel/test/logger.cover +++ b/lib/kernel/test/logger.cover @@ -4,9 +4,12 @@ logger_backend, logger_config, logger_disk_log_h, - logger_h_common, logger_filters, logger_formatter, + logger_handler_watcher, + logger_h_common, + logger_olp, + logger_proxy, logger_server, logger_simple_h, logger_std_h, diff --git a/lib/kernel/test/logger.spec b/lib/kernel/test/logger.spec index 1ab90b3e93..3aec37951d 100644 --- a/lib/kernel/test/logger.spec +++ b/lib/kernel/test/logger.spec @@ -7,5 +7,7 @@ logger_filters_SUITE, logger_formatter_SUITE, logger_legacy_SUITE, + logger_olp_SUITE, + logger_proxy_SUITE, logger_simple_h_SUITE, logger_std_h_SUITE]}. diff --git a/lib/kernel/test/logger_disk_log_h_SUITE.erl b/lib/kernel/test/logger_disk_log_h_SUITE.erl index 87b8250781..9bbec42de8 100644 --- a/lib/kernel/test/logger_disk_log_h_SUITE.erl +++ b/lib/kernel/test/logger_disk_log_h_SUITE.erl @@ -24,6 +24,7 @@ -include_lib("common_test/include/ct.hrl"). -include_lib("kernel/include/logger.hrl"). -include_lib("kernel/src/logger_internal.hrl"). +-include_lib("kernel/src/logger_olp.hrl"). -include_lib("kernel/src/logger_h_common.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). -include_lib("kernel/include/file.hrl"). @@ -97,7 +98,6 @@ all() -> formatter_fail, config_fail, bad_input, - info_and_reset, reconfig, sync, disk_log_full, @@ -306,9 +306,9 @@ logging(cleanup, _Config) -> filter_config(_Config) -> ok = logger:add_handler(?MODULE,logger_disk_log_h,#{}), {ok,#{config:=HConfig}=Config} = logger:get_handler_config(?MODULE), - HConfig = maps:without([handler_pid,mode_tab],HConfig), + HConfig = maps:without([olp],HConfig), - FakeFullHConfig = HConfig#{handler_pid=>self(),mode_tab=>erlang:make_ref()}, + FakeFullHConfig = HConfig#{olp=>{regname,self(),erlang:make_ref()}}, #{config:=HConfig} = logger_disk_log_h:filter_config(Config#{config=>FakeFullHConfig}), ok. @@ -351,9 +351,7 @@ errors(Config) -> %% Read-only fields may (accidentially) be included in the change, %% but it won't take effect {ok,C} = logger:get_handler_config(Name1), - ok = logger:set_handler_config(Name1,config, - #{handler_pid=>self(), - mode_tab=>erlang:make_ref()}), + ok = logger:set_handler_config(Name1,config,#{olp=>dummyvalue}), {ok,C} = logger:get_handler_config(Name1), @@ -419,19 +417,16 @@ config_fail(_Config) -> filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_config,logger_disk_log_h, - {invalid_levels,#{drop_mode_qlen:=1}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{drop_mode_qlen:=1}}}} = logger:add_handler(?MODULE,logger_disk_log_h, #{config => #{drop_mode_qlen=>1}}), - {error,{handler_not_added,{invalid_config,logger_disk_log_h, - {invalid_levels,#{sync_mode_qlen:=43, - drop_mode_qlen:=42}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{sync_mode_qlen:=43, + drop_mode_qlen:=42}}}} = logger:add_handler(?MODULE,logger_disk_log_h, #{config => #{sync_mode_qlen=>43, drop_mode_qlen=>42}}), - {error,{handler_not_added,{invalid_config,logger_disk_log_h, - {invalid_levels,#{drop_mode_qlen:=43, - flush_qlen:=42}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{drop_mode_qlen:=43, + flush_qlen:=42}}}} = logger:add_handler(?MODULE,logger_disk_log_h, #{config => #{drop_mode_qlen=>43, flush_qlen=>42}}), @@ -445,7 +440,7 @@ config_fail(_Config) -> #{max_no_files=>2}), %% incorrect values of OP params {ok,#{config := HConfig}} = logger:get_handler_config(?MODULE), - {error,{invalid_config,logger_disk_log_h,{invalid_levels,_}}} = + {error,{invalid_olp_levels,_}} = logger:update_handler_config(?MODULE,config, HConfig#{sync_mode_qlen=>100, flush_qlen=>99}), @@ -459,18 +454,7 @@ config_fail(cleanup,_Config) -> bad_input(_Config) -> {error,{badarg,{filesync,["BadType"]}}} = - logger_disk_log_h:filesync("BadType"), - {error,{badarg,{info,["BadType"]}}} = logger_disk_log_h:info("BadType"), - {error,{badarg,{reset,["BadType"]}}} = logger_disk_log_h:reset("BadType"). - -info_and_reset(_Config) -> - ok = logger:add_handler(?MODULE,logger_disk_log_h, - #{filter_default=>log, - formatter=>{?MODULE,self()}}), - #{id := ?MODULE} = logger_disk_log_h:info(?MODULE), - ok = logger_disk_log_h:reset(?MODULE). -info_and_reset(cleanup,_Config) -> - logger:remove_handler(?MODULE). + logger_disk_log_h:filesync("BadType"). reconfig(Config) -> Dir = ?config(priv_dir,Config), @@ -479,7 +463,7 @@ reconfig(Config) -> #{filter_default=>log, filters=>?DEFAULT_HANDLER_FILTERS([?MODULE]), formatter=>{?MODULE,self()}}), - #{id := ?MODULE, + #{%id := ?MODULE, sync_mode_qlen := ?SYNC_MODE_QLEN, drop_mode_qlen := ?DROP_MODE_QLEN, flush_qlen := ?FLUSH_QLEN, @@ -490,13 +474,14 @@ reconfig(Config) -> overload_kill_qlen := ?OVERLOAD_KILL_QLEN, overload_kill_mem_size := ?OVERLOAD_KILL_MEM_SIZE, overload_kill_restart_after := ?OVERLOAD_KILL_RESTART_AFTER, - filesync_repeat_interval := ?FILESYNC_REPEAT_INTERVAL, - handler_state := - #{log_opts := #{type := ?DISK_LOG_TYPE, - max_no_files := ?DISK_LOG_MAX_NO_FILES, - max_no_bytes := ?DISK_LOG_MAX_NO_BYTES, - file := DiskLogFile}}} = - logger_disk_log_h:info(?MODULE), + cb_state := + #{handler_state := + #{log_opts := #{type := ?DISK_LOG_TYPE, + max_no_files := ?DISK_LOG_MAX_NO_FILES, + max_no_bytes := ?DISK_LOG_MAX_NO_BYTES, + file := DiskLogFile}}, + filesync_repeat_interval := ?FILESYNC_REPEAT_INTERVAL}} = + logger_olp:info(h_proc_name()), {ok,#{config := #{sync_mode_qlen := ?SYNC_MODE_QLEN, drop_mode_qlen := ?DROP_MODE_QLEN, @@ -527,7 +512,7 @@ reconfig(Config) -> overload_kill_restart_after => infinity, filesync_repeat_interval => no_repeat}, ok = logger:set_handler_config(?MODULE, config, HConfig1), - #{id := ?MODULE, + #{%id := ?MODULE, sync_mode_qlen := 1, drop_mode_qlen := 2, flush_qlen := 3, @@ -538,8 +523,8 @@ reconfig(Config) -> overload_kill_qlen := 100000, overload_kill_mem_size := 10000000, overload_kill_restart_after := infinity, - filesync_repeat_interval := no_repeat} = - logger_disk_log_h:info(?MODULE), + cb_state := #{filesync_repeat_interval := no_repeat}} = + logger_olp:info(h_proc_name()), {ok,#{config:=HConfig1}} = logger:get_handler_config(?MODULE), ok = logger:update_handler_config(?MODULE, config, @@ -577,12 +562,13 @@ reconfig(Config) -> max_no_files => 1, max_no_bytes => 1024, file => File}}), - #{handler_state := - #{log_opts := #{type := halt, - max_no_files := 1, - max_no_bytes := 1024, - file := File}}} = - logger_disk_log_h:info(?MODULE), + #{cb_state := + #{handler_state := + #{log_opts := #{type := halt, + max_no_files := 1, + max_no_bytes := 1024, + file := File}}}} = + logger_olp:info(h_proc_name()), {ok,#{config := #{type := halt, max_no_files := 1, @@ -650,13 +636,8 @@ sync(Config) -> {ok,#{config := HConfig}} = logger:get_handler_config(?MODULE), HConfig1 = HConfig#{filesync_repeat_interval => no_repeat}, ok = logger:update_handler_config(?MODULE, config, HConfig1), - no_repeat = maps:get(filesync_repeat_interval, - logger_disk_log_h:info(?MODULE)), - %% The following timer is to make sure the time from last log - %% ("first") to next ("second") is long enough, so the a flush is - %% triggered by the idle timeout between "fourth" and "fifth". - timer:sleep(?IDLE_DETECT_TIME_MSEC*2), + maps:get(cb_state,logger_olp:info(h_proc_name()))), start_tracer([{logger_disk_log_h,disk_log_write,3}, {disk_log,sync,1}], @@ -666,10 +647,10 @@ sync(Config) -> {disk_log,sync}]), logger:notice("second", ?domain), - timer:sleep(?IDLE_DETECT_TIME_MSEC*2), + timer:sleep(?IDLE_DETECT_TIME*2), logger:notice("third", ?domain), %% wait for automatic disk_log_sync - check_tracer(?IDLE_DETECT_TIME_MSEC*2), + check_tracer(?IDLE_DETECT_TIME*2), try_read_file(Log, {ok,<<"first\nsecond\nthird\n">>}, 1000), @@ -678,14 +659,15 @@ sync(Config) -> WaitT = 4500, OneSync = {logger_h_common,handle_cast,repeated_filesync}, %% receive 1 repeated_filesync per sec - start_tracer([{logger_h_common,handle_cast,2}], + start_tracer([{{logger_h_common,handle_cast,2}, + [{[repeated_filesync,'_'],[],[{message,{caller}}]}]}], [OneSync || _ <- lists:seq(1, trunc(WaitT/SyncInt))]), HConfig2 = HConfig#{filesync_repeat_interval => SyncInt}, ok = logger:update_handler_config(?MODULE, config, HConfig2), SyncInt = maps:get(filesync_repeat_interval, - logger_disk_log_h:info(?MODULE)), + maps:get(cb_state,logger_olp:info(h_proc_name()))), timer:sleep(WaitT), HConfig3 = HConfig#{filesync_repeat_interval => no_repeat}, ok = logger:update_handler_config(?MODULE, config, HConfig3), @@ -803,7 +785,7 @@ disk_log_full(cleanup, _Config) -> dbg:stop_clear(), logger:remove_handler(?MODULE). -disk_log_events(Config) -> +disk_log_events(_Config) -> Node = node(), Log = ?MODULE, ok = logger:add_handler(?MODULE, @@ -860,10 +842,12 @@ write_failure(Config) -> rpc:call(Node, ets, insert, [?TEST_HOOKS_TAB,{tester,self()}]), rpc:call(Node, ?MODULE, set_internal_log, [?MODULE,internal_log]), rpc:call(Node, ?MODULE, set_result, [disk_log_write,ok]), - HState = rpc:call(Node, logger_disk_log_h, info, [?STANDARD_HANDLER]), - ct:pal("LogOpts = ~p", [LogOpts = maps:get(log_opts, - maps:get(handler_state,HState))]), - + HState = rpc:call(Node, logger_olp, info, [h_proc_name(?STANDARD_HANDLER)]), + LogOpts = maps:get(log_opts, + maps:get(handler_state, + maps:get(cb_state,HState))), + ct:pal("LogOpts = ~p", [LogOpts]), + %% ?check and ?check_no_log in this test only check for internal log events ok = log_on_remote_node(Node, "Logged1"), rpc:call(Node, logger_disk_log_h, filesync, [?STANDARD_HANDLER]), @@ -914,15 +898,16 @@ sync_failure(Config) -> rpc:call(Node, ets, insert, [?TEST_HOOKS_TAB,{tester,self()}]), rpc:call(Node, ?MODULE, set_internal_log, [?MODULE,internal_log]), rpc:call(Node, ?MODULE, set_result, [disk_log_sync,ok]), - HState = rpc:call(Node, logger_disk_log_h, info, [?STANDARD_HANDLER]), - LogOpts = maps:get(log_opts, maps:get(handler_state,HState)), + HState = rpc:call(Node, logger_olp, info, [h_proc_name(?STANDARD_HANDLER)]), + LogOpts = maps:get(log_opts, maps:get(handler_state, + maps:get(cb_state,HState))), SyncInt = 500, ok = rpc:call(Node, logger, update_handler_config, [?STANDARD_HANDLER, config, #{filesync_repeat_interval => SyncInt}]), - Info = rpc:call(Node, logger_disk_log_h, info, [?STANDARD_HANDLER]), - SyncInt = maps:get(filesync_repeat_interval, Info), + Info = rpc:call(Node, logger_olp, info, [h_proc_name(?STANDARD_HANDLER)]), + SyncInt = maps:get(filesync_repeat_interval, maps:get(cb_state, Info)), ok = log_on_remote_node(Node, "Logged1"), ?check_no_log, @@ -1198,7 +1183,7 @@ qlen_kill_new(Config) -> receive {'DOWN', MRef, _, _, Info} -> case Info of - {shutdown,{overloaded,?MODULE,QLen,Mem}} -> + {shutdown,{overloaded,QLen,Mem}} -> ct:pal("Terminated with qlen = ~w, mem = ~w", [QLen,Mem]); killed -> ct:pal("Slow shutdown, handler process was killed!", []) @@ -1208,7 +1193,7 @@ qlen_kill_new(Config) -> ok after 5000 -> - Info = logger_disk_log_h:info(?MODULE), + Info = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info]), ct:fail("Handler not dead! It should not have survived this!") end. @@ -1235,7 +1220,7 @@ mem_kill_new(Config) -> receive {'DOWN', MRef, _, _, Info} -> case Info of - {shutdown,{overloaded,?MODULE,QLen,Mem}} -> + {shutdown,{overloaded,QLen,Mem}} -> ct:pal("Terminated with qlen = ~w, mem = ~w", [QLen,Mem]); killed -> ct:pal("Slow shutdown, handler process was killed!", []) @@ -1245,7 +1230,7 @@ mem_kill_new(Config) -> ok after 5000 -> - Info = logger_disk_log_h:info(?MODULE), + Info = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info]), ct:fail("Handler not dead! It should not have survived this!") end. @@ -1271,7 +1256,7 @@ restart_after(Config) -> ok after 5000 -> - Info1 = logger_std_h:info(?MODULE), + Info1 = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info1]), ct:fail("Handler not dead! It should not have survived this!") end, @@ -1295,7 +1280,7 @@ restart_after(Config) -> ok after 5000 -> - Info2 = logger_std_h:info(?MODULE), + Info2 = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info2]), ct:fail("Handler not dead! It should not have survived this!") end, @@ -1316,11 +1301,15 @@ handler_requests_under_load(Config) -> flush_qlen => 2000, burst_limit_enable => false}}, ok = logger:update_handler_config(?MODULE, NewHConfig), - Pid = spawn_link(fun() -> send_requests(?MODULE, 1, [{filesync,[]}, - {info,[]}, - {reset,[]}, - {change_config,[]}]) - end), + Pid = spawn_link( + fun() -> send_requests(1,[{logger_disk_log_h,filesync,[?MODULE],[]}, + {logger_olp,info,[h_proc_name()],[]}, + {logger_olp,reset,[h_proc_name()],[]}, + {logger,update_handler_config, + [?MODULE, config, + #{overload_kill_enable => false}], + []}]) + end), Procs = 100, Sent = Procs * send_burst({n,5000}, {spawn,Procs,10}, {chars,79}, notice), Pid ! {self(),finish}, @@ -1332,29 +1321,22 @@ handler_requests_under_load(Config) -> [E || E <- Res, is_tuple(E) andalso (element(1,E) == error)] end, - Errors = [{Req,FindError(Res)} || {Req,Res} <- ReqResult], - NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult), + Errors = [{Func,FindError(Res)} || {_,Func,_,Res} <- ReqResult], + NoOfReqs = lists:foldl(fun({_,_,_,Res}, N) -> N + length(Res) end, + 0, ReqResult), ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]), ok = file_delete(Log). handler_requests_under_load(cleanup, _Config) -> ok = stop_handler(?MODULE). -send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) -> +send_requests(TO, Reqs = [{Mod,Func,Args,Res}|Rs]) -> receive {From,finish} -> From ! {self(),Reqs} after TO -> - Result = - case Req of - change_config -> - logger:update_handler_config(HName, logger_disk_log_h, - #{overload_kill_enable => - false}); - Func -> - logger_disk_log_h:Func(HName) - end, - send_requests(HName, TO, Rs ++ [{Req,[Result|Res]}]) + Result = apply(Mod,Func,Args), + send_requests(TO, Rs ++ [{Mod,Func,Args,[Result|Res]}]) end. %%%----------------------------------------------------------------- @@ -1472,15 +1454,6 @@ format(Msg,Tag) -> erlang:display(Error), exit(Error). -remove(Handler, LogName) -> - logger_disk_log_h:remove(Handler, LogName), - HState = #{log_names := Logs} = logger_disk_log_h:info(), - false = maps:is_key(LogName, HState), - false = lists:member(LogName, Logs), - false = logger_config:exist(?LOGGER_TABLE, LogName), - {error,no_such_log} = disk_log:info(LogName), - ok. - start_and_add(Name, Config, LogOpts) -> HConfig = maps:get(config, Config, #{}), HConfig1 = maps:merge(HConfig, LogOpts), @@ -1607,7 +1580,9 @@ start_tracer(Trace,Expected) -> ok. tpl([{M,F,A}|Trace]) -> - {ok,Match} = dbg:tpl(M,F,A,c), + tpl([{{M,F,A},c}|Trace]); +tpl([{{M,F,A},MS}|Trace]) -> + {ok,Match} = dbg:tpl(M,F,A,MS), case lists:keyfind(matched,1,Match) of {_,_,1} -> ok; diff --git a/lib/kernel/test/logger_env_var_SUITE.erl b/lib/kernel/test/logger_env_var_SUITE.erl index e8d1a313dc..9d2ad11be8 100644 --- a/lib/kernel/test/logger_env_var_SUITE.erl +++ b/lib/kernel/test/logger_env_var_SUITE.erl @@ -59,7 +59,8 @@ groups() -> logger_undefined, logger_many_handlers_default_first, logger_many_handlers_default_last, - logger_many_handlers_default_last_broken_filter + logger_many_handlers_default_last_broken_filter, + logger_proxy ]}, {bad,[],[bad_error_logger, bad_level, @@ -541,6 +542,19 @@ logger_many_handlers(Config, Env, LogErr, LogInfo, NumProgress) -> ok. +logger_proxy(Config) -> + %% assume current node runs with default settings + DefOpts = logger_olp:get_opts(logger_proxy), + {ok,_,Node} = setup(Config, + [{logger,[{proxy,#{sync_mode_qlen=>0, + drop_mode_qlen=>2}}]}]), + Expected = DefOpts#{sync_mode_qlen:=0, + drop_mode_qlen:=2}, + Expected = rpc:call(Node,logger_olp,get_opts,[logger_proxy]), + Expected = rpc:call(Node,logger,get_proxy_config,[]), + + ok. + sasl_compatible_false(Config) -> Log = file(Config,?FUNCTION_NAME), {ok,_,Node} = setup(Config, diff --git a/lib/kernel/test/logger_olp_SUITE.erl b/lib/kernel/test/logger_olp_SUITE.erl new file mode 100644 index 0000000000..ea3eec89f5 --- /dev/null +++ b/lib/kernel/test/logger_olp_SUITE.erl @@ -0,0 +1,90 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_olp_SUITE). + +-compile(export_all). + +-include_lib("kernel/src/logger_olp.hrl"). + +suite() -> + [{timetrap,{seconds,30}}]. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_Group, Config) -> + Config. + +end_per_group(_Group, _Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(Case, Config) -> + try apply(?MODULE,Case,[cleanup,Config]) + catch error:undef -> ok + end, + ok. + +groups() -> + []. + +all() -> + [idle_timer]. + +%%%----------------------------------------------------------------- +%%% Test cases +idle_timer(_Config) -> + {ok,_Pid,Olp} = logger_olp:start_link(?MODULE,?MODULE,self(),#{}), + [logger_olp:load(Olp,{msg,N}) || N<-lists:seq(1,3)], + timer:sleep(?IDLE_DETECT_TIME*2), + [{load,{msg,1}}, + {load,{msg,2}}, + {load,{msg,3}}, + {notify,idle}] = test_server:messages_get(), + logger_olp:cast(Olp,hello), + timer:sleep(?IDLE_DETECT_TIME*2), + [{cast,hello}] = test_server:messages_get(), + ok. +idle_timer(cleanup,_Config) -> + unlink(whereis(?MODULE)), + logger_olp:stop(?MODULE), + ok. + +%%%----------------------------------------------------------------- +%%% Olp callbacks +init(P) -> + {ok,P}. + +handle_load(M,P) -> + P ! {load,M}, + P. + +handle_cast(M,P) -> + P ! {cast,M}, + {noreply,P}. + +notify(N,P) -> + P ! {notify,N}, + P. diff --git a/lib/kernel/test/logger_proxy_SUITE.erl b/lib/kernel/test/logger_proxy_SUITE.erl new file mode 100644 index 0000000000..777531e4ed --- /dev/null +++ b/lib/kernel/test/logger_proxy_SUITE.erl @@ -0,0 +1,274 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_proxy_SUITE). + +-compile(export_all). + +%% -include_lib("common_test/include/ct.hrl"). +%% -include_lib("kernel/include/logger.hrl"). +%% -include_lib("kernel/src/logger_internal.hrl"). + +%% -define(str,"Log from "++atom_to_list(?FUNCTION_NAME)++ +%% ":"++integer_to_list(?LINE)). +%% -define(map_rep,#{function=>?FUNCTION_NAME, line=>?LINE}). +%% -define(keyval_rep,[{function,?FUNCTION_NAME}, {line,?LINE}]). + +%% -define(MY_LOC(N),#{mfa=>{?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY}, +%% file=>?FILE, line=>?LINE-N}). + +%% -define(TRY(X), my_try(fun() -> X end)). + + +-define(HNAME,list_to_atom(lists:concat([?MODULE,"_",?FUNCTION_NAME]))). +-define(LOC,#{mfa=>{?MODULE,?FUNCTION_NAME,?FUNCTION_ARITY},line=>?LINE}). +-define(ENSURE_TIME,5000). + +suite() -> + [{timetrap,{seconds,30}}, + {ct_hooks,[logger_test_lib]}]. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_Group, Config) -> + Config. + +end_per_group(_Group, _Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(Case, Config) -> + try apply(?MODULE,Case,[cleanup,Config]) + catch error:undef -> ok + end, + ok. + +groups() -> + []. + +all() -> + [basic, + emulator, + remote, + remote_emulator, + config, + restart_after, + terminate]. + +%%%----------------------------------------------------------------- +%%% Test cases +basic(_Config) -> + ok = logger:add_handler(?HNAME,?MODULE,#{config=>self()}), + logger_proxy ! {log,notice,"Log from: ~p; ~p",[?FUNCTION_NAME,?LINE],L1=?LOC}, + ok = ensure(L1), + logger_proxy ! {log,notice,[{test_case,?FUNCTION_NAME},{line,?LINE}],L2=?LOC}, + ok = ensure(L2), + logger_proxy:log({remote,node(),{log,notice, + "Log from: ~p; ~p", + [?FUNCTION_NAME,?LINE], + L3=?LOC}}), + ok = ensure(L3), + logger_proxy:log({remote,node(),{log,notice, + [{test_case,?FUNCTION_NAME}, + {line,?LINE}], + L4=?LOC}}), + ok = ensure(L4), + ok. +basic(cleanup,_Config) -> + ok = logger:remove_handler(?HNAME). + +emulator(_Config) -> + ok = logger:add_handler(?HNAME,?MODULE,#{config=>self()}), + Pid = spawn(fun() -> erlang:error(some_reason) end), + ok = ensure(#{pid=>Pid}), + ok. +emulator(cleanup,_Config) -> + ok = logger:remove_handler(?HNAME). + +remote(Config) -> + {ok,_,Node} = logger_test_lib:setup(Config,[{logger,[{proxy,#{}}]}]), + ok = logger:add_handler(?HNAME,?MODULE,#{config=>self()}), + L1 = ?LOC, spawn(Node,fun() -> logger:notice("Log from ~p; ~p",[?FUNCTION_NAME,?LINE],L1) end), + ok = ensure(L1), + L2 = ?LOC, spawn(Node,fun() -> logger:notice([{test_case,?FUNCTION_NAME},{line,?LINE}],L2) end), + ok = ensure(L2), + ok. +remote(cleanup,_Config) -> + ok = logger:remove_handler(?HNAME). + +remote_emulator(Config) -> + {ok,_,Node} = logger_test_lib:setup(Config,[{logger,[{proxy,#{}}]}]), + ok = logger:add_handler(?HNAME,?MODULE,#{config=>self()}), + Pid = spawn(Node,fun() -> erlang:error(some_reason) end), + ok = ensure(#{pid=>Pid}), + ok. +remote_emulator(cleanup,_Config) -> + ok = logger:remove_handler(?HNAME). + +config(_Config) -> + C1 = #{sync_mode_qlen:=SQ, + drop_mode_qlen:=DQ} = logger:get_proxy_config(), + C1 = logger_olp:get_opts(logger_proxy), + + %% Update the existing config with these two values + SQ1 = SQ+1, + DQ1 = DQ+1, + ok = logger:update_proxy_config(#{sync_mode_qlen=>SQ1, + drop_mode_qlen=>DQ1}), + C2 = logger:get_proxy_config(), % reads from ets table + C2 = logger_olp:get_opts(logger_proxy), % ensure consistency with process opts + C2 = C1#{sync_mode_qlen:=SQ1, + drop_mode_qlen:=DQ1}, + + %% Update the existing again with only one value + SQ2 = SQ+2, + ok = logger:update_proxy_config(#{sync_mode_qlen=>SQ2}), + C3 = logger:get_proxy_config(), + C3 = logger_olp:get_opts(logger_proxy), + C3 = C2#{sync_mode_qlen:=SQ2}, + + %% Set the config, i.e. merge with defaults + ok = logger:set_proxy_config(#{sync_mode_qlen=>SQ1}), + C4 = logger:get_proxy_config(), + C4 = logger_olp:get_opts(logger_proxy), + C4 = C1#{sync_mode_qlen:=SQ1}, + + %% Reset to default + ok = logger:set_proxy_config(#{}), + C5 = logger:get_proxy_config(), + C5 = logger_olp:get_opts(logger_proxy), + C5 = logger_proxy:get_default_config(), + + %% Errors + {error,{invalid_olp_config,_}} = + logger:set_proxy_config(#{faulty_key=>1}), + {error,{invalid_olp_config,_}} = + logger:set_proxy_config(#{sync_mode_qlen=>infinity}), + {error,{invalid_config,[]}} = logger:set_proxy_config([]), + + {error,{invalid_olp_config,_}} = + logger:update_proxy_config(#{faulty_key=>1}), + {error,{invalid_olp_config,_}} = + logger:update_proxy_config(#{sync_mode_qlen=>infinity}), + {error,{invalid_config,[]}} = logger:update_proxy_config([]), + + C5 = logger:get_proxy_config(), + C5 = logger_olp:get_opts(logger_proxy), + + ok. +config(cleanup,_Config) -> + _ = logger:set_logger_proxy(logger_proxy:get_default_config()), + ok. + +restart_after(_Config) -> + Restart = 3000, + ok = logger:update_proxy_config(#{overload_kill_enable => true, + overload_kill_qlen => 10, + overload_kill_restart_after => Restart}), + Proxy = whereis(logger_proxy), + Proxy = erlang:system_info(system_logger), + ProxyConfig = logger:get_proxy_config(), + ProxyConfig = logger_olp:get_opts(logger_proxy), + + Ref = erlang:monitor(process,Proxy), + spawn(fun() -> + [logger_proxy ! {log,debug, + [{test_case,?FUNCTION_NAME}, + {line,?LINE}], + ?LOC} || _ <- lists:seq(1,100)] + end), + receive + {'DOWN',Ref,_,_,_Reason} -> + undefined = erlang:system_info(system_logger), + timer:sleep(Restart), + poll_restarted(10) + after 5000 -> + ct:fail(proxy_not_terminated) + end, + + Proxy1 = whereis(logger_proxy), + Proxy1 = erlang:system_info(system_logger), + ProxyConfig = logger:get_proxy_config(), + ProxyConfig = logger_olp:get_opts(logger_proxy), + + ok. +restart_after(cleanup,_Config) -> + _ = logger:set_logger_proxy(logger_proxy:get_default_config()), + ok. + +%% Test that system_logger flag is set to logger process if +%% logger_proxy terminates for other reason than overloaded. +terminate(_Config) -> + Logger = whereis(logger), + Proxy = whereis(logger_proxy), + Proxy = erlang:system_info(system_logger), + ProxyConfig = logger:get_proxy_config(), + ProxyConfig = logger_olp:get_opts(logger_proxy), + + Ref = erlang:monitor(process,Proxy), + ok = logger_olp:stop(Proxy), + receive + {'DOWN',Ref,_,_,_Reason} -> + Logger = erlang:system_info(system_logger), + logger_proxy:restart(), + poll_restarted(10) + after 5000 -> + ct:fail(proxy_not_terminated) + end, + + Proxy1 = whereis(logger_proxy), + Proxy1 = erlang:system_info(system_logger), + ProxyConfig = logger:get_proxy_config(), + ProxyConfig = logger_olp:get_opts(logger_proxy), + + ok. + +%%%----------------------------------------------------------------- +%%% Internal functions + +poll_restarted(0) -> + ct:fail(proxy_not_restarted); +poll_restarted(N) -> + timer:sleep(1000), + case whereis(logger_proxy) of + undefined -> + poll_restarted(N-1); + _Pid -> + ok + end. + +%% Logger handler callback +log(#{meta:=Meta},#{config:=Pid}) -> + Pid ! {logged,Meta}. + +%% Check that the log from the logger callback function log/2 is received +ensure(Match) -> + receive {logged,Meta} -> + case maps:with(maps:keys(Match),Meta) of + Match -> ok; + _NoMatch -> {error,Match,Meta,test_server:messages_get()} + end + after ?ENSURE_TIME -> {error,Match,test_server:messages_get()} + end. diff --git a/lib/kernel/test/logger_std_h_SUITE.erl b/lib/kernel/test/logger_std_h_SUITE.erl index eb17a6d857..484d914ec3 100644 --- a/lib/kernel/test/logger_std_h_SUITE.erl +++ b/lib/kernel/test/logger_std_h_SUITE.erl @@ -25,10 +25,15 @@ -include_lib("kernel/include/logger.hrl"). -include_lib("kernel/src/logger_internal.hrl"). -include_lib("kernel/src/logger_h_common.hrl"). +-include_lib("kernel/src/logger_olp.hrl"). -include_lib("stdlib/include/ms_transform.hrl"). -include_lib("kernel/include/file.hrl"). --define(check_no_log, [] = test_server:messages_get()). +-define(check_no_log, + begin + timer:sleep(?IDLE_DETECT_TIME*2), + [] = test_server:messages_get() + end). -define(check(Expected), receive {log,Expected} -> @@ -115,7 +120,6 @@ all() -> crash_std_h_to_file, crash_std_h_to_disk_log, bad_input, - info_and_reset, reconfig, file_opts, sync, @@ -209,9 +213,9 @@ default_formatter(_Config) -> filter_config(_Config) -> ok = logger:add_handler(?MODULE,logger_std_h,#{}), {ok,#{config:=HConfig}=Config} = logger:get_handler_config(?MODULE), - HConfig = maps:without([handler_pid,mode_tab],HConfig), + HConfig = maps:without([olp],HConfig), - FakeFullHConfig = HConfig#{handler_pid=>self(),mode_tab=>erlang:make_ref()}, + FakeFullHConfig = HConfig#{olp=>{regname,self(),erlang:make_ref()}}, #{config:=HConfig} = logger_std_h:filter_config(Config#{config=>FakeFullHConfig}), ok. @@ -246,13 +250,13 @@ errors(Config) -> _ -> NoDir = lists:concat(["/",?MODULE,"_dir"]), {error, - {handler_not_added,{{open_failed,NoDir,eacces},_}}} = + {handler_not_added,{open_failed,NoDir,eacces}}} = logger:add_handler(myh2,logger_std_h, #{config=>#{type=>{file,NoDir}}}) end, {error, - {handler_not_added,{{open_failed,Log,_},_}}} = + {handler_not_added,{open_failed,Log,_}}} = logger:add_handler(myh3,logger_std_h, #{config=>#{type=>{file,Log,[bad_file_opt]}}}), @@ -320,19 +324,16 @@ config_fail(_Config) -> #{config => #{restart_type => bad}, filter_default=>log, formatter=>{?MODULE,self()}}), - {error,{handler_not_added,{invalid_config,logger_std_h, - {invalid_levels,#{drop_mode_qlen:=1}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{drop_mode_qlen:=1}}}} = logger:add_handler(?MODULE,logger_std_h, #{config => #{drop_mode_qlen=>1}}), - {error,{handler_not_added,{invalid_config,logger_std_h, - {invalid_levels,#{sync_mode_qlen:=43, - drop_mode_qlen:=42}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{sync_mode_qlen:=43, + drop_mode_qlen:=42}}}} = logger:add_handler(?MODULE,logger_std_h, #{config => #{sync_mode_qlen=>43, drop_mode_qlen=>42}}), - {error,{handler_not_added,{invalid_config,logger_std_h, - {invalid_levels,#{drop_mode_qlen:=43, - flush_qlen:=42}}}}} = + {error,{handler_not_added,{invalid_olp_levels,#{drop_mode_qlen:=43, + flush_qlen:=42}}}} = logger:add_handler(?MODULE,logger_std_h, #{config => #{drop_mode_qlen=>43, flush_qlen=>42}}), @@ -344,7 +345,7 @@ config_fail(_Config) -> logger:set_handler_config(?MODULE,config, #{type=>{file,"file"}}), - {error,{invalid_config,logger_std_h,{invalid_levels,_}}} = + {error,{invalid_olp_levels,_}} = logger:set_handler_config(?MODULE,config, #{sync_mode_qlen=>100, flush_qlen=>99}), @@ -355,9 +356,7 @@ config_fail(_Config) -> %% Read-only fields may (accidentially) be included in the change, %% but it won't take effect {ok,C} = logger:get_handler_config(?MODULE), - ok = logger:set_handler_config(?MODULE,config, - #{handler_pid=>self(), - mode_tab=>erlang:make_ref()}), + ok = logger:set_handler_config(?MODULE,config,#{olp=>dummyvalue}), {ok,C} = logger:get_handler_config(?MODULE), ok. @@ -425,10 +424,13 @@ crash_std_h(Config,Func,Var,Type,Log) -> %% logger would send the log event to the logger process here instead %% of logging it itself. log_on_remote_node(Node,Msg) -> + Pid = self(), _ = spawn_link(Node, fun() -> erlang:group_leader(whereis(user),self()), - logger:notice(Msg) + logger:notice(Msg), + Pid ! done end), + receive done -> ok end, ok. @@ -456,14 +458,7 @@ sync_and_read(Node,file,Log) -> end. bad_input(_Config) -> - {error,{badarg,{filesync,["BadType"]}}} = logger_std_h:filesync("BadType"), - {error,{badarg,{info,["BadType"]}}} = logger_std_h:info("BadType"), - {error,{badarg,{reset,["BadType"]}}} = logger_std_h:reset("BadType"). - - -info_and_reset(_Config) -> - #{id := ?STANDARD_HANDLER} = logger_std_h:info(?STANDARD_HANDLER), - ok = logger_std_h:reset(?STANDARD_HANDLER). + {error,{badarg,{filesync,["BadType"]}}} = logger_std_h:filesync("BadType"). reconfig(Config) -> Dir = ?config(priv_dir,Config), @@ -473,9 +468,10 @@ reconfig(Config) -> filter_default=>log, filters=>?DEFAULT_HANDLER_FILTERS([?MODULE]), formatter=>{?MODULE,self()}}), - #{id := ?MODULE, - handler_state := #{type := standard_io, - file_ctrl_pid := FileCtrlPid}, + #{%id := ?MODULE, + cb_state:=#{handler_state := #{type := standard_io, + file_ctrl_pid := FileCtrlPid}, + filesync_repeat_interval := no_repeat}, sync_mode_qlen := ?SYNC_MODE_QLEN, drop_mode_qlen := ?DROP_MODE_QLEN, flush_qlen := ?FLUSH_QLEN, @@ -485,9 +481,8 @@ reconfig(Config) -> overload_kill_enable := ?OVERLOAD_KILL_ENABLE, overload_kill_qlen := ?OVERLOAD_KILL_QLEN, overload_kill_mem_size := ?OVERLOAD_KILL_MEM_SIZE, - overload_kill_restart_after := ?OVERLOAD_KILL_RESTART_AFTER, - filesync_repeat_interval := no_repeat} = DefaultInfo = - logger_std_h:info(?MODULE), + overload_kill_restart_after := ?OVERLOAD_KILL_RESTART_AFTER} = + logger_olp:info(h_proc_name()), {ok, #{config:= @@ -518,9 +513,10 @@ reconfig(Config) -> overload_kill_mem_size => 10000000, overload_kill_restart_after => infinity, filesync_repeat_interval => 5000}), - #{id := ?MODULE, - handler_state := #{type := standard_io, - file_ctrl_pid := FileCtrlPid}, + #{%id := ?MODULE, + cb_state := #{handler_state := #{type := standard_io, + file_ctrl_pid := FileCtrlPid}, + filesync_repeat_interval := no_repeat}, sync_mode_qlen := 1, drop_mode_qlen := 2, flush_qlen := 3, @@ -530,8 +526,7 @@ reconfig(Config) -> overload_kill_enable := true, overload_kill_qlen := 100000, overload_kill_mem_size := 10000000, - overload_kill_restart_after := infinity, - filesync_repeat_interval := no_repeat} = Info = logger_std_h:info(?MODULE), + overload_kill_restart_after := infinity} = logger_olp:info(h_proc_name()), {ok,#{config := #{type := standard_io, @@ -613,7 +608,7 @@ file_opts(Config) -> Log = filename:join(Dir, lists:concat([?FUNCTION_NAME,".log"])), BadFileOpts = [raw], BadType = {file,Log,BadFileOpts}, - {error,{handler_not_added,{{open_failed,Log,enoent},_}}} = + {error,{handler_not_added,{open_failed,Log,enoent}}} = logger:add_handler(?MODULE, logger_std_h, #{config => #{type => BadType}}), @@ -626,7 +621,9 @@ file_opts(Config) -> filters=>?DEFAULT_HANDLER_FILTERS([?MODULE]), formatter=>{?MODULE,self()}}), - #{handler_state := #{type := OkType}} = logger_std_h:info(?MODULE), + #{cb_state := #{handler_state := #{type := OkType}}} = + logger_olp:info(h_proc_name()), + {ok,#{config := #{type := OkType}}} = logger:get_handler_config(?MODULE), logger:notice(M1=?msg,?domain), ?check(M1), B1 = ?bin(M1), @@ -675,11 +672,8 @@ sync(Config) -> %% a filesync is still performed when handler goes idle ok = logger:update_handler_config(?MODULE, config, #{filesync_repeat_interval => no_repeat}), - no_repeat = maps:get(filesync_repeat_interval, logger_std_h:info(?MODULE)), - %% The following timer is to make sure the time from last log - %% ("second") to next ("third") is long enough, so the a flush is - %% triggered by the idle timeout between "thrid" and "fourth". - timer:sleep(?IDLE_DETECT_TIME_MSEC*2), + no_repeat = maps:get(filesync_repeat_interval, + maps:get(cb_state, logger_olp:info(h_proc_name()))), start_tracer([{logger_std_h, write_to_dev, 5}, {file, datasync, 1}], [{logger_std_h, write_to_dev, <<"third\n">>}, @@ -688,22 +682,24 @@ sync(Config) -> {file,datasync}]), logger:notice("third", ?domain), %% wait for automatic filesync - timer:sleep(?IDLE_DETECT_TIME_MSEC*2), + timer:sleep(?IDLE_DETECT_TIME*2), logger:notice("fourth", ?domain), %% wait for automatic filesync - check_tracer(?IDLE_DETECT_TIME_MSEC*2), + check_tracer(?IDLE_DETECT_TIME*2), %% switch repeated filesync on and verify that the looping works SyncInt = 1000, WaitT = 4500, OneSync = {logger_h_common,handle_cast,repeated_filesync}, %% receive 1 repeated_filesync per sec - start_tracer([{logger_h_common,handle_cast,2}], + start_tracer([{{logger_h_common,handle_cast,2}, + [{[repeated_filesync,'_'],[],[]}]}], [OneSync || _ <- lists:seq(1, trunc(WaitT/SyncInt))]), ok = logger:update_handler_config(?MODULE, config, #{filesync_repeat_interval => SyncInt}), - SyncInt = maps:get(filesync_repeat_interval, logger_std_h:info(?MODULE)), + SyncInt = maps:get(filesync_repeat_interval, + maps:get(cb_state,logger_olp:info(h_proc_name()))), timer:sleep(WaitT), ok = logger:update_handler_config(?MODULE, config, #{filesync_repeat_interval => no_repeat}), @@ -764,8 +760,6 @@ sync_failure(Config) -> ok = rpc:call(Node, logger, update_handler_config, [?STANDARD_HANDLER, config, #{filesync_repeat_interval => SyncInt}]), - Info = rpc:call(Node, logger_std_h, info, [?STANDARD_HANDLER]), - SyncInt = maps:get(filesync_repeat_interval, Info), ok = log_on_remote_node(Node, "Logged1"), ?check_no_log, @@ -1095,7 +1089,7 @@ qlen_kill_new(Config) -> receive {'DOWN', MRef, _, _, Info} -> case Info of - {shutdown,{overloaded,?MODULE,QLen,Mem}} -> + {shutdown,{overloaded,QLen,Mem}} -> ct:pal("Terminated with qlen = ~w, mem = ~w", [QLen,Mem]); killed -> ct:pal("Slow shutdown, handler process was killed!", []) @@ -1105,7 +1099,7 @@ qlen_kill_new(Config) -> ok after 5000 -> - Info = logger_std_h:info(?MODULE), + Info = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info]), ct:fail("Handler not dead! It should not have survived this!") end. @@ -1146,7 +1140,7 @@ mem_kill_new(Config) -> receive {'DOWN', MRef, _, _, Info} -> case Info of - {shutdown,{overloaded,?MODULE,QLen,Mem}} -> + {shutdown,{overloaded,QLen,Mem}} -> ct:pal("Terminated with qlen = ~w, mem = ~w", [QLen,Mem]); killed -> ct:pal("Slow shutdown, handler process was killed!", []) @@ -1156,7 +1150,7 @@ mem_kill_new(Config) -> ok after 5000 -> - Info = logger_std_h:info(?MODULE), + Info = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info]), ct:fail("Handler not dead! It should not have survived this!") end. @@ -1187,7 +1181,7 @@ restart_after(Config) -> ok after 5000 -> - Info1 = logger_std_h:info(?MODULE), + Info1 = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info1]), ct:fail("Handler not dead! It should not have survived this!") end, @@ -1212,7 +1206,7 @@ restart_after(Config) -> ok after 5000 -> - Info2 = logger_std_h:info(?MODULE), + Info2 = logger_olp:info(h_proc_name()), ct:pal("Handler state = ~p", [Info2]), ct:fail("Handler not dead! It should not have survived this!") end, @@ -1234,11 +1228,15 @@ handler_requests_under_load(Config) -> flush_qlen => 2000, burst_limit_enable => false}}, ok = logger:update_handler_config(?MODULE, NewHConfig), - Pid = spawn_link(fun() -> send_requests(?MODULE, 1, [{filesync,[]}, - {info,[]}, - {reset,[]}, - {change_config,[]}]) - end), + Pid = spawn_link( + fun() -> send_requests(1,[{logger_std_h,filesync,[?MODULE],[]}, + {logger_olp,info,[h_proc_name()],[]}, + {logger_olp,reset,[h_proc_name()],[]}, + {logger,update_handler_config, + [?MODULE, config, + #{overload_kill_enable => false}], + []}]) + end), Sent = send_burst({t,10000}, seq, {chars,79}, notice), Pid ! {self(),finish}, ReqResult = receive {Pid,Result} -> Result end, @@ -1249,8 +1247,9 @@ handler_requests_under_load(Config) -> [E || E <- Res, is_tuple(E) andalso (element(1,E) == error)] end, - Errors = [{Req,FindError(Res)} || {Req,Res} <- ReqResult], - NoOfReqs = lists:foldl(fun({_,Res}, N) -> N + length(Res) end, 0, ReqResult), + Errors = [{Func,FindError(Res)} || {_,Func,_,Res} <- ReqResult], + NoOfReqs = lists:foldl(fun({_,_,_,Res}, N) -> N + length(Res) end, + 0, ReqResult), ct:pal("~w requests made. Errors: ~n~p", [NoOfReqs,Errors]), ok = file_delete(Log). handler_requests_under_load(cleanup, _Config) -> @@ -1272,22 +1271,14 @@ recreate_deleted_log(cleanup, _Config) -> %%%----------------------------------------------------------------- %%% -send_requests(HName, TO, Reqs = [{Req,Res}|Rs]) -> +send_requests(TO, Reqs = [{Mod,Func,Args,Res}|Rs]) -> receive {From,finish} -> From ! {self(),Reqs} after TO -> - Result = - case Req of - change_config -> - logger:update_handler_config(HName, config, - #{overload_kill_enable => - false}); - Func -> - logger_std_h:Func(HName) - end, - send_requests(HName, TO, Rs ++ [{Req,[Result|Res]}]) + Result = apply(Mod,Func,Args), + send_requests(TO, Rs ++ [{Mod,Func,Args,[Result|Res]}]) end. @@ -1624,7 +1615,8 @@ start_tracer(Trace,Expected) -> Pid = self(), FileCtrlPid = maps:get(file_ctrl_pid, maps:get(handler_state, - logger_std_h:info(?MODULE))), + maps:get(cb_state, + logger_olp:info(h_proc_name())))), dbg:tracer(process,{fun tracer/2,{Pid,Expected}}), dbg:p(whereis(h_proc_name()),[c]), dbg:p(FileCtrlPid,[c]), @@ -1632,7 +1624,9 @@ start_tracer(Trace,Expected) -> ok. tpl([{M,F,A}|Trace]) -> - {ok,Match} = dbg:tpl(M,F,A,[]), + tpl([{{M,F,A},[]}|Trace]); +tpl([{{M,F,A},MS}|Trace]) -> + {ok,Match} = dbg:tpl(M,F,A,MS), case lists:keyfind(matched,1,Match) of {_,_,1} -> ok; diff --git a/lib/kernel/test/logger_stress_SUITE.erl b/lib/kernel/test/logger_stress_SUITE.erl new file mode 100644 index 0000000000..4072e8c86a --- /dev/null +++ b/lib/kernel/test/logger_stress_SUITE.erl @@ -0,0 +1,456 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2018. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(logger_stress_SUITE). + +-compile(export_all). + +-include_lib("common_test/include/ct_event.hrl"). +-include_lib("kernel/include/logger.hrl"). +-include_lib("kernel/src/logger_h_common.hrl"). + +-ifdef(SAVE_STATS). + -define(COLLECT_STATS(_All_,_Procs_), + ct:pal("~p",[stats(_All_,_Procs_)])). +-else. + -define(COLLECT_STATS(_All_,_Procs__), ok). +-endif. + +-define(TEST_DURATION,120). % seconds + +suite() -> + [{timetrap,{minutes,3}}, + {ct_hooks,[logger_test_lib]}]. + +init_per_suite(Config) -> + Config. + +end_per_suite(_Config) -> + ok. + +init_per_group(_Group, Config) -> + Config. + +end_per_group(_Group, _Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(Case, Config) -> + try apply(?MODULE,Case,[cleanup,Config]) + catch error:undef -> ok + end, + ok. + +groups() -> + []. + +all() -> + [allow_events, + reject_events, + std_handler, + disk_log_handler, + emulator_events, + remote_events, + remote_to_disk_log, + remote_emulator_events, + remote_emulator_to_disk_log]. + +%%%----------------------------------------------------------------- +%%% Test cases +%%%----------------------------------------------------------------- +%% Time from log macro call to handler callback +allow_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}, + {logger_level,notice}]), + N = 100000, + {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end), + IOPS = N * 1000/T, % log events allowed per millisecond + ct_event:notify(#event{name = benchmark_data, + data = [{value,IOPS}]}), + {comment,io_lib:format("~.2f accepted events pr millisecond", + [IOPS])}. + +%% Time from log macro call to reject (log level) +reject_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}, + {logger_level,error}]), + N = 1000000, + {T,_} = timer:tc(fun() -> rpc:call(Node,?MODULE,nlogs,[N]) end), + IOPS = N * 1000/T, % log events rejected per millisecond + ct_event:notify(#event{name = benchmark_data, + data = [{value,IOPS}]}), + {comment,io_lib:format("~.2f rejected events pr millisecond", + [IOPS])}. + +%% Cascading failure that produce gen_server and proc_lib reports - +%% how many of the produced log events are actually written to a log +%% with logger_std_h file handler. +std_handler(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,logger_std_h, + #{config=>#{type=>{file,"default.log"}}}}]}]), + + cascade({Node,{logger_backend,log_allowed,2},[]}, + {Node,{logger_std_h,write,4},[{default,logger_std_h_default}]}, + fun otp_cascading/0). +std_handler(cleanup,_Config) -> + _ = file:delete("default.log"), + ok. + +%% Cascading failure that produce gen_server and proc_lib reports - +%% how many of the produced log events are actually written to a log +%% with logger_disk_log_h wrap file handler. +disk_log_handler(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + cascade({Node,{logger_backend,log_allowed,2},[]}, + {Node,{logger_disk_log_h,write,4}, + [{default,logger_disk_log_h_default}]}, + fun otp_cascading/0). +disk_log_handler(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%% Cascading failure that produce log events from the emulator - how +%% many of the produced log events pass through the proxy. +emulator_events(Config) -> + {ok,_,Node} = + logger_test_lib:setup(Config, + [{logger, + [{handler,default,?MODULE,#{}}]}]), + cascade({Node,{?MODULE,producer,0},[]}, + {Node,{?MODULE,log,2},[{proxy,logger_proxy}]}, + fun em_cascading/0). + +%% Cascading failure that produce gen_server and proc_lib reports on +%% remote node - how many of the produced log events pass through the +%% proxy. +remote_events(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,?MODULE,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]}, + {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]}, + fun otp_cascading/0). + +%% Cascading failure that produce gen_server and proc_lib reports on +%% remote node - how many of the produced log events are actually +%% written to a log with logger_disk_log_h wrap file handler. +remote_to_disk_log(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{logger_backend,log_allowed,2},[{remote_proxy,logger_proxy}]}, + {Node1,{logger_disk_log_h,write,4}, + [{local_proxy,logger_proxy}, + {local_default,logger_disk_log_h_default}]}, + fun otp_cascading/0). +remote_to_disk_log(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%% Cascading failure that produce log events from the emulator on +%% remote node - how many of the produced log events pass through the +%% proxy. +remote_emulator_events(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,?MODULE,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]}, + {Node1,{?MODULE,log,2},[{local_proxy,logger_proxy}]}, + fun em_cascading/0). + +%% Cascading failure that produce log events from the emulator on +%% remote node - how many of the produced log events are actually +%% written to a log with logger_disk_log_h wrap file handler. +remote_emulator_to_disk_log(Config) -> + {ok,_,Node1} = + logger_test_lib:setup([{postfix,1}|Config], + [{logger, + [{handler,default,logger_disk_log_h,#{}}]}]), + {ok,_,Node2} = + logger_test_lib:setup([{postfix,2}|Config],[]), + cascade({Node2,{?MODULE,producer,0},[{remote_proxy,logger_proxy}]}, + {Node1,{logger_disk_log_h,write,4}, + [{local_proxy,logger_proxy}, + {local_default,logger_disk_log_h_default}]}, + fun em_cascading/0). +remote_emulator_to_disk_log(cleanup,_Config) -> + Files = filelib:wildcard("default.log.*"), + [_ = file:delete(F) || F <- Files], + ok. + +%%%----------------------------------------------------------------- +%%% Internal functions +nlogs(N) -> + group_leader(whereis(user),self()), + Str = "\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "[\\]^_`abcdefghijklmnopqr", + [?LOG_NOTICE(Str) || _ <- lists:seq(1,N)], + ok. + +%% cascade(ProducerInfo,ConsumerInfo,TestFun) +cascade({PNode,PMFA,_PStatProcs},{CNode,CMFA,_CStatProcs},TestFun) -> + Tab = ets:new(counter,[set,public]), + ets:insert(Tab,{producer,0}), + ets:insert(Tab,{consumer,0}), + dbg:tracer(process,{fun tracer/2,{Tab,PNode,CNode}}), + dbg:n(PNode), + dbg:n(CNode), + dbg:cn(node()), + dbg:p(all,[call,arity]), + dbg:tpl(PMFA,[]), + dbg:tpl(CMFA,[]), + + Pid = rpc:call(CNode,?MODULE,wrap_test,[PNode,TestFun]), + MRef = erlang:monitor(process,Pid), + TO = ?TEST_DURATION*1000, + receive {'DOWN',MRef,_,_,Reason} -> + ct:fail({remote_pid_down,Reason}) + after TO -> + All = ets:lookup_element(Tab,producer,2), + Written = ets:lookup_element(Tab,consumer,2), + dbg:stop_clear(), + ?COLLECT_STATS(All, + [{PNode,P,Id} || {Id,P} <- _PStatProcs] ++ + [{CNode,P,Id} || {Id,P} <- _CStatProcs]), + Ratio = Written/All * 100, + ct_event:notify(#event{name = benchmark_data, + data = [{value,Ratio}]}), + {comment,io_lib:format("~p % (~p written, ~p produced)", + [round(Ratio),Written,All])} + end. + +wrap_test(Fun) -> + wrap_test(node(),Fun). +wrap_test(Node,Fun) -> + reset(), + group_leader(whereis(user),self()), + rpc:call(Node,?MODULE,do_fun,[Fun]). + +do_fun(Fun) -> + reset(), + Fun(). + +reset() -> + reset([logger_std_h_default, logger_disk_log_h_default, logger_proxy]). +reset([P|Ps]) -> + is_pid(whereis(P)) andalso logger_olp:reset(P), + reset(Ps); +reset([]) -> + ok. + + +tracer({trace,_,call,{?MODULE,producer,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,producer,1), + S; +tracer({trace,Pid,call,{logger_backend,log_allowed,_}},{Tab,PNode,_CNode}=S) when node(Pid)=:=PNode -> + ets:update_counter(Tab,producer,1), + S; +tracer({trace,_,call,{?MODULE,log,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,consumer,1), + S; +tracer({trace,_,call,{_,write,_}},{Tab,_PNode,_CNode}=S) -> + ets:update_counter(Tab,consumer,1), + S; +tracer(_,S) -> + S. + + +%%%----------------------------------------------------------------- +%%% Collect statistics +-define(STAT_KEYS, + [burst_drops, + calls, + casts, + drops, + flushed, + flushes, + freq, + last_qlen, + max_qlen, + time, + writes]). +-define(EVENT_KEYS, + [calls,casts,flushed]). + +stats(All,Procs) -> + NI = [{Id,rpc:call(N,logger_olp,info,[P])} || {N,P,Id}<-Procs], + [{all,All}|[stats(Id,I,All) || {Id,I} <- NI]]. + +stats(Id,Info,All) -> + S = maps:with(?STAT_KEYS,Info), + AllOnProc = lists:sum(maps:values(maps:with(?EVENT_KEYS,S))), + if All>0 -> + Writes = maps:get(writes,S), + {_,ActiveTime} = maps:get(time,S), + Rate = round(100*Writes/All), + RateOnProc = + if AllOnProc>0 -> + round(100*Writes/AllOnProc); + true -> + 0 + end, + AvFreq = + if ActiveTime>0 -> + round(Writes/ActiveTime); + true -> + 0 + end, + {Id, + {stats,S}, + {rate,Rate}, + {rate_on_proc,RateOnProc}, + {av_freq,AvFreq}}; + true -> + {Id,none} + end. + +%%%----------------------------------------------------------------- +%%% Spawn a lot of processes that crash repeatedly, causing a lot of +%%% error reports from the emulator. +em_cascading() -> + spawn(fun() -> super() end). + +super() -> + process_flag(trap_exit,true), + spawn_link(fun server/0), + [spawn_link(fun client/0) || _<-lists:seq(1,10000)], + super_loop(). + +super_loop() -> + receive + {'EXIT',_,server} -> + spawn_link(fun server/0), + super_loop(); + {'EXIT',_,_} -> + _L = lists:sum(lists:seq(1,10000)), + spawn_link(fun client/0), + super_loop() + end. + +client() -> + receive + after 1 -> + case whereis(server) of + Pid when is_pid(Pid) -> + ok; + undefined -> + producer(), + erlang:error(some_exception) + end + end, + client(). + +server() -> + register(server,self()), + receive + after 3000 -> + exit(server) + end. + + +%%%----------------------------------------------------------------- +%%% Create a supervisor tree with processes that crash repeatedly, +%%% causing a lot of supervisor reports and crashreports +otp_cascading() -> + {ok,Pid} = supervisor:start_link({local,otp_super}, ?MODULE, [otp_super]), + unlink(Pid), + Pid. + +otp_server_sup() -> + supervisor:start_link({local,otp_server_sup},?MODULE,[otp_server_sup]). + +otp_client_sup(N) -> + supervisor:start_link({local,otp_client_sup},?MODULE,[otp_client_sup,N]). + +otp_server() -> + gen_server:start_link({local,otp_server},?MODULE,[otp_server],[]). + +otp_client() -> + gen_server:start_link(?MODULE,[otp_client],[]). + +init([otp_super]) -> + {ok, {{one_for_one, 200, 10}, + [{client_sup, + {?MODULE, otp_client_sup, [10000]}, + permanent, 1000, supervisor, [?MODULE]}, + {server_sup, + {?MODULE, otp_server_sup, []}, + permanent, 1000, supervisor, [?MODULE]} + ]}}; +init([otp_server_sup]) -> + {ok, {{one_for_one, 2, 10}, + [{server, + {?MODULE, otp_server, []}, + permanent, 1000, worker, [?MODULE]} + ]}}; +init([otp_client_sup,N]) -> + spawn(fun() -> + [supervisor:start_child(otp_client_sup,[]) + || _ <- lists:seq(1,N)] + end), + {ok, {{simple_one_for_one, N*10, 1}, + [{client, + {?MODULE, otp_client, []}, + permanent, 1000, worker, [?MODULE]} + ]}}; +init([otp_server]) -> + {ok, server, 10000}; +init([otp_client]) -> + {ok, client,1}. + +handle_info(timeout, client) -> + true = is_pid(whereis(otp_server)), + {noreply,client,1}; +handle_info(timeout, server) -> + exit(self(), some_error). + +%%%----------------------------------------------------------------- +%%% Logger callbacks +log(_LogEvent,_Config) -> + ok. + +%%%----------------------------------------------------------------- +%%% Function to trace on for counting produced emulator messages +producer() -> + ok. diff --git a/lib/kernel/test/logger_test_lib.erl b/lib/kernel/test/logger_test_lib.erl index 81eb9ce5eb..be4bc427fb 100644 --- a/lib/kernel/test/logger_test_lib.erl +++ b/lib/kernel/test/logger_test_lib.erl @@ -28,11 +28,17 @@ post_end_per_testcase/5, post_end_per_suite/3]). setup(Config,Vars) -> + Postfix = case proplists:get_value(postfix, Config) of + undefined -> ""; + P -> ["_",P] + end, FuncStr = lists:concat([proplists:get_value(suite, Config), "_", - proplists:get_value(tc, Config)]), + proplists:get_value(tc, Config)| + Postfix]), ConfigFileName = filename:join(proplists:get_value(priv_dir, Config), FuncStr), file:write_file(ConfigFileName ++ ".config", io_lib:format("[{kernel, ~p}].",[Vars])), - case test_server:start_node(proplists:get_value(tc, Config), slave, + Sname = lists:concat([proplists:get_value(tc,Config)|Postfix]), + case test_server:start_node(Sname, slave, [{args, ["-pa ",filename:dirname(code:which(?MODULE)), " -boot start_sasl -kernel start_timer true " "-config ",ConfigFileName]}]) of |