diff options
author | Peter Andersson <[email protected]> | 2018-06-07 10:59:04 +0200 |
---|---|---|
committer | Siri Hansen <[email protected]> | 2018-06-11 15:01:41 +0200 |
commit | b91eceaf71e0009346d119346a33894ed53be7b0 (patch) | |
tree | 166b092cf81517ed7958a13de1745c5e206e4db2 /lib/kernel/src/logger_disk_log_h.erl | |
parent | f20c281be6fa27e2da1c543a64029531c5f00325 (diff) | |
download | otp-b91eceaf71e0009346d119346a33894ed53be7b0.tar.gz otp-b91eceaf71e0009346d119346a33894ed53be7b0.tar.bz2 otp-b91eceaf71e0009346d119346a33894ed53be7b0.zip |
[logger] Implement logging of handler overload status
Conflicts:
lib/kernel/src/logger_disk_log_h.erl
lib/kernel/src/logger_std_h.erl
Diffstat (limited to 'lib/kernel/src/logger_disk_log_h.erl')
-rw-r--r-- | lib/kernel/src/logger_disk_log_h.erl | 89 |
1 files changed, 66 insertions, 23 deletions
diff --git a/lib/kernel/src/logger_disk_log_h.erl b/lib/kernel/src/logger_disk_log_h.erl index d339957ac8..c3d2204630 100644 --- a/lib/kernel/src/logger_disk_log_h.erl +++ b/lib/kernel/src/logger_disk_log_h.erl @@ -37,6 +37,9 @@ adding_handler/1, removing_handler/1, changing_config/2, swap_buffer/2]). +%% handler internal +-export([log_handler_info/4]). + %%%=================================================================== %%% API %%%=================================================================== @@ -250,7 +253,7 @@ swap_buffer(Name, Buffer) -> log(LogEvent, Config = #{id := Name, config := #{handler_pid := HPid, mode_tab := ModeTab}}) -> - %% if the handler has crashed, we must drop this request + %% if the handler has crashed, we must drop this event %% and hope the handler restarts so we can try again true = is_process_alive(HPid), Bin = logger_h_common:log_to_binary(LogEvent, Config), @@ -295,6 +298,16 @@ init([Name, Config = #{config := HConfig, disk_log_opts := LogOpts}, mode_tab => ModeTab}}, proc_lib:init_ack({ok,self(),Config1}), gen_server:cast(self(), repeated_disk_log_sync), + case logger_h_common:unset_restart_flag(Name, ?MODULE) of + true -> + %% inform about restart + gen_server:cast(self(), {log_handler_info, + "Handler ~p restarted", + [Name]}); + false -> + %% initial start + ok + end, enter_loop(Config1, State1) catch _:Error -> @@ -315,15 +328,13 @@ enter_loop(#{wait_for_buffer:=true}=Config,State) -> Bin = logger_h_common:log_to_binary(Log,Config), {_,S1} = do_log(Bin,cast,S), S1 - end, - State, - Buffer) + end, State, Buffer) end, gen_server:enter_loop(?MODULE,[],State1); enter_loop(_Config,State) -> gen_server:enter_loop(?MODULE,[],State). -%% This is the synchronous log request. +%% This is the synchronous log event. handle_call({log, Bin}, _From, State) -> {Result,State1} = do_log(Bin, call, State), %% Result == ok | dropped @@ -377,15 +388,19 @@ handle_call(stop, _From, State) -> {stop, {shutdown,stopped}, ok, State}. -%% This is the asynchronous log request. +%% This is the asynchronous log event. handle_cast({log, Bin}, State) -> {_,State1} = do_log(Bin, cast, State), {noreply, State1}; +handle_cast({log_handler_info, Format, Args}, State = #{id:=Name}) -> + log_handler_info(Name, Format, Args, State), + {noreply, State}; + %% If FILESYNC_REPEAT_INTERVAL is set to a millisec value, this %% clause gets called repeatedly by the handler. In order to %% guarantee that a filesync *always* happens after the last log -%% request, the repeat operation must be active! +%% event, the repeat operation must be active! handle_cast(repeated_disk_log_sync, State = #{id := Name, filesync_repeat_interval := FSyncInt, @@ -526,21 +541,31 @@ stop(Name) -> if C == 0 -> Interval; true -> C-1 end). -%% check for overload between every request (and set Mode to async, +%% check for overload between every event (and set Mode to async, %% sync or drop accordingly), but never flush the whole mailbox -%% before LogWindowSize requests have been handled -do_log(Bin, CallOrCast, State = #{id:=Name, mode := _Mode0}) -> +%% before LogWindowSize events have been handled +do_log(Bin, CallOrCast, State = #{id:=Name, mode := Mode0}) -> T1 = ?timestamp(), %% check if the handler is getting overloaded, or if it's %% recovering from overload (the check must be done for each - %% request to react quickly to large bursts of requests and + %% event to react quickly to large bursts of events and %% to ensure that the handler can never end up in drop mode %% with an empty mailbox, which would stop operation) {Mode1,QLen,Mem,State1} = logger_h_common:check_load(State), + if (Mode1 == drop) andalso (Mode0 =/= drop) -> + log_handler_info(Name, "Handler ~p switched to drop mode", + [Name], State); + (Mode0 == drop) andalso ((Mode1 == async) orelse (Mode1 == sync)) -> + log_handler_info(Name, "Handler ~p switched to ~w mode", + [Name,Mode1], State); + true -> + ok + end, + %% kill the handler if it can't keep up with the load - logger_h_common:kill_if_choked(Name, QLen, Mem, State), + logger_h_common:kill_if_choked(Name, QLen, Mem, ?MODULE, State), if Mode1 == flush -> flush(Name, QLen, T1, State1); @@ -550,22 +575,26 @@ do_log(Bin, CallOrCast, State = #{id:=Name, mode := _Mode0}) -> %% this function is called by do_log/3 after an overload check %% has been performed, where QLen > FlushQLen -flush(_Name, _QLen0, T1, State=#{last_log_ts := _T0, mode_tab := ModeTab}) -> +flush(Name, _QLen0, T1, State=#{last_log_ts := _T0, mode_tab := ModeTab}) -> %% flush messages in the mailbox (a limited number in %% order to not cause long delays) - _NewFlushed = logger_h_common:flush_log_requests(?FLUSH_MAX_N), + NewFlushed = logger_h_common:flush_log_events(?FLUSH_MAX_N), + + %% write info in log about flushed messages + log_handler_info(Name, "Handler ~p flushed ~w log events", + [Name,NewFlushed], State), %% because of the receive loop when flushing messages, the %% handler will be scheduled out often and the mailbox could %% grow very large, so we'd better check the queue again here {_,_QLen1} = process_info(self(), message_queue_len), - ?observe(_Name,{max_qlen,_QLen1}), + ?observe(Name,{max_qlen,_QLen1}), - %% Add 1 for the current log request - ?observe(_Name,{flushed,_NewFlushed+1}), + %% Add 1 for the current log event + ?observe(Name,{flushed,NewFlushed+1}), State1 = ?update_max_time(?diff_time(T1,_T0),State), - {dropped,?update_other(flushed,FLUSHED,_NewFlushed, + {dropped,?update_other(flushed,FLUSHED,NewFlushed, State1#{mode => ?set_mode(ModeTab,async), last_qlen => 0, last_log_ts => T1})}. @@ -578,10 +607,10 @@ write(Name, Mode, T1, Bin, _CallOrCast, last_qlen := LastQLen, last_log_ts := T0}) -> %% check if we need to limit the number of writes - %% during a burst of log requests + %% during a burst of log events {DoWrite,BurstWinT,BurstMsgCount} = logger_h_common:limit_burst(State), - %% only send a synhrounous request to the disk_log process + %% only send a synhrounous event to the disk_log process %% every DLSyncInt time, to give the handler time between %% writes so it can keep up with incoming messages {Status,LastQLen1,State1} = @@ -599,13 +628,13 @@ write(Name, Mode, T1, Bin, _CallOrCast, {dropped, LastQLen, State} end, - %% Check if the time since the previous log request is long enough - + %% Check if the time since the previous log event is long enough - %% and the queue length small enough - to assume the mailbox has %% been emptied, and if so, do filesync operation and reset mode to %% async. Note that this is the best we can do to detect an idle %% handler without setting a timer after each log call/cast. If the - %% time between two consecutive log requests is fast and no new - %% request comes in after the last one, idle state won't be detected! + %% time between two consecutive log events is fast and no new + %% event comes in after the last one, idle state won't be detected! Time = ?diff_time(T1,T0), {Mode1,BurstMsgCount1,State2} = if (LastQLen1 < ?FILESYNC_OK_QLEN) andalso @@ -628,6 +657,20 @@ write(Name, Mode, T1, Bin, _CallOrCast, {Status,State4}. +log_handler_info(Name, Format, Args, State) -> + Config = + case logger:get_handler_config(Name) of + {ok,Conf} -> Conf; + _ -> #{formatter=>{?DEFAULT_FORMATTER,?DEFAULT_FORMAT_CONFIG}} + end, + Meta = #{time=>erlang:system_time(microsecond)}, + Bin = logger_h_common:log_to_binary(#{level => notice, + msg => {Format,Args}, + meta => Meta}, Config), + _ = disk_log_write(Name, Bin, State), + ok. + + open_disk_log(Name, LogOpts) -> #{file := File, max_no_bytes := MaxNoBytes, |