From b91eceaf71e0009346d119346a33894ed53be7b0 Mon Sep 17 00:00:00 2001
From: Peter Andersson <peppe@erlang.org>
Date: Thu, 7 Jun 2018 10:59:04 +0200
Subject: [logger] Implement logging of handler overload status

Conflicts:
	lib/kernel/src/logger_disk_log_h.erl
	lib/kernel/src/logger_std_h.erl
---
 lib/kernel/src/logger_std_h.erl | 102 +++++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 32 deletions(-)

(limited to 'lib/kernel/src/logger_std_h.erl')

diff --git a/lib/kernel/src/logger_std_h.erl b/lib/kernel/src/logger_std_h.erl
index bebd5300b4..77b054a9b7 100644
--- a/lib/kernel/src/logger_std_h.erl
+++ b/lib/kernel/src/logger_std_h.erl
@@ -38,6 +38,9 @@
 -export([log/2, adding_handler/1, removing_handler/1,
          changing_config/2, swap_buffer/2]).
 
+%% handler internal
+-export([log_handler_info/4]).
+
 %%%===================================================================
 %%% API
 %%%===================================================================
@@ -232,7 +235,7 @@ swap_buffer(Name, Buffer) ->
 log(LogEvent, Config = #{id := Name,
                          config := #{handler_pid := HPid,
                                      mode_tab := ModeTab}}) ->
-    %% if the handler has crashed, we must drop this request
+    %% if the handler has crashed, we must drop this event
     %% and hope the handler restarts so we can try again
     true = is_process_alive(HPid),
     Bin = logger_h_common:log_to_binary(LogEvent, Config),
@@ -284,17 +287,20 @@ init([Name, Config = #{config := HConfig},
             proc_lib:init_ack(Error)
     end.
 
-do_init(Name, Std) when Std=:=standard_io; Std=:=standard_error ->
-    case open_log_file(Name, Std) of
-        {ok,FileCtrlPid} ->
-            {ok,#{id=>Name,type=>Std,file_ctrl_pid=>FileCtrlPid}};
-        Error ->
-            Error
-    end;
-do_init(Name, FileInfo) when is_tuple(FileInfo) ->
-    case open_log_file(Name, FileInfo) of
+do_init(Name, Type) ->
+    case open_log_file(Name, Type) of
         {ok,FileCtrlPid} ->
-            {ok,#{id=>Name,type=>FileInfo,file_ctrl_pid=>FileCtrlPid}};
+            case logger_h_common:unset_restart_flag(Name, ?MODULE) of
+                true ->
+                    %% inform about restart
+                    gen_server:cast(self(), {log_handler_info,
+                                             "Handler ~p restarted",
+                                             [Name]});
+                false ->
+                    %% initial start
+                    ok
+            end,
+            {ok,#{id=>Name,type=>Type,file_ctrl_pid=>FileCtrlPid}};
         Error ->
             Error
     end.
@@ -316,7 +322,7 @@ enter_loop(#{wait_for_buffer:=true}=Config,State) ->
 enter_loop(_Config,State) ->
     gen_server:enter_loop(?MODULE,[],State).
 
-%% This is the synchronous log request.
+%% This is the synchronous log event.
 handle_call({log, Bin}, _From, State) ->
     {Result,State1} = do_log(Bin, call, State),
     %% Result == ok | dropped
@@ -371,15 +377,19 @@ handle_call(reset, _From, State) ->
 handle_call(stop, _From, State) ->
     {stop, {shutdown,stopped}, ok, State}.
 
-%% This is the asynchronous log request.
+%% This is the asynchronous log event.
 handle_cast({log, Bin}, State) ->
     {_,State1} = do_log(Bin, cast, State),
     {noreply, State1};
 
+handle_cast({log_handler_info, Format, Args}, State = #{id:=Name}) ->
+    log_handler_info(Name, Format, Args, State),
+    {noreply, State};
+
 %% If FILESYNC_REPEAT_INTERVAL is set to a millisec value, this
 %% clause gets called repeatedly by the handler. In order to
 %% guarantee that a filesync *always* happens after the last log
-%% request, the repeat operation must be active!
+%% event, the repeat operation must be active!
 handle_cast(repeated_filesync,
             State = #{type := Type,
                       file_ctrl_pid := FileCtrlPid,
@@ -514,21 +524,31 @@ stop(Name) ->
         if C == 0 -> Interval;
            true -> C-1 end).
 
-%% check for overload between every request (and set Mode to async,
+%% check for overload between every event (and set Mode to async,
 %% sync or drop accordingly), but never flush the whole mailbox
-%% before LogWindowSize requests have been handled
-do_log(Bin, CallOrCast, State = #{id:=Name}) ->
+%% before LogWindowSize events have been handled
+do_log(Bin, CallOrCast, State = #{id:=Name, mode:=Mode0}) ->
     T1 = ?timestamp(),
 
     %% check if the handler is getting overloaded, or if it's
     %% recovering from overload (the check must be done for each
-    %% request to react quickly to large bursts of requests and
+    %% event to react quickly to large bursts of events and
     %% to ensure that the handler can never end up in drop mode
     %% with an empty mailbox, which would stop operation)
     {Mode1,QLen,Mem,State1} = logger_h_common:check_load(State),
 
+    if (Mode1 == drop) andalso (Mode0 =/= drop) ->
+            log_handler_info(Name, "Handler ~p switched to drop mode",
+                             [Name], State);
+       (Mode0 == drop) andalso ((Mode1 == async) orelse (Mode1 == sync)) ->
+            log_handler_info(Name, "Handler ~p switched to ~w mode",
+                             [Name,Mode1], State);
+       true ->
+            ok
+    end,
+
     %% kill the handler if it can't keep up with the load
-    logger_h_common:kill_if_choked(Name, QLen, Mem, State),
+    logger_h_common:kill_if_choked(Name, QLen, Mem, ?MODULE, State),
 
     if Mode1 == flush ->
             flush(Name, QLen, T1, State1);
@@ -538,22 +558,26 @@ do_log(Bin, CallOrCast, State = #{id:=Name}) ->
 
 %% this clause is called by do_log/3 after an overload check
 %% has been performed, where QLen > FlushQLen
-flush(_Name, _QLen0, T1, State=#{last_log_ts := _T0, mode_tab := ModeTab}) ->
+flush(Name, _QLen0, T1, State=#{last_log_ts := _T0, mode_tab := ModeTab}) ->
     %% flush messages in the mailbox (a limited number in
     %% order to not cause long delays)
-    _NewFlushed = logger_h_common:flush_log_requests(?FLUSH_MAX_N),
+    NewFlushed = logger_h_common:flush_log_events(?FLUSH_MAX_N),
+
+    %% write info in log about flushed messages
+    log_handler_info(Name, "Handler ~p flushed ~w log events",
+                     [Name,NewFlushed], State),
 
     %% because of the receive loop when flushing messages, the
     %% handler will be scheduled out often and the mailbox could
     %% grow very large, so we'd better check the queue again here
     {_,_QLen1} = process_info(self(), message_queue_len),
-    ?observe(_Name,{max_qlen,_QLen1}),
+    ?observe(Name,{max_qlen,_QLen1}),
     
-    %% Add 1 for the current log request
-    ?observe(_Name,{flushed,_NewFlushed+1}),
+    %% Add 1 for the current log event
+    ?observe(Name,{flushed,NewFlushed+1}),
  
     State1 = ?update_max_time(?diff_time(T1,_T0),State),
-    {dropped,?update_other(flushed,FLUSHED,_NewFlushed,
+    {dropped,?update_other(flushed,FLUSHED,NewFlushed,
                            State1#{mode => ?set_mode(ModeTab,async),
                                    last_qlen => 0,
                                    last_log_ts => T1})}.
@@ -567,10 +591,10 @@ write(_Name, Mode, T1, Bin, _CallOrCast,
                 last_log_ts := T0,
                 file_ctrl_sync_int := FileCtrlSyncInt}) ->
     %% check if we need to limit the number of writes
-    %% during a burst of log requests
+    %% during a burst of log events
     {DoWrite,BurstWinT,BurstMsgCount} = logger_h_common:limit_burst(State),
 
-    %% only send a synhrounous request to the file controller process
+    %% only send a synhrounous event to the file controller process
     %% every FileCtrlSyncInt time, to give the handler time between
     %% file writes so it can keep up with incoming messages
     {Result,LastQLen1} =
@@ -587,13 +611,13 @@ write(_Name, Mode, T1, Bin, _CallOrCast,
                 {dropped,LastQLen}
         end,
     
-    %% Check if the time since the previous log request is long enough -
+    %% Check if the time since the previous log event is long enough -
     %% and the queue length small enough - to assume the mailbox has
     %% been emptied, and if so, do filesync operation and reset mode to
     %% async. Note that this is the best we can do to detect an idle
     %% handler without setting a timer after each log call/cast. If the
-    %% time between two consecutive log requests is fast and no new
-    %% request comes in after the last one, idle state won't be detected!
+    %% time between two consecutive log events is fast and no new
+    %% event comes in after the last one, idle state won't be detected!
     Time = ?diff_time(T1,T0),
     {Mode1,BurstMsgCount1} =
         if (LastQLen1 < ?FILESYNC_OK_QLEN) andalso
@@ -654,6 +678,20 @@ close_log_file(Fd) ->
     _ = file:datasync(Fd),
     _ = file:close(Fd).
 
+
+log_handler_info(Name, Format, Args, #{file_ctrl_pid := FileCtrlPid}) ->
+    Config =
+        case logger:get_handler_config(Name) of
+            {ok,Conf} -> Conf;
+            _ -> #{formatter=>{?DEFAULT_FORMATTER,?DEFAULT_FORMAT_CONFIG}}
+        end,
+    Meta = #{time=>erlang:system_time(microsecond)},
+    Bin = logger_h_common:log_to_binary(#{level => notice,
+                                          msg => {Format,Args},
+                                          meta => Meta}, Config),
+    _ = file_write_async(FileCtrlPid, Bin),
+    ok.
+
 %%%-----------------------------------------------------------------
 %%% File control process
 
@@ -726,7 +764,7 @@ file_ctrl_init(HandlerName, StdDev, Starter) ->
 file_ctrl_loop(Fd, Type, DevName, Synced,
                PrevWriteResult, PrevSyncResult, HandlerName) ->
     receive
-        %% asynchronous request
+        %% asynchronous event
         {log,Bin} ->
             Result = if Type == file ->
                              write_to_dev(Fd, Bin, DevName,
@@ -737,7 +775,7 @@ file_ctrl_loop(Fd, Type, DevName, Synced,
             file_ctrl_loop(Fd, Type, DevName, false,
                            Result, PrevSyncResult, HandlerName);
 
-        %% synchronous request
+        %% synchronous event
         {{log,From,Bin,FileSync},MRef} ->
             if Type == file ->
                     %% check that file hasn't been deleted
-- 
cgit v1.2.3