1 files changed, 101 insertions, 68 deletions
diff --git a/lib/kernel/src/logger_h_common.erl b/lib/kernel/src/logger_h_common.erl
index 336398cd4a..a40345dddc 100644
--- a/lib/kernel/src/logger_h_common.erl
+++ b/lib/kernel/src/logger_h_common.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2017. All Rights Reserved.
+%% Copyright Ericsson AB 2017-2018. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -24,13 +24,15 @@
 
 -export([log_to_binary/2,
          check_common_config/1,
-         call_cast_or_drop/2,
+         call_cast_or_drop/4,
          check_load/1,
          limit_burst/1,
-         kill_if_choked/4,
-         flush_log_requests/0,
-         flush_log_requests/1,
+         kill_if_choked/5,
+         flush_log_events/0,
+         flush_log_events/1,
          handler_exit/2,
+         set_restart_flag/2,
+         unset_restart_flag/2,
          cancel_timer/1,
          stop_or_restart/3,
          overload_levels_ok/1,
@@ -41,7 +43,7 @@
 %%% Covert log data on any form to binary
 -spec log_to_binary(LogEvent,Config) -> LogString when
       LogEvent :: logger:log_event(),
-      Config :: logger:config(),
+      Config :: logger:handler_config(),
       LogString :: binary().
 log_to_binary(#{msg:={report,_},meta:=#{report_cb:=_}}=Log,Config) ->
     do_log_to_binary(Log,Config);
@@ -52,7 +54,8 @@ log_to_binary(Log,Config) ->
     do_log_to_binary(Log,Config).
 
 do_log_to_binary(Log,Config) ->
-    {Formatter,FormatterConfig} = maps:get(formatter,Config),
+    {Formatter,FormatterConfig} =
+        maps:get(formatter,Config,{?DEFAULT_FORMATTER,?DEFAULT_FORMAT_CONFIG}),
     String = try_format(Log,Formatter,FormatterConfig),
     try unicode:characters_to_binary(String)
     catch _:_ ->
@@ -84,29 +87,37 @@ try_format(Log,Formatter,FormatterConfig) ->
 
 %%%-----------------------------------------------------------------
 %%% Check that the configuration term is valid
-check_common_config({toggle_sync_qlen,N}) when is_integer(N) ->
+check_common_config({mode_tab,_Tid}) ->
     valid;
-check_common_config({drop_new_reqs_qlen,N}) when is_integer(N) ->
+check_common_config({handler_pid,Pid}) when is_pid(Pid) ->
     valid;
-check_common_config({flush_reqs_qlen,N}) when is_integer(N) ->
+
+check_common_config({sync_mode_qlen,N}) when is_integer(N) ->
+    valid;
+check_common_config({drop_mode_qlen,N}) when is_integer(N) ->
+    valid;
+check_common_config({flush_qlen,N}) when is_integer(N) ->
     valid;
-check_common_config({enable_burst_limit,Bool}) when Bool == true;
+
+check_common_config({burst_limit_enable,Bool}) when Bool == true;
                                                     Bool == false ->
     valid;
-check_common_config({burst_limit_size,N}) when is_integer(N) ->
+check_common_config({burst_limit_max_count,N}) when is_integer(N) ->
     valid;
-check_common_config({burst_window_time,N}) when is_integer(N) ->
+check_common_config({burst_limit_window_time,N}) when is_integer(N) ->
     valid;
-check_common_config({enable_kill_overloaded,Bool}) when Bool == true;
-                                                        Bool == false ->
+
+check_common_config({overload_kill_enable,Bool}) when Bool == true;
+                                                      Bool == false ->
     valid;
-check_common_config({handler_overloaded_qlen,N}) when is_integer(N) ->
+check_common_config({overload_kill_qlen,N}) when is_integer(N) ->
     valid;
-check_common_config({handler_overloaded_mem,N}) when is_integer(N) ->
+check_common_config({overload_kill_mem_size,N}) when is_integer(N) ->
     valid;
-check_common_config({handler_restart_after,NorA})  when is_integer(NorA);
-                                                        NorA == never ->
+check_common_config({overload_kill_restart_after,NorA})  when is_integer(NorA);
+                                                              NorA == infinity ->
     valid;
+
 check_common_config({filesync_repeat_interval,NorA}) when is_integer(NorA);
                                                           NorA == no_repeat ->
     valid;
@@ -116,16 +127,16 @@ check_common_config(_) ->
 
 %%%-----------------------------------------------------------------
 %%% Overload Protection
-call_cast_or_drop(Name, Bin) ->
-    %% If the handler process is getting overloaded, the log request
+call_cast_or_drop(_Name, HandlerPid, ModeTab, Bin) ->
+    %% If the handler process is getting overloaded, the log event
     %% will be synchronous instead of asynchronous (slows down the
     %% logging tempo of a process doing lots of logging. If the
-    %% handler is choked, drop mode is set and no request will be sent.
-    try ?get_mode(Name) of
+    %% handler is choked, drop mode is set and no event will be sent.
+    try ?get_mode(ModeTab) of
         async ->
-            gen_server:cast(Name, {log,Bin});
+            gen_server:cast(HandlerPid, {log,Bin});
         sync ->
-            try gen_server:call(Name, {log,Bin}, ?DEFAULT_CALL_TIMEOUT) of
+            try gen_server:call(HandlerPid, {log,Bin}, ?DEFAULT_CALL_TIMEOUT) of
                 %% if return value from call == dropped, the
                 %% message has been flushed by handler and should
                 %% therefore not be counted as dropped in stats
@@ -133,32 +144,50 @@ call_cast_or_drop(Name, Bin) ->
                 dropped -> ok
             catch
                 _:{timeout,_} ->
-                    ?observe(Name,{dropped,1})
+                    ?observe(_Name,{dropped,1})
             end;
         drop ->
-            ?observe(Name,{dropped,1})
+            ?observe(_Name,{dropped,1})
     catch
         %% if the ETS table doesn't exist (maybe because of a
-        %% handler restart), we can only drop the request
-        _:_ -> ?observe(Name,{dropped,1})
+        %% handler restart), we can only drop the event
+        _:_ -> ?observe(_Name,{dropped,1})
     end,
     ok.
 
 handler_exit(_Name, Reason) ->
     exit(Reason).
 
-check_load(State = #{id:=Name, mode := Mode,
-                     toggle_sync_qlen := ToggleSyncQLen,
-                     drop_new_reqs_qlen := DropNewQLen,
-                     flush_reqs_qlen := FlushQLen}) ->
+set_restart_flag(Name, Module) ->
+    Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])),
+    spawn(fun() ->
+                  register(Flag, self()),
+                  timer:sleep(infinity)
+          end),
+    ok.
+
+unset_restart_flag(Name, Module) ->
+    Flag = list_to_atom(lists:concat([Module,"_",Name,"_restarting"])),
+    case whereis(Flag) of
+        undefined ->
+            false;
+        Pid ->
+            exit(Pid, kill),
+            true
+    end.
+
+check_load(State = #{id:=_Name, mode_tab := ModeTab, mode := Mode,
+                     sync_mode_qlen := SyncModeQLen,
+                     drop_mode_qlen := DropModeQLen,
+                     flush_qlen := FlushQLen}) ->
     {_,Mem} = process_info(self(), memory),
-    ?observe(Name,{max_mem,Mem}),
+    ?observe(_Name,{max_mem,Mem}),
     {_,QLen} = process_info(self(), message_queue_len),
-    ?observe(Name,{max_qlen,QLen}),
+    ?observe(_Name,{max_qlen,QLen}),
     %% When the handler process gets scheduled in, it's impossible
     %% to predict the QLen. We could jump "up" arbitrarily from say
     %% async to sync, async to drop, sync to flush, etc. However, when
-    %% the handler process manages the log requests (without flushing),
+    %% the handler process manages the log events (without flushing),
     %% one after the other, we will move "down" from drop to sync and
     %% from sync to async. This way we don't risk getting stuck in
     %% drop or sync mode with an empty mailbox.
@@ -166,33 +195,33 @@ check_load(State = #{id:=Name, mode := Mode,
         if
             QLen >= FlushQLen ->
                 {flush, 0,1};
-            QLen >= DropNewQLen ->
-                %% Note that drop mode will force log requests to
+            QLen >= DropModeQLen ->
+                %% Note that drop mode will force log events to
                 %% be dropped on the client side (never sent get to
                 %% the handler).
                 IncDrops = if Mode == drop -> 0; true -> 1 end,
-                {?change_mode(Name, Mode, drop), IncDrops,0};
-            QLen >= ToggleSyncQLen ->
-                {?change_mode(Name, Mode, sync), 0,0};
+                {?change_mode(ModeTab, Mode, drop), IncDrops,0};
+            QLen >= SyncModeQLen ->
+                {?change_mode(ModeTab, Mode, sync), 0,0};
             true ->
-                {?change_mode(Name, Mode, async), 0,0}
+                {?change_mode(ModeTab, Mode, async), 0,0}
         end,
     State1 = ?update_other(drops,DROPS,_NewDrops,State),
     {Mode1, QLen, Mem,
      ?update_other(flushes,FLUSHES,_NewFlushes,
                    State1#{last_qlen => QLen})}.
 
-limit_burst(#{enable_burst_limit := false}) ->
+limit_burst(#{burst_limit_enable := false}) ->
      {true,0,0};
 limit_burst(#{burst_win_ts := BurstWinT0,
               burst_msg_count := BurstMsgCount,
-              burst_window_time := BurstWinTime,
-              burst_limit_size := BurstLimitSz}) ->
-    if (BurstMsgCount >= BurstLimitSz) -> 
+              burst_limit_window_time := BurstLimitWinTime,
+              burst_limit_max_count := BurstLimitMaxCnt}) ->
+    if (BurstMsgCount >= BurstLimitMaxCnt) -> 
             %% the limit for allowed messages has been reached
             BurstWinT1 = ?timestamp(),
             case ?diff_time(BurstWinT1,BurstWinT0) of
-                BurstCheckTime when BurstCheckTime < (BurstWinTime*1000) ->
+                BurstCheckTime when BurstCheckTime < (BurstLimitWinTime*1000) ->
                     %% we're still within the burst time frame
                     {false,BurstWinT0,BurstMsgCount};
                 _BurstCheckTime ->
@@ -204,38 +233,42 @@ limit_burst(#{burst_win_ts := BurstWinT0,
             {true,BurstWinT0,BurstMsgCount+1}
     end.
 
-kill_if_choked(Name, QLen, Mem,
-               #{enable_kill_overloaded := KillIfOL,
-                 handler_overloaded_qlen := HOLQLen,
-                 handler_overloaded_mem := HOLMem}) ->
+kill_if_choked(Name, QLen, Mem, HandlerMod,
+               State = #{overload_kill_enable   := KillIfOL,
+                         overload_kill_qlen     := OLKillQLen,
+                         overload_kill_mem_size := OLKillMem}) ->
     if KillIfOL andalso
-       ((QLen > HOLQLen) orelse (Mem > HOLMem)) ->            
+       ((QLen > OLKillQLen) orelse (Mem > OLKillMem)) ->
+            HandlerMod:log_handler_info(Name,
+                                        "Handler ~p overloaded and stopping",
+                                        [Name], State),
+            set_restart_flag(Name, HandlerMod),
             handler_exit(Name, {shutdown,{overloaded,Name,QLen,Mem}});
        true ->
             ok
     end.
 
-flush_log_requests() ->
-    flush_log_requests(-1).
+flush_log_events() ->
+    flush_log_events(-1).
 
-flush_log_requests(Limit) ->
+flush_log_events(Limit) ->
     process_flag(priority, high),
-    Flushed = flush_log_requests(0, Limit),
+    Flushed = flush_log_events(0, Limit),
     process_flag(priority, normal),
     Flushed.
 
-flush_log_requests(Limit, Limit) ->
+flush_log_events(Limit, Limit) ->
     Limit;
-flush_log_requests(N, Limit) ->
-    %% flush log requests but leave other requests, such as
-    %% file/disk_log_sync, info and change_config, so that these
+flush_log_events(N, Limit) ->
+    %% flush log events but leave other events, such as
+    %% filesync, info and change_config, so that these
     %% have a chance to be processed even under heavy load
     receive
         {'$gen_cast',{log,_}} ->
-            flush_log_requests(N+1, Limit);
+            flush_log_events(N+1, Limit);
         {'$gen_call',{Pid,MRef},{log,_}} ->
             Pid ! {MRef, dropped},
-            flush_log_requests(N+1, Limit)
+            flush_log_events(N+1, Limit)
     after
         0 -> N
     end.
@@ -245,7 +278,7 @@ cancel_timer(TRef) -> timer:cancel(TRef).
 
 
 stop_or_restart(Name, {shutdown,Reason={overloaded,_Name,_QLen,_Mem}},
-                #{handler_restart_after := RestartAfter}) ->
+                #{overload_kill_restart_after := RestartAfter}) ->
     %% If we're terminating because of an overload situation (see
     %% logger_h_common:kill_if_choked/4), we need to remove the handler
     %% and set a restart timer. A separate process must perform this
@@ -262,7 +295,7 @@ stop_or_restart(Name, {shutdown,Reason={overloaded,_Name,_QLen,_Mem}},
                         exit(HandlerPid, kill)
                 end,
                 case logger:get_handler_config(Name) of
-                    {ok,{HMod,HConfig}} when is_integer(RestartAfter) ->
+                    {ok,#{module:=HMod}=HConfig} when is_integer(RestartAfter) ->
                         _ = logger:remove_handler(Name),
                         _ = timer:apply_after(RestartAfter, logger, add_handler,
                                               [Name,HMod,HConfig]);
@@ -293,10 +326,10 @@ stop_or_restart(_Name, _Reason, _State) ->
     ok.
 
 overload_levels_ok(HandlerConfig) ->
-    TSQL = maps:get(toggle_sync_qlen, HandlerConfig, ?TOGGLE_SYNC_QLEN),
-    DNRQL = maps:get(drop_new_reqs_qlen, HandlerConfig, ?DROP_NEW_REQS_QLEN),
-    FRQL = maps:get(flush_reqs_qlen, HandlerConfig, ?FLUSH_REQS_QLEN),
-    (DNRQL > 1) andalso (TSQL =< DNRQL) andalso (DNRQL =< FRQL).
+    SMQL = maps:get(sync_mode_qlen, HandlerConfig, ?SYNC_MODE_QLEN),
+    DMQL = maps:get(drop_mode_qlen, HandlerConfig, ?DROP_MODE_QLEN),
+    FQL = maps:get(flush_qlen, HandlerConfig, ?FLUSH_QLEN),
+    (DMQL > 1) andalso (SMQL =< DMQL) andalso (DMQL =< FQL).
 
 error_notify(Term) ->
     ?internal_log(error, Term).