aboutsummaryrefslogtreecommitdiffstats
path: root/lib/snmp
diff options
context:
space:
mode:
authorMicael Karlberg <[email protected]>2019-06-07 14:29:47 +0200
committerMicael Karlberg <[email protected]>2019-06-07 15:21:17 +0200
commitd9f6409a994ae74c8b46fc1425b07134de9d0546 (patch)
tree44df26f8a9028ddc813ee828285e3ff5787b4b54 /lib/snmp
parent9e0dbd341d738e0c8a268752f131fa6ab6195ad3 (diff)
downloadotp-d9f6409a994ae74c8b46fc1425b07134de9d0546.tar.gz
otp-d9f6409a994ae74c8b46fc1425b07134de9d0546.tar.bz2
otp-d9f6409a994ae74c8b46fc1425b07134de9d0546.zip
[snmp|test] Use system monitor to discover slow VM issues
We have experienced intermittent failures of random test cases on slow VMs. In order to get more info about the system, we use system monitor to collect info. We have one global monitor system, and one local monitor on each node. Each local monitor report to the global, which in turn prints a message.
Diffstat (limited to 'lib/snmp')
-rw-r--r--lib/snmp/test/modules.mk2
-rw-r--r--lib/snmp/test/snmp_agent_test.erl6
-rw-r--r--lib/snmp/test/snmp_agent_test_lib.erl7
-rw-r--r--lib/snmp/test/snmp_test_global_sys_monitor.erl159
-rw-r--r--lib/snmp/test/snmp_test_sys_monitor.erl86
5 files changed, 257 insertions, 3 deletions
diff --git a/lib/snmp/test/modules.mk b/lib/snmp/test/modules.mk
index 8b6547f9a9..ec3870dbd8 100644
--- a/lib/snmp/test/modules.mk
+++ b/lib/snmp/test/modules.mk
@@ -42,6 +42,8 @@ SUITE_MODULES = \
snmp_manager_test
TEST_UTIL_MODULES = \
+ snmp_test_global_sys_monitor \
+ snmp_test_sys_monitor \
snmp_test_lib \
snmp_test_manager \
snmp_test_mgr \
diff --git a/lib/snmp/test/snmp_agent_test.erl b/lib/snmp/test/snmp_agent_test.erl
index 860ca17cdb..4a797efbe5 100644
--- a/lib/snmp/test/snmp_agent_test.erl
+++ b/lib/snmp/test/snmp_agent_test.erl
@@ -557,6 +557,8 @@ init_per_suite(Config0) when is_list(Config0) ->
Config3 = [{mib_dir, MibDir}, {std_mib_dir, StdMibDir} | Config2],
+ snmp_test_global_sys_monitor:start(),
+ snmp_test_sys_monitor:start(), % We need one on this node also
snmp_test_mgr_counter_server:start(),
p("init_per_suite -> end when"
@@ -580,6 +582,8 @@ end_per_suite(Config) when is_list(Config) ->
p("end_per_suite -> failed stopping counter server"
"~n Reason: ~p", [Reason])
end,
+ snmp_test_sys_monitor:stop(),
+ snmp_test_global_sys_monitor:stop(),
p("end_per_suite -> end when"
"~n Nodes: ~p", [erlang:nodes()]),
@@ -1654,7 +1658,7 @@ create_local_db_dir(Config) when is_list(Config) ->
Name = list_to_atom(atom_to_list(create_local_db_dir)
++"-"++As++"-"++Bs++"-"++Cs),
Pa = filename:dirname(code:which(?MODULE)),
- {ok,Node} = ?t:start_node(Name, slave, [{args, "-pa "++Pa}]),
+ {ok,Node} = ?t:start_node(Name, slave, [{args, "-pa " ++ Pa}]),
%% first start with a nonexisting DbDir
Fun1 = fun() ->
diff --git a/lib/snmp/test/snmp_agent_test_lib.erl b/lib/snmp/test/snmp_agent_test_lib.erl
index 1128fc8a8c..ddc901216e 100644
--- a/lib/snmp/test/snmp_agent_test_lib.erl
+++ b/lib/snmp/test/snmp_agent_test_lib.erl
@@ -1453,12 +1453,15 @@ start_node(Name) ->
""
end,
%% Do not use start_link!!! (the proc that calls this one is tmp)
- ?DBG("start_node -> Args: ~p~n",[Args]),
- A = Args ++ " -pa " ++ Pa,
+ ?DBG("start_node -> Args: ~p~n", [Args]),
+ A = Args ++ " -pa " ++ Pa ++
+ " -s " ++ atom_to_list(snmp_test_sys_monitor) ++ " start" ++
+ " -s global sync",
case (catch ?START_NODE(Name, A)) of
{ok, Node} ->
%% Tell the test_server to not clean up things it never started.
?DBG("start_node -> Node: ~p",[Node]),
+ global:sync(),
{ok, Node};
Else ->
?ERR("start_node -> failed with(other): Else: ~p",[Else]),
diff --git a/lib/snmp/test/snmp_test_global_sys_monitor.erl b/lib/snmp/test/snmp_test_global_sys_monitor.erl
new file mode 100644
index 0000000000..04dbc72133
--- /dev/null
+++ b/lib/snmp/test/snmp_test_global_sys_monitor.erl
@@ -0,0 +1,159 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2019-2019. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(snmp_test_global_sys_monitor).
+
+-export([start/0, stop/0, log/1,
+ init/1]).
+
+-define(NAME, ?MODULE).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+start() ->
+ Parent = self(),
+ proc_lib:start(?MODULE, init, [Parent]).
+
+stop() ->
+ global:send(?NAME, {?MODULE, stop}).
+
+
+log(Event) ->
+ global:send(?NAME, {?MODULE, node(), Event}).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init(Parent) ->
+ process_flag(priority, high),
+ case global:register_name(?NAME, self()) of
+ yes ->
+ info_msg("Starting", []),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ loop(#{parent => Parent});
+ no ->
+ warning_msg("Already started", []),
+ proc_lib:init_ack(Parent, {error, already_started}),
+ exit(normal)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+loop(State) ->
+ receive
+ {?MODULE, stop} ->
+ warning_msg("Stopping", []),
+ exit(normal);
+
+ {?MODULE, Node, Event} ->
+ State2 = process_event(State, Node, Event),
+ loop(State2);
+
+ {nodedown = Event, Node} ->
+ State2 = process_event(State, Node, Event),
+ loop(State2);
+
+ _ ->
+ loop(State)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+process_event(State, Node, {Pid, TS, Tag, Info}) ->
+ process_system_event(State, Node, Pid, TS, Tag, Info);
+
+process_event(State, Node, {TS, starting}) ->
+ FTS = snmp_misc:format_timestamp(TS),
+ info_msg("System Monitor on node ~p starting at ~s", [Node, FTS]),
+ if
+ (Node =/= node()) ->
+ erlang:monitor_node(Node, true);
+ true ->
+ ok
+ end,
+ State;
+
+process_event(State, Node, {TS, already_started}) ->
+ FTS = snmp_misc:format_timestamp(TS),
+ info_msg("System Monitor on node ~p already started", [Node, FTS]),
+ State;
+
+process_event(State, Node, nodedown) ->
+ info_msg("Node ~p down", [Node]),
+ State;
+
+process_event(State, Node, Event) ->
+ warning_msg("Received unknown event from node ~p:"
+ "~n ~p", [Node, Event]),
+ State.
+
+
+%% System Monitor events
+process_system_event(State, Node, Pid, TS, long_gc, Info) ->
+ print_system_event("Long GC", Node, Pid, TS, Info),
+ State;
+process_system_event(State, Node, Pid, TS, long_schedule, Info) ->
+ print_system_event("Long Schedule", Node, Pid, TS, Info),
+ State;
+process_system_event(State, Node, Pid, TS, large_heap, Info) ->
+ print_system_event("Large Heap", Node, Pid, TS, Info),
+ State;
+process_system_event(State, Node, Pid, TS, busy_port, Info) ->
+ print_system_event("Busy port", Node, Pid, TS, Info),
+ State;
+process_system_event(State, Node, Pid, TS, busy_dist_port, Info) ->
+ print_system_event("Busy dist port", Node, Pid, TS, Info),
+ State;
+
+%% And everything else
+process_system_event(State, Node, Pid, TS, Tag, Info) ->
+ Pre = f("Unknown Event '~p'", [Tag]),
+ print_system_event(Pre, Node, Pid, TS, Info),
+ State.
+
+
+print_system_event(Pre, Node, Pid, TS, Info) ->
+ FTS = snmp_misc:format_timestamp(TS),
+ warning_msg("~s from ~p (~p) at ~s:"
+ "~n ~p", [Pre, Node, Pid, FTS, Info]).
+
+f(F, A) ->
+ lists:flatten(io_lib:format(F, A)).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+info_msg(F, A) ->
+ error_logger:info_msg(format_msg(F, A), []).
+
+warning_msg(F, A) ->
+ error_logger:warning_msg(format_msg(F, A), []).
+
+
+format_msg(F, A) ->
+ lists:flatten(io_lib:format(
+ "~n" ++
+ "****** SNMP TEST GLOBAL SYSTEM MONITOR ******~n~n" ++
+ F ++ "~n~n",
+ A)).
+
diff --git a/lib/snmp/test/snmp_test_sys_monitor.erl b/lib/snmp/test/snmp_test_sys_monitor.erl
new file mode 100644
index 0000000000..2291c6ca97
--- /dev/null
+++ b/lib/snmp/test/snmp_test_sys_monitor.erl
@@ -0,0 +1,86 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2019-2019. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(snmp_test_sys_monitor).
+
+-export([start/0, stop/0,
+ init/1]).
+
+-define(NAME, ?MODULE).
+-define(GSM, snmp_test_global_sys_monitor).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+start() ->
+ Parent = self(),
+ proc_lib:start(?MODULE, init, [Parent]).
+
+stop() ->
+ case whereis(?NAME) of
+ Pid when is_pid(Pid) ->
+ Pid ! {?MODULE, stop},
+ ok;
+ _ ->
+ ok
+ end.
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+init(Parent) ->
+ process_flag(priority, high),
+ try register(?NAME, self()) of
+ true ->
+ global:sync(),
+ MonSettings = [
+ busy_port,
+ busy_dist_port,
+ {long_gc, 1000},
+ {long_schedule, 1000},
+ {large_heap, 8*1024*1024} % 8 MB
+ ],
+ erlang:system_monitor(self(), MonSettings),
+ ?GSM:log({erlang:timestamp(), starting}),
+ proc_lib:init_ack(Parent, {ok, self()}),
+ loop(#{parent => Parent})
+ catch
+ _:_:_ ->
+ ?GSM:log({erlang:timestamp(), already_started}),
+ proc_lib:init_ack(Parent, {error, already_started}),
+ exit(normal)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+loop(State) ->
+ receive
+ {monitor, Pid, Tag, Info} ->
+ ?GSM:log({Pid, erlang:timestamp(), Tag, Info}),
+ loop(State);
+
+ _ ->
+ loop(State)
+ end.
+
+
+