diff options
author | Micael Karlberg <[email protected]> | 2019-06-07 14:29:47 +0200 |
---|---|---|
committer | Micael Karlberg <[email protected]> | 2019-06-07 15:21:17 +0200 |
commit | d9f6409a994ae74c8b46fc1425b07134de9d0546 (patch) | |
tree | 44df26f8a9028ddc813ee828285e3ff5787b4b54 /lib/snmp | |
parent | 9e0dbd341d738e0c8a268752f131fa6ab6195ad3 (diff) | |
download | otp-d9f6409a994ae74c8b46fc1425b07134de9d0546.tar.gz otp-d9f6409a994ae74c8b46fc1425b07134de9d0546.tar.bz2 otp-d9f6409a994ae74c8b46fc1425b07134de9d0546.zip |
[snmp|test] Use system monitor to discover slow VM issues
We have experienced intermittent failures of random test cases
on slow VMs. In order to get more info about the system, we
use system monitor to collect info. We have one global monitor
system, and one local monitor on each node. Each local monitor
report to the global, which in turn prints a message.
Diffstat (limited to 'lib/snmp')
-rw-r--r-- | lib/snmp/test/modules.mk | 2 | ||||
-rw-r--r-- | lib/snmp/test/snmp_agent_test.erl | 6 | ||||
-rw-r--r-- | lib/snmp/test/snmp_agent_test_lib.erl | 7 | ||||
-rw-r--r-- | lib/snmp/test/snmp_test_global_sys_monitor.erl | 159 | ||||
-rw-r--r-- | lib/snmp/test/snmp_test_sys_monitor.erl | 86 |
5 files changed, 257 insertions, 3 deletions
diff --git a/lib/snmp/test/modules.mk b/lib/snmp/test/modules.mk index 8b6547f9a9..ec3870dbd8 100644 --- a/lib/snmp/test/modules.mk +++ b/lib/snmp/test/modules.mk @@ -42,6 +42,8 @@ SUITE_MODULES = \ snmp_manager_test TEST_UTIL_MODULES = \ + snmp_test_global_sys_monitor \ + snmp_test_sys_monitor \ snmp_test_lib \ snmp_test_manager \ snmp_test_mgr \ diff --git a/lib/snmp/test/snmp_agent_test.erl b/lib/snmp/test/snmp_agent_test.erl index 860ca17cdb..4a797efbe5 100644 --- a/lib/snmp/test/snmp_agent_test.erl +++ b/lib/snmp/test/snmp_agent_test.erl @@ -557,6 +557,8 @@ init_per_suite(Config0) when is_list(Config0) -> Config3 = [{mib_dir, MibDir}, {std_mib_dir, StdMibDir} | Config2], + snmp_test_global_sys_monitor:start(), + snmp_test_sys_monitor:start(), % We need one on this node also snmp_test_mgr_counter_server:start(), p("init_per_suite -> end when" @@ -580,6 +582,8 @@ end_per_suite(Config) when is_list(Config) -> p("end_per_suite -> failed stopping counter server" "~n Reason: ~p", [Reason]) end, + snmp_test_sys_monitor:stop(), + snmp_test_global_sys_monitor:stop(), p("end_per_suite -> end when" "~n Nodes: ~p", [erlang:nodes()]), @@ -1654,7 +1658,7 @@ create_local_db_dir(Config) when is_list(Config) -> Name = list_to_atom(atom_to_list(create_local_db_dir) ++"-"++As++"-"++Bs++"-"++Cs), Pa = filename:dirname(code:which(?MODULE)), - {ok,Node} = ?t:start_node(Name, slave, [{args, "-pa "++Pa}]), + {ok,Node} = ?t:start_node(Name, slave, [{args, "-pa " ++ Pa}]), %% first start with a nonexisting DbDir Fun1 = fun() -> diff --git a/lib/snmp/test/snmp_agent_test_lib.erl b/lib/snmp/test/snmp_agent_test_lib.erl index 1128fc8a8c..ddc901216e 100644 --- a/lib/snmp/test/snmp_agent_test_lib.erl +++ b/lib/snmp/test/snmp_agent_test_lib.erl @@ -1453,12 +1453,15 @@ start_node(Name) -> "" end, %% Do not use start_link!!! (the proc that calls this one is tmp) - ?DBG("start_node -> Args: ~p~n",[Args]), - A = Args ++ " -pa " ++ Pa, + ?DBG("start_node -> Args: ~p~n", [Args]), + A = Args ++ " -pa " ++ Pa ++ + " -s " ++ atom_to_list(snmp_test_sys_monitor) ++ " start" ++ + " -s global sync", case (catch ?START_NODE(Name, A)) of {ok, Node} -> %% Tell the test_server to not clean up things it never started. ?DBG("start_node -> Node: ~p",[Node]), + global:sync(), {ok, Node}; Else -> ?ERR("start_node -> failed with(other): Else: ~p",[Else]), diff --git a/lib/snmp/test/snmp_test_global_sys_monitor.erl b/lib/snmp/test/snmp_test_global_sys_monitor.erl new file mode 100644 index 0000000000..04dbc72133 --- /dev/null +++ b/lib/snmp/test/snmp_test_global_sys_monitor.erl @@ -0,0 +1,159 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2019-2019. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(snmp_test_global_sys_monitor). + +-export([start/0, stop/0, log/1, + init/1]). + +-define(NAME, ?MODULE). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +start() -> + Parent = self(), + proc_lib:start(?MODULE, init, [Parent]). + +stop() -> + global:send(?NAME, {?MODULE, stop}). + + +log(Event) -> + global:send(?NAME, {?MODULE, node(), Event}). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init(Parent) -> + process_flag(priority, high), + case global:register_name(?NAME, self()) of + yes -> + info_msg("Starting", []), + proc_lib:init_ack(Parent, {ok, self()}), + loop(#{parent => Parent}); + no -> + warning_msg("Already started", []), + proc_lib:init_ack(Parent, {error, already_started}), + exit(normal) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +loop(State) -> + receive + {?MODULE, stop} -> + warning_msg("Stopping", []), + exit(normal); + + {?MODULE, Node, Event} -> + State2 = process_event(State, Node, Event), + loop(State2); + + {nodedown = Event, Node} -> + State2 = process_event(State, Node, Event), + loop(State2); + + _ -> + loop(State) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +process_event(State, Node, {Pid, TS, Tag, Info}) -> + process_system_event(State, Node, Pid, TS, Tag, Info); + +process_event(State, Node, {TS, starting}) -> + FTS = snmp_misc:format_timestamp(TS), + info_msg("System Monitor on node ~p starting at ~s", [Node, FTS]), + if + (Node =/= node()) -> + erlang:monitor_node(Node, true); + true -> + ok + end, + State; + +process_event(State, Node, {TS, already_started}) -> + FTS = snmp_misc:format_timestamp(TS), + info_msg("System Monitor on node ~p already started", [Node, FTS]), + State; + +process_event(State, Node, nodedown) -> + info_msg("Node ~p down", [Node]), + State; + +process_event(State, Node, Event) -> + warning_msg("Received unknown event from node ~p:" + "~n ~p", [Node, Event]), + State. + + +%% System Monitor events +process_system_event(State, Node, Pid, TS, long_gc, Info) -> + print_system_event("Long GC", Node, Pid, TS, Info), + State; +process_system_event(State, Node, Pid, TS, long_schedule, Info) -> + print_system_event("Long Schedule", Node, Pid, TS, Info), + State; +process_system_event(State, Node, Pid, TS, large_heap, Info) -> + print_system_event("Large Heap", Node, Pid, TS, Info), + State; +process_system_event(State, Node, Pid, TS, busy_port, Info) -> + print_system_event("Busy port", Node, Pid, TS, Info), + State; +process_system_event(State, Node, Pid, TS, busy_dist_port, Info) -> + print_system_event("Busy dist port", Node, Pid, TS, Info), + State; + +%% And everything else +process_system_event(State, Node, Pid, TS, Tag, Info) -> + Pre = f("Unknown Event '~p'", [Tag]), + print_system_event(Pre, Node, Pid, TS, Info), + State. + + +print_system_event(Pre, Node, Pid, TS, Info) -> + FTS = snmp_misc:format_timestamp(TS), + warning_msg("~s from ~p (~p) at ~s:" + "~n ~p", [Pre, Node, Pid, FTS, Info]). + +f(F, A) -> + lists:flatten(io_lib:format(F, A)). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +info_msg(F, A) -> + error_logger:info_msg(format_msg(F, A), []). + +warning_msg(F, A) -> + error_logger:warning_msg(format_msg(F, A), []). + + +format_msg(F, A) -> + lists:flatten(io_lib:format( + "~n" ++ + "****** SNMP TEST GLOBAL SYSTEM MONITOR ******~n~n" ++ + F ++ "~n~n", + A)). + diff --git a/lib/snmp/test/snmp_test_sys_monitor.erl b/lib/snmp/test/snmp_test_sys_monitor.erl new file mode 100644 index 0000000000..2291c6ca97 --- /dev/null +++ b/lib/snmp/test/snmp_test_sys_monitor.erl @@ -0,0 +1,86 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2019-2019. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% + +-module(snmp_test_sys_monitor). + +-export([start/0, stop/0, + init/1]). + +-define(NAME, ?MODULE). +-define(GSM, snmp_test_global_sys_monitor). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +start() -> + Parent = self(), + proc_lib:start(?MODULE, init, [Parent]). + +stop() -> + case whereis(?NAME) of + Pid when is_pid(Pid) -> + Pid ! {?MODULE, stop}, + ok; + _ -> + ok + end. + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +init(Parent) -> + process_flag(priority, high), + try register(?NAME, self()) of + true -> + global:sync(), + MonSettings = [ + busy_port, + busy_dist_port, + {long_gc, 1000}, + {long_schedule, 1000}, + {large_heap, 8*1024*1024} % 8 MB + ], + erlang:system_monitor(self(), MonSettings), + ?GSM:log({erlang:timestamp(), starting}), + proc_lib:init_ack(Parent, {ok, self()}), + loop(#{parent => Parent}) + catch + _:_:_ -> + ?GSM:log({erlang:timestamp(), already_started}), + proc_lib:init_ack(Parent, {error, already_started}), + exit(normal) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +loop(State) -> + receive + {monitor, Pid, Tag, Info} -> + ?GSM:log({Pid, erlang:timestamp(), Tag, Info}), + loop(State); + + _ -> + loop(State) + end. + + + |