diff options
author | Björn Gustavsson <[email protected]> | 2010-08-12 15:11:54 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2010-09-03 10:26:11 +0200 |
commit | 9f72e25a2837bc6073482d7b79b80bdeb56984d5 (patch) | |
tree | 4d0779f96d700315b81d043cb309d84b9a7f8460 | |
parent | f673a2e5b4ae4bb2c3ecf4716b5e34b394cc4c08 (diff) | |
download | otp-9f72e25a2837bc6073482d7b79b80bdeb56984d5.tar.gz otp-9f72e25a2837bc6073482d7b79b80bdeb56984d5.tar.bz2 otp-9f72e25a2837bc6073482d7b79b80bdeb56984d5.zip |
Add test suite for os_mon
-rw-r--r-- | lib/os_mon/test/Makefile | 92 | ||||
-rw-r--r-- | lib/os_mon/test/cpu_sup_SUITE.erl | 282 | ||||
-rw-r--r-- | lib/os_mon/test/disksup_SUITE.erl | 426 | ||||
-rw-r--r-- | lib/os_mon/test/memsup_SUITE.erl | 782 | ||||
-rw-r--r-- | lib/os_mon/test/os_mon.spec | 1 | ||||
-rw-r--r-- | lib/os_mon/test/os_mon_SUITE.erl | 89 | ||||
-rw-r--r-- | lib/os_mon/test/os_mon_conf.erl | 28 | ||||
-rw-r--r-- | lib/os_mon/test/os_mon_mib_SUITE.erl | 746 | ||||
-rw-r--r-- | lib/os_mon/test/os_sup_SUITE.erl | 189 |
9 files changed, 2635 insertions, 0 deletions
diff --git a/lib/os_mon/test/Makefile b/lib/os_mon/test/Makefile new file mode 100644 index 0000000000..c87285e38b --- /dev/null +++ b/lib/os_mon/test/Makefile @@ -0,0 +1,92 @@ +# +# %CopyrightBegin% +# +# Copyright Ericsson AB 1997-2010. All Rights Reserved. +# +# The contents of this file are subject to the Erlang Public License, +# Version 1.1, (the "License"); you may not use this file except in +# compliance with the License. You should have received a copy of the +# Erlang Public License along with this software. If not, it can be +# retrieved online at http://www.erlang.org/. +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and limitations +# under the License. +# +# %CopyrightEnd% +# +include $(ERL_TOP)/make/target.mk + +include $(ERL_TOP)/make/$(TARGET)/otp.mk + +# ---------------------------------------------------- +# Target Specs +# ---------------------------------------------------- + +MODULES= \ + os_mon_SUITE \ + disksup_SUITE \ + memsup_SUITE \ + cpu_sup_SUITE \ + os_mon_mib_SUITE \ + os_sup_SUITE \ + os_mon_conf + +EBIN = . + +HRL_FILES= + +ERL_FILES= $(MODULES:%=%.erl) + +TARGET_FILES = $(MODULES:%=$(EBIN)/%.$(EMULATOR)) + +SOURCE = $(ERL_FILES) $(HRL_FILES) + +EMAKEFILE=Emakefile + +# ---------------------------------------------------- +# Release directory specification +# ---------------------------------------------------- +RELSYSDIR = $(RELEASE_PATH)/os_mon_test + +# ---------------------------------------------------- +# FLAGS +# ---------------------------------------------------- +ERL_MAKE_FLAGS += +ERL_COMPILE_FLAGS += -I$(ERL_TOP)/lib/test_server/include \ + -I$(ERL_TOP)/lib/snmp/include + +# ---------------------------------------------------- +# Targets +# ---------------------------------------------------- + +make_emakefile: + $(ERL_TOP)/make/make_emakefile $(ERL_COMPILE_FLAGS) -o$(EBIN) $(MODULES)\ + > $(EMAKEFILE) + +tests debug opt: make_emakefile + erl $(ERL_MAKE_FLAGS) -make + +clean: + rm -f $(EMAKEFILE) + rm -f $(TARGET_FILES) + rm -f core *~ + +docs: + + +# ---------------------------------------------------- +# Release Target +# ---------------------------------------------------- +include $(ERL_TOP)/make/otp_release_targets.mk + +release_spec: + +release_tests_spec: make_emakefile + $(INSTALL_DIR) $(RELSYSDIR) + $(INSTALL_DATA) os_mon.spec $(EMAKEFILE) $(SOURCE) $(RELSYSDIR) + +## tar chf - *_SUITE_data | (cd $(RELSYSDIR); tar xf -) + +release_docs_spec: diff --git a/lib/os_mon/test/cpu_sup_SUITE.erl b/lib/os_mon/test/cpu_sup_SUITE.erl new file mode 100644 index 0000000000..45f9d981d1 --- /dev/null +++ b/lib/os_mon/test/cpu_sup_SUITE.erl @@ -0,0 +1,282 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2002-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(cpu_sup_SUITE). +-include("test_server.hrl"). + +%% Test server specific exports +-export([all/1]). +-export([init_per_suite/1, end_per_suite/1]). +-export([init_per_testcase/2, end_per_testcase/2]). + +%% Test cases +-export([load_api/1]). +-export([util_api/1, util_values/1]). +-export([port/1]). +-export([terminate/1, unavailable/1, restart/1]). + +%% Default timetrap timeout (set in init_per_testcase) +-define(default_timeout, ?t:minutes(1)). + +init_per_suite(Config) when is_list(Config) -> + ?line ok = application:start(os_mon), + Config. + +end_per_suite(Config) when is_list(Config) -> + ?line ok = application:stop(os_mon), + Config. + +init_per_testcase(_Case, Config) -> + Dog = ?t:timetrap(?default_timeout), + [{watchdog, Dog} | Config]. + +end_per_testcase(_Case, Config) -> + Dog = ?config(watchdog, Config), + ?t:timetrap_cancel(Dog), + ok. + +all(suite) -> + case ?t:os_type() of + {unix, sunos} -> + [load_api, util_api, util_values, port, + {conf, terminate, [unavailable], restart}]; + {unix, linux} -> + [load_api, util_api, util_values, port, + {conf, terminate, [unavailable], restart}]; + {unix, _OSname} -> + [load_api]; + _OS -> + [unavailable] + end. + +load_api(suite) -> + []; +load_api(doc) -> + ["Test of load API functions"]; +load_api(Config) when is_list(Config) -> + + %% nprocs() + ?line N = cpu_sup:nprocs(), + ?line true = is_integer(N), + ?line true = N>0, + + %% avg1() + ?line Load1 = cpu_sup:avg1(), + ?line true = is_integer(Load1), + ?line true = Load1>0, + + %% avg5() + ?line Load5 = cpu_sup:avg5(), + ?line true = is_integer(Load5), + ?line true = Load5>0, + + %% avg15() + ?line Load15 = cpu_sup:avg15(), + ?line true = is_integer(Load15), + ?line true = Load15>0, + + ok. + +util_api(suite) -> + []; +util_api(doc) -> + ["Test of utilization API functions"]; +util_api(Config) when is_list(Config) -> + %% Some useful funs when testing util/1 + BusyP = fun({user, _Share}) -> true; + ({nice_user, _Share}) -> true; + ({kernel, _Share}) -> true; + ({hard_irq, _Share}) -> true; + ({soft_irq, _Share}) -> true; + (_) -> false + end, + NonBusyP = fun({wait, _Share}) -> true; + ({idle, _Share}) -> true; + ({steal, _Share}) -> true; + (_) -> false + end, + Sum = fun({_Tag, X}, Acc) -> Acc+X end, + + %% util() + ?line Util1 = cpu_sup:util(), + ?line true = is_number(Util1), + ?line true = Util1>0, + ?line Util2 = cpu_sup:util(), + ?line true = is_number(Util2), + ?line true = Util2>0, + + %% util([]) + ?line {all, Busy1, NonBusy1, []} = cpu_sup:util([]), + ?line 100.00 = Busy1 + NonBusy1, + + %% util([detailed]) + ?line {Cpus2, Busy2, NonBusy2, []} = cpu_sup:util([detailed]), + ?line true = lists:all(fun(X) -> is_integer(X) end, Cpus2), + ?line true = lists:all(BusyP, Busy2), + ?line true = lists:all(NonBusyP, NonBusy2), + ?line 100.00 = lists:foldl(Sum,0,Busy2)+lists:foldl(Sum,0,NonBusy2), + + %% util([per_cpu]) + ?line [{Cpu3, Busy3, NonBusy3, []}|_] = cpu_sup:util([per_cpu]), + ?line true = is_integer(Cpu3), + ?line 100.00 = Busy3 + NonBusy3, + + %% util([detailed, per_cpu]) + ?line [{Cpu4, Busy4, NonBusy4, []}|_] = + cpu_sup:util([detailed, per_cpu]), + ?line true = is_integer(Cpu4), + ?line true = lists:all(BusyP, Busy2), + ?line true = lists:all(NonBusyP, NonBusy2), + ?line 100.00 = lists:foldl(Sum,0,Busy4)+lists:foldl(Sum,0,NonBusy4), + + %% bad util/1 calls + ?line {'EXIT',{badarg,_}} = (catch cpu_sup:util(detailed)), + ?line {'EXIT',{badarg,_}} = (catch cpu_sup:util([detialed])), + + ok. + +-define(SPIN_TIME, 1000). + +util_values(suite) -> + []; +util_values(doc) -> + ["Test utilization values"]; +util_values(Config) when is_list(Config) -> + + Tester = self(), + Ref = make_ref(), + Loop = fun (L) -> L(L) end, + Spinner = fun () -> + Looper = spawn_link(fun () -> Loop(Loop) end), + receive after ?SPIN_TIME -> ok end, + unlink(Looper), + exit(Looper, kill), + Tester ! Ref + end, + + ?line cpu_sup:util(), + + ?line spawn_link(Spinner), + ?line receive Ref -> ok end, + ?line HighUtil1 = cpu_sup:util(), + + ?line receive after ?SPIN_TIME -> ok end, + ?line LowUtil1 = cpu_sup:util(), + + ?line spawn_link(Spinner), + ?line receive Ref -> ok end, + ?line HighUtil2 = cpu_sup:util(), + + ?line receive after ?SPIN_TIME -> ok end, + ?line LowUtil2 = cpu_sup:util(), + + Utils = [{high1,HighUtil1}, {low1,LowUtil1}, + {high2,HighUtil2}, {low2,LowUtil2}], + ?t:format("Utils: ~p~n", [Utils]), + + ?line false = LowUtil1 > HighUtil1, + ?line false = LowUtil1 > HighUtil2, + ?line false = LowUtil2 > HighUtil1, + ?line false = LowUtil2 > HighUtil2, + + ok. + + +% Outdated +% The portprogram is now restarted if killed, and not by os_mon... + +port(suite) -> + []; +port(doc) -> + ["Test that cpu_sup handles a terminating port program"]; +port(Config) when is_list(Config) -> + case cpu_sup_os_pid() of + {ok, PidStr} -> + %% Monitor cpu_sup + ?line MonRef = erlang:monitor(process, cpu_sup), + ?line N1 = cpu_sup:nprocs(), + ?line true = N1>0, + + %% Kill the port program + case os:cmd("kill -9 " ++ PidStr) of + [] -> + %% cpu_sup should not terminate + receive + {'DOWN', MonRef, _, _, Reason} -> + ?line ?t:fail({unexpected_exit_reason, Reason}) + after 3000 -> + ok + end, + + %% Give cpu_sup time to restart cpu_sup port + ?t:sleep(?t:seconds(3)), + ?line N2 = cpu_sup:nprocs(), + ?line true = N2>0, + + erlang:demonitor(MonRef), + ok; + + Line -> + erlang:demonitor(MonRef), + {skip, {not_killed, Line}} + end; + _ -> + {skip, os_pid_not_found } + end. + +terminate(suite) -> + []; +terminate(Config) when is_list(Config) -> + ?line ok = application:set_env(os_mon, start_cpu_sup, false), + ?line ok = supervisor:terminate_child(os_mon_sup, cpu_sup), + ok. + +unavailable(suite) -> + []; +unavailable(doc) -> + ["Test correct behaviour when service is unavailable"]; +unavailable(Config) when is_list(Config) -> + + %% Make sure all API functions return their dummy values + ?line 0 = cpu_sup:nprocs(), + ?line 0 = cpu_sup:avg1(), + ?line 0 = cpu_sup:avg5(), + ?line 0 = cpu_sup:avg15(), + ?line 0 = cpu_sup:util(), + ?line {all,0,0,[]} = cpu_sup:util([]), + ?line {all,0,0,[]} = cpu_sup:util([detailed]), + ?line {all,0,0,[]} = cpu_sup:util([per_cpu]), + ?line {all,0,0,[]} = cpu_sup:util([detailed,per_cpu]), + + ok. + +restart(suite) -> + []; +restart(Config) when is_list(Config) -> + ?line ok = application:set_env(os_mon, start_cpu_sup, true), + ?line {ok, _Pid} = supervisor:restart_child(os_mon_sup, cpu_sup), + ok. + +%% Aux + +cpu_sup_os_pid() -> + Str = os:cmd("ps -e | grep '[c]pu_sup'"), + case io_lib:fread("~s", Str) of + {ok, [Pid], _Rest} -> {ok, Pid}; + _ -> {error, pid_not_found} + end. diff --git a/lib/os_mon/test/disksup_SUITE.erl b/lib/os_mon/test/disksup_SUITE.erl new file mode 100644 index 0000000000..987d631c36 --- /dev/null +++ b/lib/os_mon/test/disksup_SUITE.erl @@ -0,0 +1,426 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(disksup_SUITE). +-include("test_server.hrl"). + +%% Test server specific exports +-export([all/1]). +-export([init_per_suite/1, end_per_suite/1]). +-export([init_per_testcase/2, end_per_testcase/2]). + +%% Test cases +-export([api/1, config/1, alarm/1]). +-export([port/1]). +-export([terminate/1, unavailable/1, restart/1]). +-export([otp_5910/1]). + +%% Default timetrap timeout (set in init_per_testcase) +-define(default_timeout, ?t:minutes(1)). + +init_per_suite(Config) when is_list(Config) -> + ?line ok = application:start(os_mon), + Config. + +end_per_suite(Config) when is_list(Config) -> + ?line ok = application:stop(os_mon), + Config. + +init_per_testcase(_Case, Config) -> + Dog = ?t:timetrap(?default_timeout), + [{watchdog,Dog} | Config]. + +end_per_testcase(_Case, Config) -> + Dog = ?config(watchdog, Config), + ?t:timetrap_cancel(Dog), + ok. + +all(suite) -> + Bugs = [otp_5910], + case ?t:os_type() of + {unix, sunos} -> + [api, config, alarm, port, + {conf, terminate, [unavailable], restart}] ++ Bugs; + {unix, _OSname} -> + [api, alarm] ++ Bugs; + {win32, _OSname} -> + [api, alarm] ++ Bugs; + _OS -> + [unavailable] + end. + +api(suite) -> + []; +api(doc) -> + ["Test of API functions"]; +api(Config) when is_list(Config) -> + + %% get_disk_data() + ?line [{Id, KByte, Capacity}|_] = disksup:get_disk_data(), + ?line true = io_lib:printable_list(Id), + ?line true = is_integer(KByte), + ?line true = is_integer(Capacity), + ?line true = KByte>0, + ?line true = Capacity>0, + + %% get_check_interval() + ?line 1800000 = disksup:get_check_interval(), + + %% set_check_interval(Minutes) + ?line ok = disksup:set_check_interval(20), + ?line 1200000 = disksup:get_check_interval(), + ?line {'EXIT',{badarg,_}} = (catch disksup:set_check_interval(0.5)), + ?line 1200000 = disksup:get_check_interval(), + ?line ok = disksup:set_check_interval(30), + + %% get_almost_full_threshold() + ?line 80 = disksup:get_almost_full_threshold(), + + %% set_almost_full_threshold(Float) + ?line ok = disksup:set_almost_full_threshold(0.90), + ?line 90 = disksup:get_almost_full_threshold(), + ?line {'EXIT',{badarg,_}} = + (catch disksup:set_almost_full_threshold(-0.5)), + ?line 90 = disksup:get_almost_full_threshold(), + ?line ok = disksup:set_almost_full_threshold(0.80), + + ok. + +config(suite) -> + []; +config(doc) -> + ["Test configuration"]; +config(Config) when is_list(Config) -> + + %% Change configuration parameters and make sure change is reflected + %% when disksup is restarted + ?line ok = + application:set_env(os_mon, disk_space_check_interval, 29), + ?line ok = + application:set_env(os_mon, disk_almost_full_threshold, 0.81), + + ?line ok = supervisor:terminate_child(os_mon_sup, disksup), + ?line {ok, _Child1} = supervisor:restart_child(os_mon_sup, disksup), + + ?line 1740000 = disksup:get_check_interval(), + ?line 81 = disksup:get_almost_full_threshold(), + + %% Also try this with bad parameter values, should be ignored + ?line ok = + application:set_env(os_mon, disk_space_check_interval, 0.5), + ?line ok = + application:set_env(os_mon, disk_almost_full_threshold, -0.81), + + ?line ok = supervisor:terminate_child(os_mon_sup, disksup), + ?line {ok, _Child2} = supervisor:restart_child(os_mon_sup, disksup), + + ?line 1800000 = disksup:get_check_interval(), + ?line 80 = disksup:get_almost_full_threshold(), + + %% Reset configuration parameters + ?line ok = + application:set_env(os_mon, disk_space_check_interval, 30), + ?line ok = + application:set_env(os_mon, disk_almost_full_threshold, 0.80), + + ok. + +%%---------------------------------------------------------------------- +%% NOTE: The test case is a bit weak as it will fail if the disk usage +%% changes too much during its course, or if there are timing problems +%% with the alarm_handler receiving the alarms too late +%%---------------------------------------------------------------------- +alarm(suite) -> + []; +alarm(doc) -> + ["Test that alarms are set and cleared"]; +alarm(Config) when is_list(Config) -> + + %% Find out how many disks exceed the threshold + %% and make sure the corresponding number of alarms is set + ?line Threshold1 = disksup:get_almost_full_threshold(), % 80 + ?line Data1 = disksup:get_disk_data(), + ?line Over1 = over_threshold(Data1, Threshold1), + ?line Alarms1 = get_alarms(), + if + Over1==length(Alarms1) -> + ?line true; + true -> + dump_info(), + ?line ?t:fail({bad_alarms, Threshold1, Data1, Alarms1}) + end, + + %% Try to find a disk with space usage below Threshold1, + %% lower the threshold accordingly and make sure new alarms are set + Fun1 = fun({_Id, _Kbyte, Capacity}) -> + if + Capacity>0, Capacity<Threshold1 -> true; + true -> false + end + end, + ?line case until(Fun1, Data1) of + {_, _, Cap1} -> + Threshold2 = Cap1-1, + ?line ok = + disksup:set_almost_full_threshold(Threshold2/100), + ?line disksup ! timeout, % force a disk check + ?line Data2 = disksup:get_disk_data(), + ?line Over2 = over_threshold(Data2, Threshold2), + ?line Alarms2 = get_alarms(), + if + Over2==length(Alarms2), Over2>Over1 -> + ?line true; + true -> + dump_info(), + ?line ?t:fail({bad_alarms, Threshold2, Data2, Alarms2}) + end; + false -> + ?line ignore + end, + + %% Find out the highest space usage among all disks + %% and try to raise the threshold above this value, + %% make sure all alarms are cleared + Fun2 = fun({_Id, _Kbyte, Capacity}, MaxAcc) -> + if + Capacity>MaxAcc -> Capacity; + true -> MaxAcc + end + end, + ?line case lists:foldl(Fun2, 0, Data1) of + Max when Max<100 -> + Threshold3 = Max+1, + ?line ok = + disksup:set_almost_full_threshold(Threshold3/100), + ?line disksup ! timeout, % force a disk check + ?line Data3 = disksup:get_disk_data(), + ?line Over3 = over_threshold(Data3, Threshold3), + ?line Alarms3 = get_alarms(), + if + Over3==0, length(Alarms3)==0 -> + ?line ok; + true -> + dump_info(), + ?line ?t:fail({bad_alarms, Threshold3, Data3, Alarms3}) + end; + 100 -> + ?line ignore + end, + + %% Reset threshold + ?line ok = disksup:set_almost_full_threshold(Threshold1/100), + + ok. + +over_threshold(Data, Threshold) -> + Data2 = remove_duplicated_disks(lists:keysort(1, Data)), + lists:foldl(fun({_Id, _Kbyte, Cap}, N) when Cap>=Threshold -> + N+1; + (_DiskData, N) -> + N + end, + 0, + Data2). + +%% On some platforms (for example MontaVista) data for one disk can be +%% "duplicated": +%% Linux ppb 2.4.20_mvl31-pcore680 #1 Sun Feb 1 23:12:56 PST 2004 ppc unknown +%% +%% MontaVista(R) Linux(R) Professional Edition 3.1 +%% +%% [ppb:~]> /bin/df -lk +%% Filesystem 1k-blocks Used Available Use% Mounted on +%% rootfs 8066141 3023763 4961717 38% / +%% /dev/root 8066141 3023763 4961717 38% / +%% tmpfs 192892 0 192892 0% /dev/shm +%% +%% disksup: +%% [{"/",8066141,38}, {"/",8066141,38}, {"/dev/shm",192892,0}] +%% +%% disksup will only set ONE alarm for "/". +%% Therefore the list of disk data must be sorted and duplicated disk +%% tuples removed before calculating how many alarms should be set, or +%% the testcase will fail erroneously. +remove_duplicated_disks([{Id, _, _}, {Id, Kbyte, Cap}|T]) -> + remove_duplicated_disks([{Id, Kbyte, Cap}|T]); +remove_duplicated_disks([H|T]) -> + [H|remove_duplicated_disks(T)]; +remove_duplicated_disks([]) -> + []. + +get_alarms() -> + lists:filter(fun({{disk_almost_full, _Disk},_}) -> true; + (_) -> false + end, + alarm_handler:get_alarms()). + +until(Fun, [H|T]) -> + case Fun(H) of + true -> H; + false -> + until(Fun, T) + end; +until(_Fun, []) -> + false. + +port(suite) -> + []; +port(doc) -> + ["Test that disksup handles a terminating port program"]; +port(Config) when is_list(Config) -> + ?line Str = os:cmd("ps -ef | grep '[d]isksup'"), + case io_lib:fread("~s ~s", Str) of + {ok, [_Uid,Pid], _Rest} -> + + %% Monitor disksup + ?line MonRef = erlang:monitor(process, disksup), + ?line [{_Disk1,Kbyte1,_Cap1}|_] = disksup:get_disk_data(), + ?line true = Kbyte1>0, + + %% Kill the port program + case os:cmd("kill -9 " ++ Pid) of + [] -> + + %% disksup should now terminate + receive + {'DOWN', MonRef, _, _, {port_died, _Reason}} -> + ok; + {'DOWN', MonRef, _, _, Reason} -> + ?line ?t:fail({unexpected_exit_reason, Reason}) + after + 3000 -> + ?line ?t:fail({still_alive, Str}) + end, + + %% Give os_mon_sup time to restart disksup + ?t:sleep(?t:seconds(3)), + ?line [{_Disk2,Kbyte2,_Cap2}|_] = + disksup:get_disk_data(), + ?line true = Kbyte2>0, + + ok; + + Line -> + erlang:demonitor(MonRef), + {skip, {not_killed, Line}} + end; + _ -> + {skip, {os_pid_not_found, Str}} + end. + +terminate(suite) -> + []; +terminate(Config) when is_list(Config) -> + ?line ok = application:set_env(os_mon, start_disksup, false), + ?line ok = supervisor:terminate_child(os_mon_sup, disksup), + ok. + +unavailable(suite) -> + []; +unavailable(doc) -> + ["Test correct behaviour when service is unavailable"]; +unavailable(Config) when is_list(Config) -> + + %% Make sure all API functions return their dummy values + ?line [{"none",0,0}] = disksup:get_disk_data(), + ?line 1800000 = disksup:get_check_interval(), + ?line ok = disksup:set_check_interval(5), + ?line 80 = disksup:get_almost_full_threshold(), + ?line ok = disksup:set_almost_full_threshold(0.9), + + ok. + +restart(suite) -> + []; +restart(Config) when is_list(Config) -> + ?line ok = application:set_env(os_mon, start_disksup, true), + ?line {ok, _Pid} = supervisor:restart_child(os_mon_sup, disksup), + ok. + +otp_5910(suite) -> + []; +otp_5910(doc) -> + ["Test that alarms are cleared if disksup crashes or " + "if OS_Mon is stopped"]; +otp_5910(Config) when is_list(Config) -> + + %% Make sure disksup sets at least one alarm + ?line Data = disksup:get_disk_data(), + ?line Threshold0 = disksup:get_almost_full_threshold(), + ?line Threshold = case over_threshold(Data, Threshold0) of + 0 -> + [{_Id,_Kbyte,Cap}|_] = Data, + ?line ok = disksup:set_almost_full_threshold((Cap-1)/100), + Cap-1; + _N -> + Threshold0 + end, + ?line ok = application:set_env(os_mon, + disk_almost_full_threshold, + Threshold/100), + ?line disksup ! timeout, % force a disk check + ?line Data2 = disksup:get_disk_data(), + ?line Over = over_threshold(Data2, Threshold), + ?line Alarms = get_alarms(), + if + Over==0 -> + ?line ?t:fail({threshold_too_low, Data2, Threshold}); + Over==length(Alarms) -> + ok; + true -> + dump_info(), + ?line ?t:fail({bad_alarms, Threshold, Data2, Alarms}) + end, + + %% Kill disksup + exit(whereis(disksup), faked_disksup_crash), + + %% Wait a little to make sure disksup has been restarted, + %% then make sure the alarms are set once, but not twice + ?t:sleep(?t:seconds(1)), + ?line Data3 = disksup:get_disk_data(), + ?line Alarms2 = get_alarms(), + if + length(Alarms2)==length(Alarms) -> + ok; + true -> + dump_info(), + ?line ?t:fail({bad_alarms, Threshold, Data3, Alarms,Alarms2}) + end, + + %% Stop OS_Mon and make sure all disksup alarms are cleared + ?line ok = application:stop(os_mon), + ?t:sleep(?t:seconds(1)), + ?line Alarms3 = get_alarms(), + if + length(Alarms3)==0 -> + ok; + true -> + ?line ?t:fail({alarms_not_cleared, Alarms3}) + end, + + %% Reset threshold and restart OS_Mon + ?line ok = application:set_env(os_mon, + disksup_almost_full_threshold, 0.8), + ?line ok = disksup:set_almost_full_threshold(0.8), + ?line ok = application:start(os_mon), + + ok. + +dump_info() -> + io:format("Status: ~p~n", [sys:get_status(disksup)]). diff --git a/lib/os_mon/test/memsup_SUITE.erl b/lib/os_mon/test/memsup_SUITE.erl new file mode 100644 index 0000000000..01a7f6c7f2 --- /dev/null +++ b/lib/os_mon/test/memsup_SUITE.erl @@ -0,0 +1,782 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(memsup_SUITE). +-include("test_server.hrl"). + +%% Test server specific exports +-export([all/1]). +-export([init_per_suite/1, end_per_suite/1]). +-export([init_per_testcase/2, end_per_testcase/2]). + +%% Test cases +-export([api/1, alarm1/1, alarm2/1, process/1]). +-export([config/1, timeout/1, unavailable/1, port/1]). +-export([otp_5910/1]). + +%% Default timetrap timeout (set in init_per_testcase) +-define(default_timeout, ?t:minutes(1)). + +init_per_suite(Config) when is_list(Config) -> + ?line ok = application:start(os_mon), + Config. + +end_per_suite(Config) when is_list(Config) -> + ?line ok = application:stop(os_mon), + Config. + +init_per_testcase(_Case, Config) -> + Dog = ?t:timetrap(?default_timeout), + [{watchdog,Dog} | Config]. + +end_per_testcase(_Case, Config) -> + Dog = ?config(watchdog, Config), + ?t:timetrap_cancel(Dog), + Config. + +all(suite) -> + All = case ?t:os_type() of + {unix, sunos} -> + [api, alarm1, alarm2, process, + config, timeout, unavailable, port]; + {unix, linux} -> + [api, alarm1, alarm2, process, timeout]; + _OS -> + [api, alarm1, alarm2, process] + end, + Bugs = [otp_5910], + All ++ Bugs. + +api(suite) -> + []; +api(doc) -> + ["Test of API functions"]; +api(Config) when is_list(Config) -> + + %% get_memory_data() + ?line RegMemData = memsup:get_memory_data(), + case RegMemData of + {TotMem, AllBytes, {Pid, PidBytes}} when is_integer(TotMem), + is_integer(AllBytes), + is_pid(Pid), + is_integer(PidBytes) -> + ok; + {0, 0, _WorstPid} -> + ?line ?t:fail(first_data_collection_failed); + _ -> + ?line ?t:fail({bad_return, RegMemData}) + end, + + %% get_system_memory_data() + ?line ExtMemData = memsup:get_system_memory_data(), + Tags = [ total_memory, + free_memory, + system_total_memory, + largest_free, + number_of_free, + free_swap, + total_swap, + cached_memory, + buffered_memory, + shared_memory], + + ?line true = lists:all(fun({Tag,Value}) when is_atom(Tag), + is_integer(Value) -> + lists:member(Tag, Tags); + (_) -> + false + end, + ExtMemData), + + %% get_os_wordsize() + ?line ok = case memsup:get_os_wordsize() of + 32 -> ok; + 64 -> ok; + unsupported_os -> ok; + _ -> error + end, + + %% get_check_interval() + ?line 60000 = memsup:get_check_interval(), + + %% set_check_interval(Minutes) + ?line ok = memsup:set_check_interval(2), + ?line 120000 = memsup:get_check_interval(), + ?line {'EXIT',{badarg,_}} = + (catch memsup:set_check_interval(0.2)), + ?line 120000 = memsup:get_check_interval(), + ?line ok = memsup:set_check_interval(1), + + %% get_procmem_high_watermark() + ?line 5 = memsup:get_procmem_high_watermark(), + + %% set_procmem_high_watermark() + ?line ok = memsup:set_procmem_high_watermark(0.1), + ?line 10 = memsup:get_procmem_high_watermark(), + ?line {'EXIT',{badarg,_}} = + (catch memsup:set_procmem_high_watermark(-0.1)), + ?line 10 = memsup:get_procmem_high_watermark(), + ?line ok = memsup:set_procmem_high_watermark(0.05), + + %% get_sysmem_high_watermark() + ?line 80 = memsup:get_sysmem_high_watermark(), + + %% set_sysmem_high_watermark() + ?line ok = memsup:set_sysmem_high_watermark(0.9), + ?line 90 = memsup:get_sysmem_high_watermark(), + ?line {'EXIT',{badarg,_}} = + (catch memsup:set_sysmem_high_watermark(-0.9)), + ?line 90 = memsup:get_sysmem_high_watermark(), + ?line ok = memsup:set_sysmem_high_watermark(0.8), + + %% get|set_helper_timeout + ?line 30 = memsup:get_helper_timeout(), + ?line ok = memsup:set_helper_timeout(29), + ?line 29 = memsup:get_helper_timeout(), + ?line {'EXIT',{badarg,_}} = (catch memsup:set_helper_timeout(31.0)), + ?line 29 = memsup:get_helper_timeout(), + ok. + +%%---------------------------------------------------------------------- +%% NOTE: The test case is a bit weak as it will fail if the memory +%% usage changes too much during its course. +%%---------------------------------------------------------------------- +alarm1(suite) -> + []; +alarm1(doc) -> + ["Test alarms when memsup_system_only==false"]; +alarm1(Config) when is_list(Config) -> + + %% If system memory usage is too high, the testcase cannot + %% be run correctly + ?line {Total, Alloc, {_Pid,_PidAlloc}} = memsup:get_memory_data(), + io:format("alarm1: Total: ~p, Alloc: ~p~n", [Total, Alloc]), + ?line SysUsage = Alloc/Total, + if + SysUsage>0.99 -> + {skip, sys_mem_too_high}; + true -> + alarm1(Config, SysUsage) + end. + +alarm1(_Config, SysUsage) -> + %% Set a long memory check interval, we will force memory checks + %% instead + ?line ok = memsup:set_check_interval(60), + + %% Check thresholds + ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100), + ?line ProcThreshold = (memsup:get_procmem_high_watermark()/100), + + %% Check if a system alarm already should be set or not + SysP = if + SysUsage>SysThreshold -> true; + SysUsage=<SysThreshold -> false + end, + + %% If system memory is higher than threshold, make sure the system + %% alarm is set. Otherwise, make sure it is not set + case alarm_set(system_memory_high_watermark) of + {true, []} when SysP -> + ok; + false when not SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, SysThreshold}) + end, + + %% Lower/raise the threshold to clear/set the alarm + NewSysThreshold = if + SysP -> + Value = 1.1*SysUsage, + if + Value > 0.99 -> 0.99; + true -> Value + end; + not SysP -> 0.9*SysUsage + end, + + ?line ok = memsup:set_sysmem_high_watermark(NewSysThreshold), + + %% Initiate and wait for a new data collection + ?line ok = force_collection(), + + %% Make sure the alarm is cleared/set + ?t:sleep(?t:seconds(5)), + case alarm_set(system_memory_high_watermark) of + {true, []} when not SysP -> + ok; + false when SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, NewSysThreshold}) + end, + + %% Reset the threshold to set/clear the alarm again + ?line ok = memsup:set_sysmem_high_watermark(SysThreshold), + ?line ok = force_collection(), + ?t:sleep(?t:seconds(1)), + case alarm_set(system_memory_high_watermark) of + {true, []} when SysP -> + ok; + false when not SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, SysThreshold}) + end, + + %% Check memory usage + ?line {Total2, _, {WorstPid, PidAlloc}} = memsup:get_memory_data(), + + %% Check if a process alarm already should be set or not + PidUsage = PidAlloc/Total2, + ProcP = if + PidUsage>ProcThreshold -> true; + PidUsage=<ProcThreshold -> false + end, + + %% Make sure the process alarm is set/not set accordingly + case alarm_set(process_memory_high_watermark) of + {true, WorstPid} when ProcP -> + ok; + false when not ProcP -> + ok; + {true, BadPid1} when ProcP -> + ?line ?t:fail({proc_alarm, WorstPid, BadPid1}); + _ -> + ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold}) + end, + + %% Lower/raise the threshold to clear/set the alarm + NewProcThreshold = if + ProcP -> 1.1*PidUsage; + not ProcP -> 0.9*PidUsage + end, + ?line ok = memsup:set_procmem_high_watermark(NewProcThreshold), + ?line ok = force_collection(), + ?t:sleep(?t:seconds(1)), + case alarm_set(process_memory_high_watermark) of + {true, WorstPid} when not ProcP -> + ok; + false when ProcP -> + ok; + {true, BadPid2} when not ProcP -> + ?line test_server:fail({proc_alarm, WorstPid, BadPid2}); + _ -> + ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold}) + end, + + %% Reset the threshold to clear/set the alarm + ?line ok = memsup:set_procmem_high_watermark(ProcThreshold), + ?line ok = force_collection(), + ?t:sleep(?t:seconds(1)), + case alarm_set(process_memory_high_watermark) of + {true, WorstPid} when ProcP -> + ok; + false when not ProcP -> + ok; + {true, BadPid3} when ProcP -> + ?line test_server:fail({proc_alarm, WorstPid, BadPid3}); + _ -> + ?line ?t:fail({proc_alarm, PidUsage, ProcThreshold}) + end, + + %% Reset memory check interval + ?line ok = memsup:set_check_interval(1), + ok. + +alarm2(suite) -> + []; +alarm2(doc) -> + ["Test alarms when memsup_system_only==true"]; +alarm2(Config) when is_list(Config) -> + + %% If system memory usage is too high, the testcase cannot + %% be run correctly + ?line {Total, Alloc, {_Pid,_PidAlloc}} = memsup:get_memory_data(), + ?line SysUsage = Alloc/Total, + if + SysUsage>0.99 -> + {skip, sys_mem_too_high}; + true -> + alarm2(Config, SysUsage) + end. + +alarm2(_Config, _SysUsage) -> + + %% Change memsup_system_only and restart memsup + ?line ok = application:set_env(os_mon, memsup_system_only, true), + ?line ok = supervisor:terminate_child(os_mon_sup, memsup), + ?line {ok, _Memsup1} = supervisor:restart_child(os_mon_sup, memsup), + + %% Set a long memory check interval, we will force memory checks + %% instead + ?line ok = memsup:set_check_interval(60), + + %% Check data and thresholds + ?line {Total, Alloc, undefined} = memsup:get_memory_data(), + ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100), + ?line true = is_integer(memsup:get_procmem_high_watermark()), + + %% Check if a system alarm already should be set or not + ?line SysUsage = Alloc/Total, + SysP = if + SysUsage>SysThreshold -> true; + SysUsage=<SysThreshold -> false + end, + + %% If system memory is higher than threshold, make sure the system + %% alarm is set. Otherwise, make sure it is not set + case alarm_set(system_memory_high_watermark) of + {true, []} when SysP -> + ok; + false when not SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, SysThreshold}) + end, + + %% Lower/raise the threshold to clear/set the alarm + NewSysThreshold = if + SysP -> + Value = 1.1*SysUsage, + if + Value > 0.99 -> 0.99; + true -> Value + end; + not SysP -> 0.9*SysUsage + end, + + ?line ok = memsup:set_sysmem_high_watermark(NewSysThreshold), + + %% Initiate and wait for a new data collection + ?line ok = force_collection(), + + %% Make sure the alarm is cleared/set + ?t:sleep(?t:seconds(1)), + case alarm_set(system_memory_high_watermark) of + {true, []} when not SysP -> + ok; + false when SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, NewSysThreshold}) + end, + + %% Reset the threshold to set/clear the alarm again + ?line ok = memsup:set_sysmem_high_watermark(SysThreshold), + ?line ok = force_collection(), + ?t:sleep(?t:seconds(1)), + case alarm_set(system_memory_high_watermark) of + {true, []} when SysP -> + ok; + false when not SysP -> + ok; + _ -> + ?line ?t:fail({sys_alarm, SysUsage, SysThreshold}) + end, + + %% Reset memsup_system_only and restart memsup + %% (memory check interval is then automatically reset) + ?line ok = application:set_env(os_mon, memsup_system_only, false), + ?line ok = supervisor:terminate_child(os_mon_sup, memsup), + ?line {ok, _Memsup2} = supervisor:restart_child(os_mon_sup, memsup), + + ok. + +alarm_set(Alarm) -> + alarm_set(Alarm, alarm_handler:get_alarms()). +alarm_set(Alarm, [{Alarm,Data}|_]) -> + {true,Data}; +alarm_set(Alarm, [_|T]) -> + alarm_set(Alarm, T); +alarm_set(_Alarm, []) -> + false. + +process(suite) -> + []; +process(doc) -> + ["Make sure memsup discovers a process grown very large"]; +process(Config) when is_list(Config) -> + + %% Set a long memory check interval, we will force memory checks + %% instead + ?line ok = memsup:set_check_interval(60), + + %% Collect data + MemData = memsup:get_memory_data(), + io:format("process: memsup:get_memory_data() = ~p~n", [MemData]), + ?line {_Total,_Free,{_,Bytes}} = MemData, + + %% Start a new process larger than Worst + ?line WorsePid = spawn(fun() -> new_hog(Bytes) end), + ?t:sleep(?t:seconds(1)), + + %% Initiate and wait for a new data collection + ?line ok = force_collection(), + + %% Check that get_memory_data() returns updated result + ?line case memsup:get_memory_data() of + {_, _, {WorsePid, _MoreBytes}} -> + ok; + {_, _, BadWorst} -> + ?line ?t:fail({worst_pid, BadWorst}) + end, + + %% Reset memory check interval + ?line exit(WorsePid, done), + ?line ok = memsup:set_check_interval(1), + ok. + +new_hog(Bytes) -> + WordSize = erlang:system_info(wordsize), + N = (Bytes+200) div WordSize div 2, + List = lists:duplicate(N, a), + new_hog_1(List). + +new_hog_1(List) -> + receive + _Any -> exit(List) + end. + +config(suite) -> + []; +config(doc) -> + ["Test configuration"]; +config(Config) when is_list(Config) -> + + %% Change configuration parameters and make sure change is reflected + %% when memsup is restarted + ?line ok = application:set_env(os_mon, memory_check_interval, 2), + ?line ok = + application:set_env(os_mon, system_memory_high_watermark, 0.9), + ?line ok = + application:set_env(os_mon, process_memory_high_watermark, 0.1), + ?line ok = application:set_env(os_mon, memsup_helper_timeout, 35), + ?line ok = application:set_env(os_mon, memsup_system_only, true), + + ?line ok = supervisor:terminate_child(os_mon_sup, memsup), + ?line {ok, _Child1} = supervisor:restart_child(os_mon_sup, memsup), + + ?line 120000 = memsup:get_check_interval(), + ?line 90 = memsup:get_sysmem_high_watermark(), + ?line 10 = memsup:get_procmem_high_watermark(), + ?line 35 = memsup:get_helper_timeout(), + + %% Also try this with bad parameter values, should be ignored + ?line ok = application:set_env(os_mon, memory_check_interval, 0.2), + ?line ok = + application:set_env(os_mon, system_memory_high_watermark, -0.9), + ?line ok = + application:set_env(os_mon, process_memory_high_watermark,-0.1), + ?line ok = application:set_env(os_mon, memsup_helper_timeout, 0.35), + ?line ok = application:set_env(os_mon, memsup_system_only, arne), + + ?line ok = supervisor:terminate_child(os_mon_sup, memsup), + ?line {ok, _Child2} = supervisor:restart_child(os_mon_sup, memsup), + + ?line 60000 = memsup:get_check_interval(), + ?line 80 = memsup:get_sysmem_high_watermark(), + ?line 5 = memsup:get_procmem_high_watermark(), + ?line 30 = memsup:get_helper_timeout(), + + %% Reset configuration parameters + ?line ok = application:set_env(os_mon, memory_check_interval, 1), + ?line ok = + application:set_env(os_mon, system_memory_high_watermark, 0.8), + ?line ok = + application:set_env(os_mon, process_memory_high_watermark,0.05), + ?line ok = application:set_env(os_mon, memsup_helper_timeout, 30), + ?line ok = application:set_env(os_mon, memsup_system_only, false), + + ok. + +unavailable(suite) -> + []; +unavailable(doc) -> + ["Test correct behaviour when service is unavailable"]; +unavailable(Config) when is_list(Config) -> + + %% Close memsup + ?line ok = application:set_env(os_mon, start_memsup, false), + ?line ok = supervisor:terminate_child(os_mon_sup, memsup), + + %% Make sure all API functions return their dummy values + ?line {0,0,{_Pid,0}} = memsup:get_memory_data(), + ?line ok = application:set_env(os_mon, memsup_system_only, true), + ?line {0,0,undefined} = memsup:get_memory_data(), + ?line ok = application:set_env(os_mon, memsup_system_only, false), + ?line [] = memsup:get_system_memory_data(), + ?line 0 = memsup:get_os_wordsize(), + ?line 60000 = memsup:get_check_interval(), + ?line ok = memsup:set_check_interval(2), + ?line 5 = memsup:get_procmem_high_watermark(), + ?line ok = memsup:set_procmem_high_watermark(0.10), + ?line 80 = memsup:get_sysmem_high_watermark(), + ?line ok = memsup:set_sysmem_high_watermark(0.90), + ?line 30 = memsup:get_helper_timeout(), + ?line ok = memsup:set_helper_timeout(35), + + %% Start memsup again, + ?line ok = application:set_env(os_mon, start_memsup, true), + ?line {ok, _Child} = supervisor:restart_child(os_mon_sup, memsup), + + ok. + +timeout(suite) -> + []; +timeout(doc) -> + ["Test stability of memsup when data collection times out"]; +timeout(Config) when is_list(Config) -> + + %% Set a long memory check interval and memsup_helper timeout, + %% we will force memory checks instead and fake timeouts + ?line ok = memsup:set_check_interval(60), + ?line ok = memsup:set_helper_timeout(3600), + + %% Provoke a timeout during memory collection + ?line memsup ! time_to_collect, + ?line memsup ! reg_collection_timeout, + + %% Not much we can check though, except that memsup is still running + ?line {_,_,_} = memsup:get_memory_data(), + + %% Provoke a timeout during extensive memory collection + %% We fake a gen_server:call/2 to be able to send a timeout message + %% while the request is being handled + + %% Linux should be handled the same way as solaris. + +% TimeoutMsg = case ?t:os_type() of +% {unix, sunos} -> ext_collection_timeout; +% {unix, linux} -> reg_collection_timeout +% end, + + TimeoutMsg = ext_collection_timeout, + + ?line Pid = whereis(memsup), + ?line Mref = erlang:monitor(process, Pid), + ?line Pid ! {'$gen_call', {self(), Mref}, get_system_memory_data}, + ?line Pid ! TimeoutMsg, + receive + {Mref, []} -> + erlang:demonitor(Mref), + ?line ok; + {Mref, Res} -> + erlang:demonitor(Mref), + ?line ?t:fail({unexpected_result, Res}); + {'DOWN', Mref, _, _, _} -> + ?line ?t:fail(no_result) + end, + + %% Reset memory check interval and memsup_helper timeout + ?line ok = memsup:set_check_interval(1), + ?line ok = memsup:set_helper_timeout(30), + ?line memsup ! time_to_collect, + + ?line [_|_] = memsup:get_system_memory_data(), + + ok. + +port(suite) -> + []; +port(doc) -> + ["Test that memsup handles a terminating port program"]; +port(Config) when is_list(Config) -> + ?line Str = os:cmd("ps -e | grep '[m]emsup'"), + case io_lib:fread("~s", Str) of + {ok, [Pid], _Rest} -> + + %% Monitor memsup + ?line MonRef = erlang:monitor(process, memsup), + ?line {Total1,_Alloc1,_Worst1} = memsup:get_memory_data(), + ?line true = Total1>0, + + %% Kill the port program + case os:cmd("kill -9 " ++ Pid) of + [] -> + + %% memsup should now terminate + receive + {'DOWN', MonRef, _, _, {port_died, _Reason}} -> + ok; + {'DOWN', MonRef, _, _, Reason} -> + ?line ?t:fail({unexpected_exit_reason, Reason}) + after + 3000 -> + ?line ?t:fail(still_alive) + end, + + %% Give os_mon_sup time to restart memsup + ?t:sleep(?t:seconds(3)), + ?line {Total2,_Alloc2,_Worst2} = + memsup:get_memory_data(), + ?line true = Total2>0, + + ok; + + Line -> + erlang:demonitor(MonRef), + {skip, {not_killed, Line}} + end; + _ -> + {skip, {os_pid_not_found, Str}} + end. + +otp_5910(suite) -> + []; +otp_5910(doc) -> + ["Test that alarms are cleared and not set twice"]; +otp_5910(Config) when is_list(Config) -> + Alarms = + [system_memory_high_watermark, process_memory_high_watermark], + + %% Make sure memsup sets both alarms + ?line ok = application:set_env(os_mon, memory_check_interval, 60), + ?line ok = memsup:set_check_interval(60), + ?line SysThreshold = (memsup:get_sysmem_high_watermark()/100), + ?line ProcThreshold = (memsup:get_procmem_high_watermark()/100), + + MemData = memsup:get_memory_data(), + + io:format("otp_5910: memsup:get_memory_data() = ~p~n", [MemData]), + ?line {Total, Alloc, {_Pid, _Bytes}} = MemData, + ?line Pid = spawn_opt(fun() -> + receive + die -> ok + end + end, [{min_heap_size, 1000}]), + %% Create a process guaranteed to live, be constant and + %% break memsup process limit + ?line {memory, Bytes} = erlang:process_info(Pid,memory), + ?line SysUsage = Alloc/Total, + ?line ProcUsage = Bytes/Total, + + if + SysUsage>SysThreshold -> + ok; + SysUsage=<SysThreshold -> + ?line ok = application:set_env(os_mon, + sys_mem_high_watermark, + 0.5 * SysUsage), + ?line ok = memsup:set_sysmem_high_watermark(0.5 * SysUsage) + end, + if + ProcUsage>ProcThreshold -> + ok; + ProcUsage=<ProcThreshold -> + ?line ok = application:set_env(os_mon, + proc_mem_high_watermark, + 0.5 * ProcUsage), + ?line ok = memsup:set_procmem_high_watermark(0.5 *ProcUsage) + end, + ?line ok = force_collection(), + ?t:sleep(?t:seconds(1)), + lists:foreach(fun(AlarmId) -> + case alarm_set(AlarmId) of + {true, _} -> ok; + false -> + ?line ?t:fail({alarm_not_set, + AlarmId}) + end + end, + Alarms), + + %% Kill guaranteed process... + Pid ! die, + %% Kill memsup + exit(whereis(memsup), faked_memsup_crash), + %% Wait a little to make sure memsup has been restarted, + %% then make sure the alarms are set once, but not twice + ?t:sleep(?t:seconds(1)), + ?line MemUsage = memsup:get_memory_data(), + SetAlarms = alarm_handler:get_alarms(), + case lists:foldl(fun(system_memory_high_watermark, {S, P}) -> + {S+1, P}; + (process_memory_high_watermark, {S, P}) -> + {S, P+1}; + (_AlarmId, Acc0) -> + Acc0 + end, + {0, 0}, + SetAlarms) of + {0, 0} -> + ok; + _ -> + ?line ?t:fail({bad_number_of_alarms, SetAlarms, MemUsage}) + end, + + %% Stop OS_Mon and make sure all memsup alarms are cleared + ?line ok = application:stop(os_mon), + ?t:sleep(?t:seconds(1)), + lists:foreach(fun(AlarmId) -> + case alarm_set(AlarmId) of + false -> ok; + {true, _} -> + ?line ?t:fail({alarm_is_set, AlarmId}) + end + end, + Alarms), + + %% Reset configuration and restart OS_Mon + ?line ok = application:set_env(os_mon,memory_check_interval,1), + ?line ok = application:set_env(os_mon,sys_mem_high_watermark,0.8), + ?line ok = application:set_env(os_mon,proc_mem_high_watermark,0.05), + ?line ok = application:start(os_mon), + + ok. + +%%---------------------------------------------------------------------- +%% Auxiliary +%%---------------------------------------------------------------------- + +force_collection() -> + erlang:trace(whereis(memsup), true, ['receive']), + memsup ! time_to_collect, + TimerRef = erlang:send_after(5000, self(), timeout), + force_collection(TimerRef). + +force_collection(TimerRef) -> + receive + {trace, _Pid, 'receive', {collected_sys, _Sys}} -> + erlang:cancel_timer(TimerRef), + erlang:trace(whereis(memsup), false, ['receive']), + flush(), + ok; + {trace, _Pid, 'receive', reg_collection_timeout} -> + erlang:cancel_timer(TimerRef), + erlang:trace(whereis(memsup), false, ['receive']), + flush(), + collection_timeout; + timout -> + erlang:trace(whereis(memsup), false, ['receive']), + flush(), + timeout; + _Msg -> + force_collection(TimerRef) + end. + +flush() -> + receive + {trace, _, _, _} -> + flush(); + timeout -> + flush() + after 0 -> + ok + end. diff --git a/lib/os_mon/test/os_mon.spec b/lib/os_mon/test/os_mon.spec new file mode 100644 index 0000000000..bdae523795 --- /dev/null +++ b/lib/os_mon/test/os_mon.spec @@ -0,0 +1 @@ +{topcase, {dir, "../os_mon_test"}}. diff --git a/lib/os_mon/test/os_mon_SUITE.erl b/lib/os_mon/test/os_mon_SUITE.erl new file mode 100644 index 0000000000..ce52271ff8 --- /dev/null +++ b/lib/os_mon/test/os_mon_SUITE.erl @@ -0,0 +1,89 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1997-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon_SUITE). +-include("test_server.hrl"). + +%% Test server specific exports +-export([all/1]). +-export([init_per_testcase/2, fin_per_testcase/2]). + +%% Test cases +-export([app_file/1, config/1]). + +%% Default timetrap timeout (set in init_per_testcase) +-define(default_timeout, ?t:minutes(1)). + +init_per_testcase(_Case, Config) -> + Dog = test_server:timetrap(?default_timeout), + [{watchdog, Dog}|Config]. + +fin_per_testcase(_Case, Config) -> + Dog = ?config(watchdog, Config), + test_server:timetrap_cancel(Dog), + ok. + +all(suite) -> + case ?t:os_type() of + {unix, sunos} -> [app_file, config]; + _OS -> [app_file] + end. + +app_file(suite) -> + []; +app_file(doc) -> + ["Testing .app file"]; +app_file(Config) when is_list(Config) -> + ?line ok = test_server:app_test(os_mon), + ok. + +config(suite) -> + []; +config(doc) -> + ["Test OS_Mon configuration"]; +config(Config) when is_list(Config) -> + + IsReg = fun(Name) -> is_pid(whereis(Name)) end, + IsNotReg = fun(Name) -> undefined == whereis(Name) end, + + ?line ok = application:start(os_mon), + ?line true = lists:all(IsReg, [cpu_sup, disksup, memsup]), + ?line ok = application:stop(os_mon), + + ?line ok = application:set_env(os_mon, start_cpu_sup, false), + ?line ok = application:start(os_mon), + ?line true = lists:all(IsReg, [disksup, memsup]), + ?line true = IsNotReg(cpu_sup), + ?line ok = application:stop(os_mon), + ?line ok = application:set_env(os_mon, start_cpu_sup, true), + + ?line ok = application:set_env(os_mon, start_disksup, false), + ?line ok = application:start(os_mon), + ?line true = lists:all(IsReg, [cpu_sup, memsup]), + ?line true = IsNotReg(disksup), + ?line ok = application:stop(os_mon), + ?line ok = application:set_env(os_mon, start_disksup, true), + + ?line ok = application:set_env(os_mon, start_memsup, false), + ?line ok = application:start(os_mon), + ?line true = lists:all(IsReg, [cpu_sup, disksup]), + ?line true = IsNotReg(memsup), + ?line ok = application:stop(os_mon), + ?line ok = application:set_env(os_mon, start_memsup, true), + + ok. diff --git a/lib/os_mon/test/os_mon_conf.erl b/lib/os_mon/test/os_mon_conf.erl new file mode 100644 index 0000000000..5c1fa43047 --- /dev/null +++ b/lib/os_mon/test/os_mon_conf.erl @@ -0,0 +1,28 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 1996-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon_conf). +-export([init/1,fin/1]). + +init(Conf) -> + RetVal = application:start(os_mon,temporary), + [{os_mon,RetVal}|Conf]. + +fin(Conf) -> + application:stop(os_mon), + lists:keydelete(os_mon,1,Conf). diff --git a/lib/os_mon/test/os_mon_mib_SUITE.erl b/lib/os_mon/test/os_mon_mib_SUITE.erl new file mode 100644 index 0000000000..a1d463030a --- /dev/null +++ b/lib/os_mon/test/os_mon_mib_SUITE.erl @@ -0,0 +1,746 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2004-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_mon_mib_SUITE). + +%-define(STANDALONE,1). + +-ifdef(STANDALONE). +-define(line,erlang:display({line,?LINE}),). +-define(config(A,B), config(A,B)). +-else. +-include("test_server.hrl"). +-include_lib("os_mon/include/OTP-OS-MON-MIB.hrl"). +-include_lib("snmp/include/snmp_types.hrl"). +-endif. + +% Test server specific exports +-export([all/1, init_per_suite/1, end_per_suite/1, + init_per_testcase/2, end_per_testcase/2]). + + +% Test cases must be exported. +-export([update_load_table/1]). + +-export([get_mem_sys_mark/1, get_mem_proc_mark/1, get_disk_threshold/1, + get_load_table/1, get_next_load_table/1, get_disk_table/1, + get_next_disk_table/1, real_snmp_request/1, load_unload/1]). + +-export([sys_tot_mem/1, sys_used_mem/1, large_erl_process/1, + large_erl_process_mem/1, cpu_load/1, cpu_load5/1, cpu_load15/1, + os_wordsize/1, sys_tot_mem64/1, sys_used_mem64/1, + large_erl_process_mem64/1, disk_descr/1, disk_kbytes/1, + disk_capacity/1]). + +-export([tickets/1]). +-export([otp_6351/1, otp_7441/1]). + +-define(TRAP_UDP, 5000). +-define(AGENT_UDP, 4000). +-define(CONF_FILE_VER, [v2]). +-define(SYS_NAME, "Test os_mon_mibs"). +-define(MAX_MSG_SIZE, 484). +-define(ENGINE_ID, "mgrEngine"). +-define(MGR_PORT, 5001). + +%%--------------------------------------------------------------------- +-ifdef(STANDALONE). +-export([run/0]). +run() -> + catch init_per_suite([]), + Ret = (catch update_load_table([])), + catch end_per_suite([]), + Ret. +-else. + +init_per_testcase(_Case, Config) when is_list(Config) -> + Dog = test_server:timetrap(test_server:minutes(6)), + [{watchdog, Dog}|Config]. + +end_per_testcase(_Case, Config) when is_list(Config) -> + Dog = ?config(watchdog, Config), + test_server:timetrap_cancel(Dog), + Config. + +all(doc) -> + ["Test os_mon mibs and provided instrumentation functions."]; + +all(suite) -> + [load_unload, get_mem_sys_mark, get_mem_proc_mark, + get_disk_threshold, get_load_table, get_next_load_table, + get_disk_table, get_next_disk_table, real_snmp_request, + update_load_table, tickets]. + +tickets(suite) -> + [otp_6351, otp_7441]. + +-endif. +%%--------------------------------------------------------------------- +%%-------------------------------------------------------------------- +%% Function: init_per_suite(Config) -> Config +%% Config - [tuple()] +%% A list of key/value pairs, holding the test case configuration. +%% Description: Initiation before the whole suite +%% +%% Note: This function is free to add any key/value pairs to the Config +%% variable, but should NOT alter/remove any existing entries. +%%-------------------------------------------------------------------- +init_per_suite(Config) -> + ?line application:start(sasl), + ?line application:start(mnesia), + ?line application:start(os_mon), + + %% Create initial configuration data for the snmp application + ?line PrivDir = ?config(priv_dir, Config), + ?line ConfDir = filename:join(PrivDir, "conf"), + ?line DbDir = filename:join(PrivDir,"db"), + ?line MgrDir = filename:join(PrivDir,"mgr"), + + ?line file:make_dir(ConfDir), + ?line file:make_dir(DbDir), + ?line file:make_dir(MgrDir), + + {ok, HostName} = inet:gethostname(), + {ok, Addr} = inet:getaddr(HostName, inet), + + ?line snmp_config:write_agent_snmp_files(ConfDir, ?CONF_FILE_VER, + tuple_to_list(Addr), ?TRAP_UDP, + tuple_to_list(Addr), + ?AGENT_UDP, ?SYS_NAME), + + ?line snmp_config:write_manager_snmp_files(MgrDir, tuple_to_list(Addr), + ?MGR_PORT, ?MAX_MSG_SIZE, + ?ENGINE_ID, [], [], []), + + %% To make sure application:set_env is not overwritten by any + %% app-file settings. + ?line ok = application:load(snmp), + + ?line application:set_env(snmp, agent, [{db_dir, DbDir}, + {config, [{dir, ConfDir}]}, + {agent_type, master}, + {agent_verbosity, trace}, + {net_if, [{verbosity, trace}]}]), + ?line application:set_env(snmp, manager, [{config, [{dir, MgrDir}, + {db_dir, MgrDir}, + {verbosity, trace}]}, + {server, [{verbosity, trace}]}, + {net_if, [{verbosity, trace}]}, + {versions, [v1, v2, v3]}]), + application:start(snmp), + + %% Load the mibs that should be tested + otp_mib:load(snmp_master_agent), + os_mon_mib:load(snmp_master_agent), + + [{agent_ip, Addr}| Config]. +%%-------------------------------------------------------------------- +%% Function: end_per_suite(Config) -> _ +%% Config - [tuple()] +%% A list of key/value pairs, holding the test case configuration. +%% Description: Cleanup after the whole suite +%%-------------------------------------------------------------------- +end_per_suite(Config) -> + PrivDir = ?config(priv_dir, Config), + ConfDir = filename:join(PrivDir,"conf"), + DbDir = filename:join(PrivDir,"db"), + MgrDir = filename:join(PrivDir, "mgr"), + + %% Uload mibs + snmpa:unload_mibs(snmp_master_agent,["OTP-OS-MON-MIB"]), + otp_mib:unload(snmp_master_agent), + + %% Clean up + application:stop(snmp), + application:stop(mnesia), + application:stop(os_mon), + + del_dir(ConfDir), + del_dir(DbDir), + (catch del_dir(MgrDir)), + ok. + +%%--------------------------------------------------------------------- +%% Test cases +%%--------------------------------------------------------------------- +load_unload(doc) -> + ["Test to unload and the reload the OTP.mib "]; +load_unload(suite) -> []; +load_unload(Config) when list(Config) -> + ?line os_mon_mib:unload(snmp_master_agent), + ?line os_mon_mib:load(snmp_master_agent), + ok. +%%--------------------------------------------------------------------- + +update_load_table(doc) -> + ["check os_mon_mib:update_load_table error handling"]; +update_load_table(suite) -> + []; +update_load_table(Config) when is_list(Config) -> + ?line Node = start_node(), + ?line ok = rpc:call(Node,application,start,[sasl]), + ?line ok = rpc:call(Node,application,start,[os_mon]), + ?line ok = os_mon_mib:update_load_table(), + ?line rpc:call(Node,application,stop,[os_mon]), + ?line ok = os_mon_mib:update_load_table(), + ?line stop_node(Node), + ok. + +otp_6351(doc) -> + ["like update_load_table, when memsup_system_only==true"]; +otp_6351(suite) -> + []; +otp_6351(Config) when is_list(Config) -> + ?line Node = start_node(), + ?line ok = rpc:call(Node,application,start,[sasl]), + ?line ok = rpc:call(Node,application,load,[os_mon]), + ?line ok = rpc:call(Node,application,set_env, + [os_mon,memsup_system_only,true]), + ?line ok = rpc:call(Node,application,start,[os_mon]), + ?line Res = rpc:call(Node,os_mon_mib,get_load,[Node]), + if + is_tuple(Res), element(1, Res)==loadTable -> + ?line ok; + true -> + ?line ?t:fail(Res) + end, + ?line rpc:call(Node,application,stop,[os_mon]), + ?line stop_node(Node), + ok. + + + + +%%--------------------------------------------------------------------- +get_mem_sys_mark(doc) -> + ["Simulates a get call to test the instrumentation function " + "for the loadMemorySystemWatermark variable."]; +get_mem_sys_mark(suite) -> + []; +get_mem_sys_mark(Config) when is_list(Config) -> + case os_mon_mib:mem_sys_mark(get) of + {value, SysMark} when is_integer(SysMark) -> + ok; + _ -> + ?line test_server:fail(sys_mark_value_not_integer) + end. +%%--------------------------------------------------------------------- +get_mem_proc_mark(doc) -> + ["Simulates a get call to test the instrumentation function " + "for the loadMemoryErlProcWatermark variable."]; +get_mem_proc_mark(suite) -> + []; +get_mem_proc_mark(Config) when is_list(Config) -> + case os_mon_mib:mem_proc_mark(get) of + {value, ProcMark} when is_integer(ProcMark) -> + ok; + _ -> + ?line test_server:fail(proc_mark_value_not_integer) + end. +%%--------------------------------------------------------------------- +get_disk_threshold(doc) -> + ["Simulates a get call to test the instrumentation function " + "for the diskAlmostFullThreshold variable."]; +get_disk_threshold(suite) -> + []; +get_disk_threshold(Config) when is_list(Config) -> + case os_mon_mib:disk_threshold(get) of + {value, ProcMark} when is_integer(ProcMark) -> + ok; + _ -> + ?line test_server:fail(disk_threshold_value_not_integer) + end. +%%--------------------------------------------------------------------- + +%%% Note that when we have a string key, as in loadTable, the +%%% instrumentation will deal with the [length(String), String]. We +%%% have to know about this, when short cutting SNMP and calling +%%% instrumentation functions directly as done in most test cases in +%%% this test suite + +get_load_table(doc) -> + ["Simulates get calls to test the instrumentation function " + "for the loadTable"]; +get_load_table(suite) -> + []; +get_load_table(Config) when is_list(Config) -> + + NodeStr = atom_to_list(node()), + NodeLen = length(NodeStr), + + {_, _, {Pid, _}} = memsup:get_memory_data(), + PidStr = lists:flatten(io_lib:format("~w", [Pid])), + ?line [{value, NodeStr},{value, PidStr}] = + os_mon_mib:load_table(get, [NodeLen | NodeStr], + [?loadErlNodeName, ?loadLargestErlProcess]), + + ?line Values = os_mon_mib:load_table(get, [NodeLen | NodeStr] , + [?loadSystemTotalMemory, + ?loadSystemUsedMemory, + ?loadLargestErlProcessUsedMemory, + ?loadCpuLoad, + ?loadCpuLoad5, + ?loadCpuLoad15, + ?loadOsWordsize, + ?loadSystemTotalMemory64, + ?loadSystemUsedMemory64, + ?loadLargestErlProcessUsedMemory64]), + + IsInt = fun({value, Val}) when is_integer(Val) -> + true; + (_) -> + false + end, + + NewValues = lists:filter(IsInt, Values), + + case length(NewValues) of + 10 -> + ok; + _ -> + ?line test_server:fail(value_not_integer) + end, + + ?line [{noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}] = + os_mon_mib:load_table(get, [3, 102, 111, 111], + [?loadErlNodeName, + ?loadSystemTotalMemory, + ?loadSystemUsedMemory, + ?loadLargestErlProcess, + ?loadLargestErlProcessUsedMemory, + ?loadCpuLoad, + ?loadCpuLoad5, + ?loadCpuLoad15, + ?loadOsWordsize, + ?loadSystemTotalMemory64, + ?loadSystemUsedMemory64, + ?loadLargestErlProcessUsedMemory64]), + + ok. +%%--------------------------------------------------------------------- +get_next_load_table(doc) -> + ["Simulates get_next calls to test the instrumentation function " + "for the loadTable"]; +get_next_load_table(suite) -> + [ sys_tot_mem, + sys_used_mem, + large_erl_process, + large_erl_process_mem, + cpu_load, + cpu_load5, + cpu_load15, + os_wordsize, + sys_tot_mem64, + sys_used_mem64, + large_erl_process_mem64]. + +sys_tot_mem(doc) -> + []; +sys_tot_mem(suite) -> + []; +sys_tot_mem(Config) when is_list(Config) -> + ?line [{[?loadSystemTotalMemory, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next, [], [?loadSystemTotalMemory]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(sys_tot_mem_value_not_integer) + end. + +sys_used_mem(doc) -> + []; +sys_used_mem(suite) -> []; +sys_used_mem(Config) when is_list(Config) -> + ?line [{[?loadSystemUsedMemory, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next,[], [?loadSystemUsedMemory]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(sys_used_mem_value_not_integer) + end. + +large_erl_process(doc) -> + []; +large_erl_process(suite) -> + []; +large_erl_process(Config) when is_list(Config) -> + {_, _, {Pid, _}} = memsup:get_memory_data(), + PidStr = lists:flatten(io_lib:format("~w", [Pid])), + ?line [{[?loadLargestErlProcess, Len | NodeStr], PidStr}] = + os_mon_mib:load_table(get_next,[], [?loadLargestErlProcess]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + ok. + +large_erl_process_mem(doc) -> + []; +large_erl_process_mem(suite) -> + []; +large_erl_process_mem(Config) when is_list(Config) -> + + ?line [{[?loadLargestErlProcessUsedMemory, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next,[], + [?loadLargestErlProcessUsedMemory]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(erl_pid_mem_value_not_integer) + end. + +cpu_load(doc) -> + []; +cpu_load(suite) -> + []; +cpu_load(Config) when list(Config) -> + ?line [{[?loadCpuLoad, Len | NodeStr], Load}] = + os_mon_mib:load_table(get_next,[], [?loadCpuLoad]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Load of + Load when is_integer(Load) -> + ok; + _ -> + ?line test_server:fail(cpu_load_value_not_integer) + end. + +cpu_load5(doc) -> + []; +cpu_load5(suite) -> + []; +cpu_load5(Config) when is_list(Config) -> + ?line [{[?loadCpuLoad5, Len | NodeStr], Load}] = + os_mon_mib:load_table(get_next,[], [?loadCpuLoad5]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Load of + Load when is_integer(Load) -> + ok; + _ -> + ?line test_server:fail(cpu_load5_value_not_integer) + end. + +cpu_load15(doc) -> + []; +cpu_load15(suite) -> + []; +cpu_load15(Config) when is_list(Config) -> + ?line [{[?loadCpuLoad15, Len | NodeStr], Load}] = + os_mon_mib:load_table(get_next,[], [?loadCpuLoad15]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Load of + Load when is_integer(Load) -> + ok; + _ -> + ?line test_server:fail(cpu_load15_value_not_integer) + end. + +os_wordsize(doc) -> + []; +os_wordsize(suite) -> + []; +os_wordsize(Config) when is_list(Config) -> + ?line [{[?loadOsWordsize, Len | NodeStr], Wordsize}] = + os_mon_mib:load_table(get_next,[], [?loadOsWordsize]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Wordsize of + Wordsize when is_integer(Wordsize) -> + ok; + _ -> + ?line test_server:fail(os_wordsize_value_not_integer) + end. + +sys_tot_mem64(doc) -> + []; +sys_tot_mem64(suite) -> + []; +sys_tot_mem64(Config) when is_list(Config) -> + ?line [{[?loadSystemTotalMemory64, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next, [], [?loadSystemTotalMemory64]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(sys_tot_mem_value_not_integer) + end. + +sys_used_mem64(doc) -> + []; +sys_used_mem64(suite) -> []; +sys_used_mem64(Config) when is_list(Config) -> + ?line [{[?loadSystemUsedMemory64, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next,[], [?loadSystemUsedMemory64]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(sys_used_mem_value_not_integer) + end. + +large_erl_process_mem64(doc) -> + []; +large_erl_process_mem64(suite) -> + []; +large_erl_process_mem64(Config) when is_list(Config) -> + + ?line [{[?loadLargestErlProcessUsedMemory64, Len | NodeStr], Mem}] = + os_mon_mib:load_table(get_next,[], + [?loadLargestErlProcessUsedMemory64]), + ?line Len = length(NodeStr), + ?line true = lists:member(list_to_atom(NodeStr), [node() | nodes()]), + + case Mem of + Mem when is_integer(Mem) -> + ok; + _ -> + ?line test_server:fail(erl_pid_mem_value_not_integer) + end. +%%--------------------------------------------------------------------- +get_disk_table(doc) -> + ["Simulates get calls to test the instrumentation function " + "for the diskTable."]; +get_disk_table(suite) -> + []; +get_disk_table(Config) when is_list(Config) -> + + DiskData = disksup:get_disk_data(), + DiskDataLen = length(DiskData), + + if + DiskDataLen > 0 -> + ?line [{value, Value}] = + os_mon_mib:disk_table(get, [1,1], [?diskDescr]), + + case is_list(Value) of + true -> + ok; + false -> + ?line test_server:fail(value_not_a_string) + end, + + ?line Values = os_mon_mib:disk_table(get, [1,1], + [?diskId, + ?diskKBytes, + ?diskCapacity]), + + IsInt = fun({value, Val}) when is_integer(Val) -> + true; + (_) -> + false + end, + + NewValues = lists:filter(IsInt, Values), + + case length(NewValues) of + 3 -> + ok; + _ -> + ?line test_server:fail(value_not_integer) + end + end, + + ?line [{noValue,noSuchInstance}, {noValue,noSuchInstance}, + {noValue,noSuchInstance}, {noValue,noSuchInstance}] = + os_mon_mib:disk_table(get, [1, DiskDataLen + 1], [?diskId, + ?diskDescr, + ?diskKBytes, + ?diskCapacity]), + + ok. + +%%--------------------------------------------------------------------- +get_next_disk_table(doc) -> + ["Simulates get_next calls to test the instrumentation function " + "for the diskTable."]; +get_next_disk_table(suite) -> + [disk_descr, disk_kbytes, disk_capacity]. + +disk_descr(doc) -> + []; +disk_descr(suite) -> + []; +disk_descr(Config) when is_list(Config) -> + ?line [{[?diskDescr, 1,1], Descr}] = + os_mon_mib:disk_table(get_next, [], [?diskDescr]), + + case Descr of + Descr when is_list(Descr) -> + ok; + _ -> + ?line test_server:fail(disk_descr_value_not_a_string) + end. + +disk_kbytes(doc) -> + []; +disk_kbytes(suite) -> []; +disk_kbytes(Config) when is_list(Config) -> + ?line [{[?diskKBytes, 1,1], Kbytes}] = + os_mon_mib:disk_table(get_next,[], [?diskKBytes]), + + case Kbytes of + Kbytes when is_integer(Kbytes) -> + ok; + _ -> + ?line test_server:fail(disk_kbytes_value_not_integer) + end. + + +disk_capacity(doc) -> + []; +disk_capacity(suite) -> []; +disk_capacity(Config) when is_list(Config) -> + ?line [{[?diskCapacity, 1,1], Capacity}] = + os_mon_mib:disk_table(get_next,[], [?diskCapacity]), + + case Capacity of + Capacity when is_integer(Capacity) -> + ok; + _ -> + ?line test_server:fail(disk_capacity_value_not_integer) + end. + +%%--------------------------------------------------------------------- +real_snmp_request(doc) -> + ["Starts an snmp manager and sends a real snmp-reques. i.e. " + "sends a udp message on the correct format."]; +real_snmp_request(suite) -> []; +real_snmp_request(Config) when list(Config) -> + Agent_ip = ?config(agent_ip, Config), + + ?line ok = snmpm:register_user(os_mon_mib_test, snmpm_user_default, []), + ?line ok = snmpm:register_agent(os_mon_mib_test, Agent_ip, ?AGENT_UDP), + + NodStr = atom_to_list(node()), + Len = length(NodStr), + {_, _, {Pid, _}} = memsup:get_memory_data(), + PidStr = lists:flatten(io_lib:format("~w", [Pid])), + io:format("FOO: ~p~n", [PidStr]), + ?line ok = snmp_get(Agent_ip, + [?loadEntry ++ + [?loadLargestErlProcess, Len | NodStr]], + PidStr), + ?line ok = snmp_get_next(Agent_ip, + [?loadEntry ++ + [?loadSystemUsedMemory, Len | NodStr]], + ?loadEntry ++ [?loadSystemUsedMemory + 1, Len + | NodStr], PidStr), + ?line ok = snmp_set(Agent_ip, [?loadEntry ++ + [?loadLargestErlProcess, Len | NodStr]], + s, "<0.101.0>"), + ok. + +otp_7441(doc) -> + ["Starts an snmp manager and requests total memory. Was previously + integer32 which was errornous on 64 bit machines."]; +otp_7441(suite) -> + []; +otp_7441(Config) when is_list(Config) -> + Agent_ip = ?config(agent_ip, Config), + + + NodStr = atom_to_list(node()), + Len = length(NodStr), + Oids = [Oid|_] = [?loadEntry ++ [?loadSystemTotalMemory, Len | NodStr]], + ?line { ok, {noError,0,[#varbind{oid = Oid, variabletype = 'Unsigned32'}]}, _} = + snmpm:g(os_mon_mib_test, Agent_ip, ?AGENT_UDP, Oids), + + ok. + +%%--------------------------------------------------------------------- +%% Internal functions +%%--------------------------------------------------------------------- +-ifdef(STANDALONE). +config(priv_dir,_) -> + "/tmp". + +start_node() -> + Host = hd(tl(string:tokens(atom_to_list(node()),"@"))), + {ok,Node} = slave:start(Host,testnisse), + net_adm:ping(testnisse), + Node. + + +stop_node(Node) -> + rpc:call(Node,erlang,halt,[]). +-else. +start_node() -> + ?line Pa = filename:dirname(code:which(?MODULE)), + ?line {ok,Node} = test_server:start_node(testnisse, slave, + [{args, " -pa " ++ Pa}]), + Node. + +stop_node(Node) -> + test_server:stop_node(Node). + +-endif. + +del_dir(Dir) -> + io:format("Deleting: ~s~n",[Dir]), + {ok, Files} = file:list_dir(Dir), + FullPathFiles = lists:map(fun(File) -> filename:join(Dir, File) end, + Files), + lists:foreach({file, delete}, FullPathFiles), + file:del_dir(Dir). + +%%--------------------------------------------------------------------- +snmp_get(Agent_ip, Oids = [Oid |_], Result) -> + ?line {ok,{noError,0,[#varbind{oid = Oid, + variabletype = 'OCTET STRING', + value = Result}]}, _} = + snmpm:g(os_mon_mib_test, Agent_ip, ?AGENT_UDP, Oids), + ok. + +snmp_get_next(Agent_ip, Oids, NextOid, Result) -> + ?line {ok,{noError,0,[#varbind{oid = NextOid, + variabletype = 'OCTET STRING', + value = Result}]},_} = + snmpm:gn(os_mon_mib_test, Agent_ip, ?AGENT_UDP, Oids), + ok. + +snmp_set(Agent_ip, Oid, ValuType, Value) -> + ?line {ok, {notWritable, _, _}, _} = + snmpm:s(os_mon_mib_test,Agent_ip,?AGENT_UDP,[{Oid, ValuType, Value}]), + ok. diff --git a/lib/os_mon/test/os_sup_SUITE.erl b/lib/os_mon/test/os_sup_SUITE.erl new file mode 100644 index 0000000000..25041f968d --- /dev/null +++ b/lib/os_mon/test/os_sup_SUITE.erl @@ -0,0 +1,189 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2006-2010. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(os_sup_SUITE). +-include("test_server.hrl"). + +%% Test server specific exports +-export([all/1]). +-export([init_per_suite/1, end_per_suite/1]). +-export([init_per_testcase/2, end_per_testcase/2]). + +%% Test cases +-export([message/1]). +-export([config/1, port/1]). + +%% Default timetrap timeout (set in init_per_testcase) +-define(default_timeout, ?t:minutes(1)). + +-define(TAG, test_tag). +-define(MFA, {?MODULE, test_mfa, [?TAG]}). + +-export([test_mfa/2]). + +init_per_suite(Config) when is_list(Config) -> + spawn(fun() -> message_receptor() end), + ?line application:load(os_mon), + ?line ok = application:set_env(os_mon, start_os_sup, true), + ?line ok = application:set_env(os_mon, os_sup_mfa, ?MFA), + ?line ok = application:set_env(os_mon, os_sup_enable, false), + ?line ok = application:start(os_mon), + Config. + +end_per_suite(Config) when is_list(Config) -> + ?line application:stop(os_mon), + ?line ok = application:set_env(os_mon, start_os_sup, false), + MFA = {os_sup, error_report, [std_error]}, + ?line ok = application:set_env(os_mon, os_sup_mfa, MFA), + ?line ok = application:set_env(os_mon, os_sup_enable, true), + ?line exit(whereis(message_receptor), done), + Config. + +init_per_testcase(_Case, Config) -> + Dog = ?t:timetrap(?default_timeout), + [{watchdog,Dog} | Config]. + +end_per_testcase(_Case, Config) -> + Dog = ?config(watchdog, Config), + ?t:timetrap_cancel(Dog), + ok. + +all(suite) -> + case ?t:os_type() of + {unix, sunos} -> + [message, config, port]; + {win32, _OSname} -> + [message]; + OS -> + Str = io_lib:format("os_sup not available for ~p", [OS]), + {skip, lists:flatten(Str)} + end. + +message(suite) -> + []; +message(doc) -> + ["Test OS message handling"]; +message(Config) when is_list(Config) -> + + %% Fake an OS message + Data = "10H11386278426HSystem4HTest5HError5HTesto", + ?line os_sup_server ! {faked_port, {data, Data}}, + + %% Check with message_receptor that it has been received + ?t:sleep(?t:seconds(1)), + Msg = + case ?t:os_type() of + {unix, sunos} -> + {?TAG, Data}; + {win32, _} -> + {?TAG,{{1138,627842,0},"System","Test","Error","Testo"}} + end, + ?line message_receptor ! {check, self(), Msg}, + receive + {result, true} -> + ok; + {result, Rec} -> + ?t:fail({no_message, Rec}) + end, + + ok. + +config(suite) -> + []; +config(doc) -> + ["Test configuration"]; +config(Config) when is_list(Config) -> + + %% os_sup_enable==true and os_sup_own/os_sup_syslogconf cannot + %% be tested as test_server is not running is root + + %% os_sup_mfa is already tested, sort of (in init_per_suite) + + %% os_sup_errortag should be tested, however + + ok. + +port(suite) -> + []; +port(doc) -> + ["Test that os_sup handles a terminating port program"]; +port(Config) when is_list(Config) -> + ?line Str = os:cmd("ps -e | grep '[f]errule'"), + case io_lib:fread("~s", Str) of + {ok, [Pid], _Rest} -> + + %% Monitor os_sup_server + ?line MonRef = erlang:monitor(process, os_sup_server), + + %% Kill the port program + case os:cmd("kill -9 " ++ Pid) of + [] -> + + %% os_sup_server should now terminate + receive + {'DOWN', MonRef, _, _, {port_died, _Reason}} -> + ok; + {'DOWN', MonRef, _, _, Reason} -> + ?line ?t:fail({unexpected_exit_reason, Reason}) + after + 3000 -> + ?line ?t:fail(still_alive) + end, + + %% Give os_mon_sup time to restart os_sup + ?t:sleep(?t:seconds(3)), + ?line true = is_pid(whereis(os_sup_server)), + + ok; + + Line -> + erlang:demonitor(MonRef), + {skip, {not_killed, Line}} + end; + _ -> + {skip, {os_pid_not_found}} + end. + +%%---------------------------------------------------------------------- +%% Auxiliary +%%---------------------------------------------------------------------- + +test_mfa(Message, Tag) -> + message_receptor ! {Tag, Message}. + +message_receptor() -> + register(message_receptor, self()), + message_receptor([]). + +message_receptor(Received) -> + receive + %% Check if a certain message has been received + {check, From, Msg} -> + case lists:member(Msg, Received) of + true -> + From ! {result, true}, + message_receptor(lists:delete(Msg, Received)); + false -> + From ! {result, Received}, + message_receptor(Received) + end; + + %% Save all other messages + Msg -> + message_receptor([Msg|Received]) + end. |