diff options
Diffstat (limited to 'lib/os_mon/test/disksup_SUITE.erl')
-rw-r--r-- | lib/os_mon/test/disksup_SUITE.erl | 426 |
1 files changed, 426 insertions, 0 deletions
diff --git a/lib/os_mon/test/disksup_SUITE.erl b/lib/os_mon/test/disksup_SUITE.erl new file mode 100644 index 0000000000..987d631c36 --- /dev/null +++ b/lib/os_mon/test/disksup_SUITE.erl @@ -0,0 +1,426 @@
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2010. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
-module(disksup_SUITE).
-include("test_server.hrl").

%% Test server specific exports
-export([all/1]).
-export([init_per_suite/1, end_per_suite/1]).
-export([init_per_testcase/2, end_per_testcase/2]).

%% Test cases
-export([api/1, config/1, alarm/1]).
-export([port/1]).
-export([terminate/1, unavailable/1, restart/1]).
-export([otp_5910/1]).

%% Default timetrap timeout (set in init_per_testcase)
-define(default_timeout, ?t:minutes(1)).

%% Suite setup: the os_mon application must start cleanly before any
%% test case runs. The configuration is passed through untouched.
init_per_suite(Config) when is_list(Config) ->
    ?line ok = application:start(os_mon),
    Config.

%% Suite teardown: stop os_mon again and hand the configuration back.
end_per_suite(Config) when is_list(Config) ->
    ?line ok = application:stop(os_mon),
    Config.

%% Per-testcase setup: arm a timetrap with the default timeout and
%% store its handle under the 'watchdog' key for end_per_testcase/2.
init_per_testcase(_Case, Config) ->
    [{watchdog, ?t:timetrap(?default_timeout)} | Config].

%% Per-testcase teardown: cancel the timetrap stored by
%% init_per_testcase/2.
end_per_testcase(_Case, Config) ->
    ?t:timetrap_cancel(?config(watchdog, Config)),
    ok.

%% Selects the test cases to run depending on the OS type reported by
%% the test server. otp_5910 is the regression ("bug") test case and is
%% appended on every platform where disksup is tested for real.
all(suite) ->
    Common = [api, alarm, otp_5910],
    case ?t:os_type() of
        {unix, sunos} ->
            [api, config, alarm, port,
             {conf, terminate, [unavailable], restart},
             otp_5910];
        {Family, _OSname} when Family =:= unix; Family =:= win32 ->
            Common;
        _OS ->
            [unavailable]
    end.

%% Exercises the disksup API: reads disk data, then changes and
%% restores the check interval and the almost-full threshold, checking
%% that invalid arguments are rejected with badarg and leave the
%% current setting unchanged.
api(suite) ->
    [];
api(doc) ->
    ["Test of API functions"];
api(Config) when is_list(Config) ->

    %% get_disk_data(): first entry is {printable id, positive integer,
    %% positive integer}
    ?line [{DiskId, Kbytes, Cap}|_] = disksup:get_disk_data(),
    ?line true = io_lib:printable_list(DiskId),
    ?line true = is_integer(Kbytes),
    ?line true = is_integer(Cap),
    ?line true = Kbytes>0,
    ?line true = Cap>0,

    %% get_check_interval(): value expected before any change
    ?line 1800000 = disksup:get_check_interval(),

    %% set_check_interval(Minutes): a valid value is applied, a
    %% non-integer one exits with badarg and changes nothing
    ?line ok = disksup:set_check_interval(20),
    ?line 1200000 = disksup:get_check_interval(),
    ?line {'EXIT',{badarg,_}} = (catch disksup:set_check_interval(0.5)),
    ?line 1200000 = disksup:get_check_interval(),
    ?line ok = disksup:set_check_interval(30),

    %% get_almost_full_threshold(): value expected before any change
    ?line 80 = disksup:get_almost_full_threshold(),

    %% set_almost_full_threshold(Float): a valid value is applied, a
    %% negative one exits with badarg and changes nothing
    ?line ok = disksup:set_almost_full_threshold(0.90),
    ?line 90 = disksup:get_almost_full_threshold(),
    ?line {'EXIT',{badarg,_}} =
        (catch disksup:set_almost_full_threshold(-0.5)),
    ?line 90 = disksup:get_almost_full_threshold(),
    ?line ok = disksup:set_almost_full_threshold(0.80),

    ok.

%% Checks that the os_mon configuration parameters for disksup are
%% picked up when disksup is restarted, and that bad values are
%% ignored.
config(suite) ->
    [];
config(doc) ->
    ["Test configuration"];
config(Config) when is_list(Config) ->

    %% Valid values must be reflected after a restart of disksup
    ?line ok = set_disksup_env(29, 0.81),
    ?line ok = restart_disksup(),
    ?line 1740000 = disksup:get_check_interval(),
    ?line 81 = disksup:get_almost_full_threshold(),

    %% Bad values should be ignored: after a restart the getters must
    %% report 1800000 and 80
    ?line ok = set_disksup_env(0.5, -0.81),
    ?line ok = restart_disksup(),
    ?line 1800000 = disksup:get_check_interval(),
    ?line 80 = disksup:get_almost_full_threshold(),

    %% Reset configuration parameters
    ?line ok = set_disksup_env(30, 0.80),

    ok.

%% Sets both disksup-related os_mon application parameters.
set_disksup_env(CheckInterval, AlmostFullThreshold) ->
    ok = application:set_env(os_mon, disk_space_check_interval,
                             CheckInterval),
    ok = application:set_env(os_mon, disk_almost_full_threshold,
                             AlmostFullThreshold).

%% Terminates and restarts the disksup child of os_mon_sup.
restart_disksup() ->
    ok = supervisor:terminate_child(os_mon_sup, disksup),
    {ok, _Child} = supervisor:restart_child(os_mon_sup, disksup),
    ok.

%%----------------------------------------------------------------------
%% NOTE: The test case is a bit weak as it will fail if the disk usage
%% changes too much during its course, or if there are timing problems
%% with the alarm_handler receiving the alarms too late
%%----------------------------------------------------------------------
alarm(suite) ->
    [];
alarm(doc) ->
    ["Test that alarms are set and cleared"];
alarm(Config) when is_list(Config) ->

    %% Step 1: with the current threshold, the number of disks at or
    %% above it must equal the number of disk_almost_full alarms
    ?line OrigThreshold = disksup:get_almost_full_threshold(), % 80
    ?line OrigData = disksup:get_disk_data(),
    ?line OrigOver = over_threshold(OrigData, OrigThreshold),
    ?line OrigAlarms = get_alarms(),
    if
        OrigOver /= length(OrigAlarms) ->
            dump_info(),
            ?line ?t:fail({bad_alarms, OrigThreshold, OrigData, OrigAlarms});
        true ->
            ?line true
    end,

    %% Step 2: look for a disk whose usage is non-zero but below the
    %% current threshold; if there is one, lower the threshold to just
    %% under its usage and expect strictly more alarms than before
    IsBelowThreshold =
        fun({_Id, _Kbyte, Cap}) ->
                Cap > 0 andalso Cap < OrigThreshold
        end,
    ?line case until(IsBelowThreshold, OrigData) of
              false ->
                  ?line ignore;
              {_, _, LowCap} ->
                  LowThreshold = LowCap-1,
                  ?line ok =
                      disksup:set_almost_full_threshold(LowThreshold/100),
                  ?line disksup ! timeout, % force a disk check
                  ?line LowData = disksup:get_disk_data(),
                  ?line LowOver = over_threshold(LowData, LowThreshold),
                  ?line LowAlarms = get_alarms(),
                  if
                      LowOver /= length(LowAlarms); LowOver =< OrigOver ->
                          dump_info(),
                          ?line ?t:fail({bad_alarms, LowThreshold, LowData, LowAlarms});
                      true ->
                          ?line true
                  end
          end,

    %% Step 3: take the highest usage among all disks (from the data
    %% read in step 1); unless it is already 100, raise the threshold
    %% just above it and expect every alarm to be cleared
    HighestCap =
        fun({_Id, _Kbyte, Cap}, Highest) when Cap > Highest -> Cap;
           ({_Id, _Kbyte, _Cap}, Highest) -> Highest
        end,
    ?line case lists:foldl(HighestCap, 0, OrigData) of
              100 ->
                  ?line ignore;
              Max when Max<100 ->
                  HighThreshold = Max+1,
                  ?line ok =
                      disksup:set_almost_full_threshold(HighThreshold/100),
                  ?line disksup ! timeout, % force a disk check
                  ?line HighData = disksup:get_disk_data(),
                  ?line HighOver = over_threshold(HighData, HighThreshold),
                  ?line HighAlarms = get_alarms(),
                  if
                      HighOver /= 0; length(HighAlarms) /= 0 ->
                          dump_info(),
                          ?line ?t:fail({bad_alarms, HighThreshold, HighData, HighAlarms});
                      true ->
                          ?line ok
                  end
          end,

    %% Reset threshold
    ?line ok = disksup:set_almost_full_threshold(OrigThreshold/100),

    ok.

%% Counts the disks in Data whose usage is at or above Threshold, after
%% sorting on the disk id and dropping duplicated disk entries.
over_threshold(Data, Threshold) ->
    UniqueDisks = remove_duplicated_disks(lists:keysort(1, Data)),
    length([Id || {Id, _Kbyte, Cap} <- UniqueDisks, Cap >= Threshold]).

%% On some platforms (for example MontaVista) data for one disk can be
%% "duplicated":
%%  Linux ppb 2.4.20_mvl31-pcore680 #1 Sun Feb 1 23:12:56 PST 2004 ppc unknown
%%
%%  MontaVista(R) Linux(R) Professional Edition 3.1
%%
%%  [ppb:~]> /bin/df -lk
%%  Filesystem           1k-blocks      Used Available Use% Mounted on
%%  rootfs                 8066141   3023763   4961717  38% /
%%  /dev/root              8066141   3023763   4961717  38% /
%%  tmpfs                   192892         0    192892   0% /dev/shm
%%
%% disksup:
%%  [{"/",8066141,38}, {"/",8066141,38}, {"/dev/shm",192892,0}]
%%
%% disksup will only set ONE alarm for "/".
%% Therefore the list of disk data must be sorted and duplicated disk
%% tuples removed before calculating how many alarms should be set, or
%% the testcase will fail erroneously.
%%
%% Expects a list sorted on the disk id; of each run of adjacent
%% entries with the same id only the last one is kept.
remove_duplicated_disks([{Id, _, _} | [{Id, _, _} | _] = Rest]) ->
    remove_duplicated_disks(Rest);
remove_duplicated_disks([Disk | Rest]) ->
    [Disk | remove_duplicated_disks(Rest)];
remove_duplicated_disks([]) ->
    [].

%% Returns the disk_almost_full alarms currently known to alarm_handler.
get_alarms() ->
    [Alarm || {{disk_almost_full, _Disk}, _} = Alarm
                  <- alarm_handler:get_alarms()].

%% Returns the first element of the list for which Fun returns true,
%% or false if there is no such element.
until(_Fun, []) ->
    false;
until(Fun, [Elem | Rest]) ->
    case Fun(Elem) of
        false -> until(Fun, Rest);
        true -> Elem
    end.

%% Kills the external disksup port program (located through 'ps') and
%% checks that the disksup process exits with a port_died reason and
%% answers again a few seconds later. The case is skipped if the OS pid
%% cannot be found or the kill command produces output.
port(suite) ->
    [];
port(doc) ->
    ["Test that disksup handles a terminating port program"];
port(Config) when is_list(Config) ->
    ?line PsOutput = os:cmd("ps -ef | grep '[d]isksup'"),
    case io_lib:fread("~s ~s", PsOutput) of
        {ok, [_Uid, OsPid], _Rest} ->
            kill_port_program(OsPid, PsOutput);
        _ ->
            {skip, {os_pid_not_found, PsOutput}}
    end.

%% Monitors disksup, kills the port program with the given OS pid and,
%% if the kill command was silent, verifies termination and recovery.
kill_port_program(OsPid, PsOutput) ->

    %% Monitor disksup
    ?line MonRef = erlang:monitor(process, disksup),
    ?line [{_Disk1, Kbyte1, _Cap1}|_] = disksup:get_disk_data(),
    ?line true = Kbyte1>0,

    %% Kill the port program
    case os:cmd("kill -9 " ++ OsPid) of
        [] ->
            await_disksup_restart(MonRef, PsOutput);
        Line ->
            erlang:demonitor(MonRef),
            {skip, {not_killed, Line}}
    end.

%% Waits for the 'DOWN' message of the monitored disksup process and
%% then checks that disksup delivers disk data again.
await_disksup_restart(MonRef, PsOutput) ->

    %% disksup should now terminate
    receive
        {'DOWN', MonRef, _, _, {port_died, _Reason}} ->
            ok;
        {'DOWN', MonRef, _, _, Reason} ->
            ?line ?t:fail({unexpected_exit_reason, Reason})
    after
        3000 ->
            ?line ?t:fail({still_alive, PsOutput})
    end,

    %% Give os_mon_sup time to restart disksup
    ?t:sleep(?t:seconds(3)),
    ?line [{_Disk2, Kbyte2, _Cap2}|_] =
        disksup:get_disk_data(),
    ?line true = Kbyte2>0,

    ok.

%% Init step of the {conf, terminate, [unavailable], restart} group in
%% all/1: marks disksup as not to be started and terminates it.
terminate(suite) ->
    [];
terminate(Config) when is_list(Config) ->
    ?line ok = application:set_env(os_mon, start_disksup, false),
    ?line ok = supervisor:terminate_child(os_mon_sup, disksup),
    ok.

%% Checks the values returned by the disksup API while the service is
%% unavailable: fixed dummy data for the getters and ok for the setters.
unavailable(suite) ->
    [];
unavailable(doc) ->
    ["Test correct behaviour when service is unavailable"];
unavailable(Config) when is_list(Config) ->

    %% Make sure all API functions return their dummy values
    ?line [{"none",0,0}] = disksup:get_disk_data(),
    ?line 1800000 = disksup:get_check_interval(),
    ?line ok = disksup:set_check_interval(5),
    ?line 80 = disksup:get_almost_full_threshold(),
    ?line ok = disksup:set_almost_full_threshold(0.9),

    ok.

%% End step of the {conf, terminate, [unavailable], restart} group in
%% all/1: re-enables disksup and restarts it under os_mon_sup.
restart(suite) ->
    [];
restart(Config) when is_list(Config) ->
    ?line ok = application:set_env(os_mon, start_disksup, true),
    ?line {ok, _Pid} = supervisor:restart_child(os_mon_sup, disksup),
    ok.

%% Regression test for OTP-5910. Forces at least one disk_almost_full
%% alarm, then checks that
%%   - after disksup is killed and has been restarted, the number of
%%     alarms is unchanged (set once, not twice), and
%%   - after os_mon is stopped, no disksup alarm remains.
%% Finally the threshold is reset to 0.8 and os_mon is started again.
otp_5910(suite) ->
    [];
otp_5910(doc) ->
    ["Test that alarms are cleared if disksup crashes or "
     "if OS_Mon is stopped"];
otp_5910(Config) when is_list(Config) ->

    %% Make sure disksup sets at least one alarm: if no disk is over
    %% the current threshold, lower it to just below the usage of the
    %% first disk
    ?line Data = disksup:get_disk_data(),
    ?line Threshold0 = disksup:get_almost_full_threshold(),
    ?line Threshold = case over_threshold(Data, Threshold0) of
                          0 ->
                              [{_Id,_Kbyte,Cap}|_] = Data,
                              ?line ok = disksup:set_almost_full_threshold((Cap-1)/100),
                              Cap-1;
                          _N ->
                              Threshold0
                      end,
    %% Also store the threshold as application parameter so that it is
    %% still in effect when disksup is restarted further down
    ?line ok = application:set_env(os_mon,
                                   disk_almost_full_threshold,
                                   Threshold/100),
    ?line disksup ! timeout, % force a disk check
    ?line Data2 = disksup:get_disk_data(),
    ?line Over = over_threshold(Data2, Threshold),
    ?line Alarms = get_alarms(),
    if
        Over==0 ->
            ?line ?t:fail({threshold_too_low, Data2, Threshold});
        Over==length(Alarms) ->
            ok;
        true ->
            dump_info(),
            ?line ?t:fail({bad_alarms, Threshold, Data2, Alarms})
    end,

    %% Kill disksup
    exit(whereis(disksup), faked_disksup_crash),

    %% Wait a little to make sure disksup has been restarted,
    %% then make sure the alarms are set once, but not twice
    ?t:sleep(?t:seconds(1)),
    ?line Data3 = disksup:get_disk_data(),
    ?line Alarms2 = get_alarms(),
    if
        length(Alarms2)==length(Alarms) ->
            ok;
        true ->
            dump_info(),
            ?line ?t:fail({bad_alarms, Threshold, Data3, Alarms,Alarms2})
    end,

    %% Stop OS_Mon and make sure all disksup alarms are cleared
    ?line ok = application:stop(os_mon),
    ?t:sleep(?t:seconds(1)),
    ?line Alarms3 = get_alarms(),
    if
        length(Alarms3)==0 ->
            ok;
        true ->
            ?line ?t:fail({alarms_not_cleared, Alarms3})
    end,

    %% Reset threshold and restart OS_Mon.
    %% The parameter to reset is disk_almost_full_threshold, i.e. the
    %% same key that was set above; resetting a differently named key
    %% would leave the lowered threshold in the application environment
    %% for the os_mon start below and for later test cases.
    ?line ok = application:set_env(os_mon,
                                   disk_almost_full_threshold, 0.8),
    %% NOTE(review): os_mon is stopped at this point, so this call
    %% presumably only yields the "service unavailable" dummy reply;
    %% kept to preserve the original call sequence.
    ?line ok = disksup:set_almost_full_threshold(0.8),
    ?line ok = application:start(os_mon),

    ok.

%% Prints the sys status of the disksup process; called before failing
%% a test case on unexpected alarms.
dump_info() ->
    io:format("Status: ~p~n", [sys:get_status(disksup)]).