%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 1996-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% -module(disksup_SUITE). -include("test_server.hrl"). %% Test server specific exports -export([all/1]). -export([init_per_suite/1, end_per_suite/1]). -export([init_per_testcase/2, end_per_testcase/2]). %% Test cases -export([api/1, config/1, alarm/1]). -export([port/1]). -export([terminate/1, unavailable/1, restart/1]). -export([otp_5910/1]). %% Default timetrap timeout (set in init_per_testcase) -define(default_timeout, ?t:minutes(1)). init_per_suite(Config) when is_list(Config) -> ?line ok = application:start(os_mon), Config. end_per_suite(Config) when is_list(Config) -> ?line ok = application:stop(os_mon), Config. init_per_testcase(_Case, Config) -> Dog = ?t:timetrap(?default_timeout), [{watchdog,Dog} | Config]. end_per_testcase(_Case, Config) -> Dog = ?config(watchdog, Config), ?t:timetrap_cancel(Dog), ok. all(suite) -> Bugs = [otp_5910], case ?t:os_type() of {unix, sunos} -> [api, config, alarm, port, {conf, terminate, [unavailable], restart}] ++ Bugs; {unix, _OSname} -> [api, alarm] ++ Bugs; {win32, _OSname} -> [api, alarm] ++ Bugs; _OS -> [unavailable] end. api(suite) -> []; api(doc) -> ["Test of API functions"]; api(Config) when is_list(Config) -> %% get_disk_data() ?line [{Id, KByte, Capacity}|_] = disksup:get_disk_data(), ?line true = io_lib:printable_list(Id), ?line true = is_integer(KByte), ?line true = is_integer(Capacity), ?line true = KByte>0, ?line true = Capacity>0, %% get_check_interval() ?line 1800000 = disksup:get_check_interval(), %% set_check_interval(Minutes) ?line ok = disksup:set_check_interval(20), ?line 1200000 = disksup:get_check_interval(), ?line {'EXIT',{badarg,_}} = (catch disksup:set_check_interval(0.5)), ?line 1200000 = disksup:get_check_interval(), ?line ok = disksup:set_check_interval(30), %% get_almost_full_threshold() ?line 80 = disksup:get_almost_full_threshold(), %% set_almost_full_threshold(Float) ?line ok = disksup:set_almost_full_threshold(0.90), ?line 90 = disksup:get_almost_full_threshold(), ?line {'EXIT',{badarg,_}} = (catch disksup:set_almost_full_threshold(-0.5)), ?line 90 = disksup:get_almost_full_threshold(), ?line ok = disksup:set_almost_full_threshold(0.80), ok. config(suite) -> []; config(doc) -> ["Test configuration"]; config(Config) when is_list(Config) -> %% Change configuration parameters and make sure change is reflected %% when disksup is restarted ?line ok = application:set_env(os_mon, disk_space_check_interval, 29), ?line ok = application:set_env(os_mon, disk_almost_full_threshold, 0.81), ?line ok = supervisor:terminate_child(os_mon_sup, disksup), ?line {ok, _Child1} = supervisor:restart_child(os_mon_sup, disksup), ?line 1740000 = disksup:get_check_interval(), ?line 81 = disksup:get_almost_full_threshold(), %% Also try this with bad parameter values, should be ignored ?line ok = application:set_env(os_mon, disk_space_check_interval, 0.5), ?line ok = application:set_env(os_mon, disk_almost_full_threshold, -0.81), ?line ok = supervisor:terminate_child(os_mon_sup, disksup), ?line {ok, _Child2} = supervisor:restart_child(os_mon_sup, disksup), ?line 1800000 = disksup:get_check_interval(), ?line 80 = disksup:get_almost_full_threshold(), %% Reset configuration parameters ?line ok = application:set_env(os_mon, disk_space_check_interval, 30), ?line ok = application:set_env(os_mon, disk_almost_full_threshold, 0.80), ok. %%---------------------------------------------------------------------- %% NOTE: The test case is a bit weak as it will fail if the disk usage %% changes too much during its course, or if there are timing problems %% with the alarm_handler receiving the alarms too late %%---------------------------------------------------------------------- alarm(suite) -> []; alarm(doc) -> ["Test that alarms are set and cleared"]; alarm(Config) when is_list(Config) -> %% Find out how many disks exceed the threshold %% and make sure the corresponding number of alarms is set ?line Threshold1 = disksup:get_almost_full_threshold(), % 80 ?line Data1 = disksup:get_disk_data(), ?line Over1 = over_threshold(Data1, Threshold1), ?line Alarms1 = get_alarms(), if Over1==length(Alarms1) -> ?line true; true -> dump_info(), ?line ?t:fail({bad_alarms, Threshold1, Data1, Alarms1}) end, %% Try to find a disk with space usage below Threshold1, %% lower the threshold accordingly and make sure new alarms are set Fun1 = fun({_Id, _Kbyte, Capacity}) -> if Capacity>0, Capacity<Threshold1 -> true; true -> false end end, ?line case until(Fun1, Data1) of {_, _, Cap1} -> Threshold2 = Cap1-1, ?line ok = disksup:set_almost_full_threshold(Threshold2/100), ?line disksup ! timeout, % force a disk check ?line Data2 = disksup:get_disk_data(), ?line Over2 = over_threshold(Data2, Threshold2), ?line Alarms2 = get_alarms(), if Over2==length(Alarms2), Over2>Over1 -> ?line true; true -> dump_info(), ?line ?t:fail({bad_alarms, Threshold2, Data2, Alarms2}) end; false -> ?line ignore end, %% Find out the highest space usage among all disks %% and try to raise the threshold above this value, %% make sure all alarms are cleared Fun2 = fun({_Id, _Kbyte, Capacity}, MaxAcc) -> if Capacity>MaxAcc -> Capacity; true -> MaxAcc end end, ?line case lists:foldl(Fun2, 0, Data1) of Max when Max<100 -> Threshold3 = Max+1, ?line ok = disksup:set_almost_full_threshold(Threshold3/100), ?line disksup ! timeout, % force a disk check ?line Data3 = disksup:get_disk_data(), ?line Over3 = over_threshold(Data3, Threshold3), ?line Alarms3 = get_alarms(), if Over3==0, length(Alarms3)==0 -> ?line ok; true -> dump_info(), ?line ?t:fail({bad_alarms, Threshold3, Data3, Alarms3}) end; 100 -> ?line ignore end, %% Reset threshold ?line ok = disksup:set_almost_full_threshold(Threshold1/100), ok. over_threshold(Data, Threshold) -> Data2 = remove_duplicated_disks(lists:keysort(1, Data)), lists:foldl(fun({_Id, _Kbyte, Cap}, N) when Cap>=Threshold -> N+1; (_DiskData, N) -> N end, 0, Data2). %% On some platforms (for example MontaVista) data for one disk can be %% "duplicated": %% Linux ppb 2.4.20_mvl31-pcore680 #1 Sun Feb 1 23:12:56 PST 2004 ppc unknown %% %% MontaVista(R) Linux(R) Professional Edition 3.1 %% %% [ppb:~]> /bin/df -lk %% Filesystem 1k-blocks Used Available Use% Mounted on %% rootfs 8066141 3023763 4961717 38% / %% /dev/root 8066141 3023763 4961717 38% / %% tmpfs 192892 0 192892 0% /dev/shm %% %% disksup: %% [{"/",8066141,38}, {"/",8066141,38}, {"/dev/shm",192892,0}] %% %% disksup will only set ONE alarm for "/". %% Therefore the list of disk data must be sorted and duplicated disk %% tuples removed before calculating how many alarms should be set, or %% the testcase will fail erroneously. remove_duplicated_disks([{Id, _, _}, {Id, Kbyte, Cap}|T]) -> remove_duplicated_disks([{Id, Kbyte, Cap}|T]); remove_duplicated_disks([H|T]) -> [H|remove_duplicated_disks(T)]; remove_duplicated_disks([]) -> []. get_alarms() -> lists:filter(fun({{disk_almost_full, _Disk},_}) -> true; (_) -> false end, alarm_handler:get_alarms()). until(Fun, [H|T]) -> case Fun(H) of true -> H; false -> until(Fun, T) end; until(_Fun, []) -> false. port(suite) -> []; port(doc) -> ["Test that disksup handles a terminating port program"]; port(Config) when is_list(Config) -> ?line Str = os:cmd("ps -ef | grep '[d]isksup'"), case io_lib:fread("~s ~s", Str) of {ok, [_Uid,Pid], _Rest} -> %% Monitor disksup ?line MonRef = erlang:monitor(process, disksup), ?line [{_Disk1,Kbyte1,_Cap1}|_] = disksup:get_disk_data(), ?line true = Kbyte1>0, %% Kill the port program case os:cmd("kill -9 " ++ Pid) of [] -> %% disksup should now terminate receive {'DOWN', MonRef, _, _, {port_died, _Reason}} -> ok; {'DOWN', MonRef, _, _, Reason} -> ?line ?t:fail({unexpected_exit_reason, Reason}) after 3000 -> ?line ?t:fail({still_alive, Str}) end, %% Give os_mon_sup time to restart disksup ?t:sleep(?t:seconds(3)), ?line [{_Disk2,Kbyte2,_Cap2}|_] = disksup:get_disk_data(), ?line true = Kbyte2>0, ok; Line -> erlang:demonitor(MonRef), {skip, {not_killed, Line}} end; _ -> {skip, {os_pid_not_found, Str}} end. terminate(suite) -> []; terminate(Config) when is_list(Config) -> ?line ok = application:set_env(os_mon, start_disksup, false), ?line ok = supervisor:terminate_child(os_mon_sup, disksup), ok. unavailable(suite) -> []; unavailable(doc) -> ["Test correct behaviour when service is unavailable"]; unavailable(Config) when is_list(Config) -> %% Make sure all API functions return their dummy values ?line [{"none",0,0}] = disksup:get_disk_data(), ?line 1800000 = disksup:get_check_interval(), ?line ok = disksup:set_check_interval(5), ?line 80 = disksup:get_almost_full_threshold(), ?line ok = disksup:set_almost_full_threshold(0.9), ok. restart(suite) -> []; restart(Config) when is_list(Config) -> ?line ok = application:set_env(os_mon, start_disksup, true), ?line {ok, _Pid} = supervisor:restart_child(os_mon_sup, disksup), ok. otp_5910(suite) -> []; otp_5910(doc) -> ["Test that alarms are cleared if disksup crashes or " "if OS_Mon is stopped"]; otp_5910(Config) when is_list(Config) -> %% Make sure disksup sets at least one alarm ?line Data = disksup:get_disk_data(), ?line Threshold0 = disksup:get_almost_full_threshold(), ?line Threshold = case over_threshold(Data, Threshold0) of 0 -> [{_Id,_Kbyte,Cap}|_] = Data, ?line ok = disksup:set_almost_full_threshold((Cap-1)/100), Cap-1; _N -> Threshold0 end, ?line ok = application:set_env(os_mon, disk_almost_full_threshold, Threshold/100), ?line disksup ! timeout, % force a disk check ?line Data2 = disksup:get_disk_data(), ?line Over = over_threshold(Data2, Threshold), ?line Alarms = get_alarms(), if Over==0 -> ?line ?t:fail({threshold_too_low, Data2, Threshold}); Over==length(Alarms) -> ok; true -> dump_info(), ?line ?t:fail({bad_alarms, Threshold, Data2, Alarms}) end, %% Kill disksup exit(whereis(disksup), faked_disksup_crash), %% Wait a little to make sure disksup has been restarted, %% then make sure the alarms are set once, but not twice ?t:sleep(?t:seconds(1)), ?line Data3 = disksup:get_disk_data(), ?line Alarms2 = get_alarms(), if length(Alarms2)==length(Alarms) -> ok; true -> dump_info(), ?line ?t:fail({bad_alarms, Threshold, Data3, Alarms,Alarms2}) end, %% Stop OS_Mon and make sure all disksup alarms are cleared ?line ok = application:stop(os_mon), ?t:sleep(?t:seconds(1)), ?line Alarms3 = get_alarms(), if length(Alarms3)==0 -> ok; true -> ?line ?t:fail({alarms_not_cleared, Alarms3}) end, %% Reset threshold and restart OS_Mon ?line ok = application:set_env(os_mon, disksup_almost_full_threshold, 0.8), ?line ok = disksup:set_almost_full_threshold(0.8), ?line ok = application:start(os_mon), ok. dump_info() -> io:format("Status: ~p~n", [sys:get_status(disksup)]).