%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% -module(timer_SUITE). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]). -export([do_big_test/1]). -export([big_test/1, collect/3, i_t/3, a_t/2]). -export([do_nrev/1, internal_watchdog/2]). -include_lib("test_server/include/test_server.hrl"). %% Test suite for timer module. This is a really nasty test it runs a %% lot of timeouts and then checks in the end if any of them was %% trigggered too early or if any late timeouts was much too %% late. What should be added is more testing of the interface %% functions I guess. But I don't have time for that now. %% %% Expect it to run for at least 5-10 minutes! %% The main test case in this module is "do_big_test", which %% orders a large number of timeouts and measures how %% exact the timeouts arrives. To simulate a system under load there is %% also a number of other concurrent processes running "nrev" at the same %% time. The result is analyzed afterwards by trying to check if the %% measured values are reasonable. It is hard to determine what is %% reasonable on different machines therefore the test can sometimes %% fail, even though the timer module is ok. I have checked against %% previous versions of the timer module (which contained bugs) and it %% seems it fails every time when running the buggy timer modules. %% %% The solution is to rewrite the test suite. Possible strategies for a %% rewrite: smarter math on the measuring data, test cases with varying %% amount of load. The test suite should also include tests that test the %% interface of the timer module. suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [do_big_test]. groups() -> []. init_per_suite(Config) -> Config. end_per_suite(_Config) -> ok. init_per_group(_GroupName, Config) -> Config. end_per_group(_GroupName, Config) -> Config. %% ------------------------------------------------------- %% do_big_test(TConfig) when is_list(TConfig) -> Dog = ?t:timetrap(?t:minutes(20)), Save = process_flag(trap_exit, true), Result = big_test(200), process_flag(trap_exit, Save), ?t:timetrap_cancel(Dog), report_result(Result). report_result(ok) -> ok; report_result(Error) -> ?line test_server:fail(Error). %% ------------------------------------------------------- %% big_test(N) -> C = start_collect(), system_time(), system_time(), system_time(), A1 = element(2, erlang:now()), A2 = A1 * 3, A3 = element(3, erlang:now()), random:seed(A1, A2, A3), random:uniform(100),random:uniform(100),random:uniform(100), big_loop(C, N, []), %%C ! print_report, C ! {self(), get_report}, Report = receive {report, R} -> R end, C ! stop, receive {'EXIT', C, normal} -> ok end, print_report(Report), Result = analyze_report(Report), %%io:format("big_test is done: ~w~n", [Result]), Result. big_loop(_C, 0, []) -> %%io:format("All processes are done!~n", []), ok; big_loop(C, 0, Pids) -> %%ok = io:format("Loop done, ~w processes remaining~n", [length(Pids)]), %% wait for remaining processes receive {'EXIT', Pid, done} -> big_loop(C, 0, lists:delete(Pid, Pids)); {'EXIT', Pid, Error} -> ?line ok = io:format("XXX Pid ~w died with reason ~p~n", [Pid, Error]), big_loop(C, 0, lists:delete(Pid, Pids)) end; big_loop(C, N, Pids) -> %% First reap any processes that are done. receive {'EXIT', Pid, done} -> big_loop(C, N, lists:delete(Pid, Pids)); {'EXIT', Pid, Error} -> ?line ok =io:format("XXX Internal error: Pid ~w died, reason ~p~n", [Pid, Error]), big_loop(C, N, lists:delete(Pid, Pids)) after 0 -> %% maybe start an interval timer test Pids1 = maybe_start_i_test(Pids, C, random:uniform(4)), %% start 1-4 "after" tests Pids2 = start_after_test(Pids1, C, random:uniform(4)), %%Pids2=Pids1, %% wait a little while timer:sleep(random:uniform(200)*10), %% spawn zero, one or two nrev to get some load ;-/ Pids3 = start_nrev(Pids2, random:uniform(100)), big_loop(C, N-1, Pids3) end. start_nrev(Pids, N) when N < 25 -> Pids; start_nrev(Pids, N) when N < 75 -> [spawn_link(timer_SUITE, do_nrev, [1])|Pids]; start_nrev(Pids, _N) -> NrevPid1 = spawn_link(timer_SUITE, do_nrev, [random:uniform(1000)*10]), NrevPid2 = spawn_link(timer_SUITE, do_nrev, [1]), [NrevPid1,NrevPid2|Pids]. start_after_test(Pids, C, 1) -> TO1 = random:uniform(100)*100, [s_a_t(C, TO1)|Pids]; start_after_test(Pids, C, 2) -> TO1 = random:uniform(100)*100, TO2 = TO1 div random:uniform(3) + 200, [s_a_t(C, TO1),s_a_t(C, TO2)|Pids]; start_after_test(Pids, C, N) -> TO1 = random:uniform(100)*100, start_after_test([s_a_t(C, TO1)|Pids], C, N-1). s_a_t(C, TimeOut) -> spawn_link(timer_SUITE, a_t, [C, TimeOut]). a_t(C, TimeOut) -> start_watchdog(self(), TimeOut), Start = system_time(), timer:send_after(TimeOut, self(), now), receive now -> Stop = system_time(), report(C, Start,Stop,TimeOut), exit(done); watchdog -> Stop = system_time(), report(C, Start,Stop,TimeOut), ?line ok = io:format("Internal watchdog timeout (a), not good!!~n", []), exit(done) end. maybe_start_i_test(Pids, C, 1) -> %% ok do it TOI = random:uniform(100)*100, CountI = random:uniform(10) + 3, % at least 4 times [spawn_link(timer_SUITE, i_t, [C, TOI, CountI])|Pids]; maybe_start_i_test(Pids, _C, _) -> Pids. i_t(C, TimeOut, Times) -> start_watchdog(self(), TimeOut*Times), Start = system_time(), {ok, Ref} = timer:send_interval(TimeOut, interval), i_wait(Start, Start, 1, TimeOut, Times, Ref, C). i_wait(Start, Prev, Times, TimeOut, Times, Ref, C) -> receive interval -> Now = system_time(), report_interval(C, {final,Times}, Start, Prev, Now, TimeOut), timer:cancel(Ref), exit(done); watchdog -> Now = system_time(), report_interval(C, {final,Times}, Start, Prev, Now, TimeOut), timer:cancel(Ref), ?line ok = io:format("Internal watchdog timeout (i), not good!!~n", []), exit(done) end; i_wait(Start, Prev, Count, TimeOut, Times, Ref, C) -> receive interval -> Now = system_time(), report_interval(C, Count, Start, Prev, Now, TimeOut), i_wait(Start, Now, Count+1, TimeOut, Times, Ref, C); watchdog -> Now = system_time(), report_interval(C, {final,Count}, Start, Prev, Now, TimeOut), ?line ok = io:format("Internal watchdog timeout (j), not good!!~n", []), exit(done) end. report(C, Start, Stop, Time) -> C ! {a_sample, Start, Stop, Time}. report_interval(C, Count, Start, Prev, Now, TimeOut) -> C ! {i_sample, Count, Start, Prev, Now, TimeOut}. %% ------------------------------------------------------- %% %% internal watchdog start_watchdog(Pid, TimeOut) -> spawn_link(timer_SUITE, internal_watchdog, [Pid, 3*TimeOut+1000]). internal_watchdog(Pid, TimeOut) -> receive after TimeOut -> Pid ! watchdog, exit(normal) end. %% ------------------------------------------------------- %% -record(stat, {n=0,max=0,min=min,avg=0}). start_collect() -> spawn_link(timer_SUITE, collect, [0,{0,new_update(),new_update()},[]]). collect(N, {E,A,B}, I) -> receive {a_sample, Start, Stop, Time} when Stop - Start > Time -> collect(N+1, {E,update(Stop-Start-Time,A),B}, I); {a_sample, Start, Stop, Time} when Stop - Start < Time -> collect(N+1, {E,A,update(Time-Stop+Start,B)}, I); {a_sample, _Start, _Stop, _Time} -> collect(N+1, {E+1,A,B}, I); {i_sample, {final,Count}, Start, Prev, Now, TimeOut} -> IntervDiff = Now - Prev - TimeOut, Drift = Now - (Count*TimeOut) - Start, collect(N, {E,A,B}, [{{final,Count},IntervDiff,Drift}|I]); {i_sample, Count, Start, Prev, Now, TimeOut} -> IntervDiff = Now - Prev - TimeOut, Drift = Now - (Count*TimeOut) - Start, collect(N, {E,A,B}, [{Count,IntervDiff,Drift}|I]); print_report -> print_report({E,A,B,I}), collect(N,{E,A,B}, I); {Pid, get_report} when is_pid(Pid) -> Pid ! {report, {E, A, B, I}}, collect(N,{E,A,B}, I); reset -> collect(0, {0,new_update(),new_update()}, []); stop -> exit(normal); _Other -> collect(N, {E,A,B}, I) end. new_update() -> #stat{}. update(New, Stat) when New > Stat#stat.max -> Stat#stat{n=Stat#stat.n + 1, max=New, avg=(New+Stat#stat.avg) div 2}; update(New, Stat) when New < Stat#stat.min -> Stat#stat{n=Stat#stat.n + 1, min=New, avg=(New+Stat#stat.avg) div 2}; update(New, Stat) -> Stat#stat{n=Stat#stat.n + 1, avg=(New+Stat#stat.avg) div 2}. %update(New, {N,Max,Min,Avg}) when New>Max -> % {N+1,New,Min,(New+Avg) div 2}; %update(New, {N,Max,Min,Avg}) when New % {N+1,Max,New,(New+Avg) div 2}; %update(New, {N,Max,Min,Avg}) -> % {N+1,Max,Min,(New+Avg) div 2}. print_report({E,LateS,EarlyS,I}) -> Early = EarlyS#stat.n, Late = LateS#stat.n, Total = E + Early + Late, io:format("~nOn total of ~w timeouts, there were ~w exact, ~w " "late and ~w early.~n", [Total, E, Late, Early]), io:format("Late stats (N,Max,Min,Avg): ~w~nEarly stats: ~w~n", [LateS, EarlyS]), IntervS = collect_interval_final_stats(I), io:format("Interval stats (Max,Min,Avg): ~w~n", [IntervS]), ok. collect_interval_final_stats(I) -> collect_interval_final_stats(I, 0, min, 0). collect_interval_final_stats([], Max, Min, Avg) -> {Max, Min, Avg}; collect_interval_final_stats([{{final,_Count},_,Dev}|T], Max, Min, Avg) -> NMax = if Dev>Max -> Dev; true -> Max end, NMin = if Dev Dev; true -> Min end, collect_interval_final_stats(T, NMax, NMin, (Dev+Avg) div 2); collect_interval_final_stats([_|T], Max, Min, Avg) -> collect_interval_final_stats(T, Max, Min, Avg). analyze_report({E,LateS,EarlyS,I}) -> Early = EarlyS#stat.n, Late = LateS#stat.n, IntervS = collect_interval_final_stats(I), Res1 = min_and_early_check(E, Early, Late, element(2,IntervS)), Res2 = abnormal_max_check(LateS#stat.max, element(1,IntervS)), res_combine(ok, [Res1, Res2]). -define(ok_i_min, -100). -define(ok_max, 8000). -define(ok_i_max, 4000). %% ok as long as Early == 0 and IntervMin >= ok_interv_min min_and_early_check(_Exact, 0, _Late, IntervMin) when IntervMin >= ?ok_i_min -> ok; min_and_early_check(_Exact, Early, _Late, IntervMin) when IntervMin >= ?ok_i_min -> {error, {early_timeouts, Early}}; min_and_early_check(_Exact, 0, _Late, _IntervMin) -> {error, early_interval_timeout}; min_and_early_check(_Exact, Early, _Late, _IntervMin) -> {error, [{early_timeouts, Early},{error, early_interval_timeout}]}. abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max, IntMax < ?ok_i_max -> ok; abnormal_max_check(LateMax, IntMax) when IntMax < ?ok_i_max -> {error, {big_late_max, LateMax}}; abnormal_max_check(LateMax, IntMax) when LateMax < ?ok_max -> {error, {big_interval_max, IntMax}}; abnormal_max_check(LateMax, IntMax) -> {error, [{big_late_max, LateMax},{big_interval_max, IntMax}]}. res_combine(Res, []) -> Res; res_combine(Res, [ok|T]) -> res_combine(Res, T); res_combine(ok, [{error,What}|T]) -> res_combine({error,What}, T); res_combine({error,Es}, [{error,E}|T]) -> res_combine({error,lists:flatten([E,Es])}, T). system_time() -> %%element(1, statistics(wall_clock)). {M,S,U} = erlang:now(), 1000000000 * M + 1000 * S + (U div 1000). %% ------------------------------------------------------- %% do_nrev(Sleep) -> timer:sleep(Sleep), test(1000,"abcdefghijklmnopqrstuvxyz1234"), exit(done). test(0,_) -> true; test(N,L) -> nrev(L), test(N - 1, L). nrev([]) -> []; nrev([H|T]) -> append(nrev(T), [H]). append([H|T],Z) -> [H|append(T,Z)]; append([],X) -> X. %% ------------------------------------------------------- %%