%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%%
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%%
%% %CopyrightEnd%
%%
-module(timer_SUITE).
-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2]).
-export([do_big_test/1]).
-export([big_test/1, collect/3, i_t/3, a_t/2]).
-export([do_nrev/1, internal_watchdog/2]).
-include_lib("test_server/include/test_server.hrl").
%% Test suite for the timer module. This is a really nasty test: it runs a
%% lot of timeouts and then checks at the end whether any of them were
%% triggered too early or whether any late timeouts were much too
%% late. What should be added is more testing of the interface
%% functions I guess. But I don't have time for that now.
%%
%% Expect it to run for at least 5-10 minutes!
%% The main test case in this module is "do_big_test", which
%% orders a large number of timeouts and measures how
%% exactly the timeouts arrive. To simulate a system under load there is
%% also a number of other concurrent processes running "nrev" at the same
%% time. The result is analyzed afterwards by trying to check if the
%% measured values are reasonable. It is hard to determine what is
%% reasonable on different machines therefore the test can sometimes
%% fail, even though the timer module is ok. I have checked against
%% previous versions of the timer module (which contained bugs) and it
%% seems it fails every time when running the buggy timer modules.
%%
%% The solution is to rewrite the test suite. Possible strategies for a
%% rewrite: smarter math on the measuring data, test cases with varying
%% amount of load. The test suite should also include tests that test the
%% interface of the timer module.
%% Suite-level Common Test information: installs the ts_install_cth hook.
suite() ->
    Hooks = [ts_install_cth],
    [{ct_hooks, Hooks}].
%% The test cases to run: only the single big timing test.
all() -> [do_big_test].
%% This suite defines no test case groups.
groups() -> [].
%% No suite-wide setup is needed; the configuration is passed through as is.
init_per_suite(Config) -> Config.
%% No suite-wide teardown is needed.
end_per_suite(_Config) -> ok.
%% No per-group setup; returns the configuration unchanged.
init_per_group(_GroupName, Config) -> Config.
%% No per-group teardown; returns the configuration unchanged.
end_per_group(_GroupName, Config) -> Config.
%% ------------------------------------------------------- %%
%% Test case entry point.
%% Runs big_test/1 with 200 rounds under a 20 minute timetrap. Exits are
%% trapped while the test runs, because big_loop/3 reaps its linked worker
%% processes through 'EXIT' messages. The previous trap_exit setting is
%% restored before the result is reported.
do_big_test(TConfig) when is_list(TConfig) ->
    Timetrap = ?t:timetrap(?t:minutes(20)),
    OldTrapExit = process_flag(trap_exit, true),
    Outcome = big_test(200),
    process_flag(trap_exit, OldTrapExit),
    ?t:timetrap_cancel(Timetrap),
    report_result(Outcome).
%% Turn the analysis result into a test case verdict: 'ok' passes,
%% anything else fails the test case with that term as the reason.
report_result(ok) ->
    ok;
report_result(Failure) ->
    ?line test_server:fail(Failure).
%% ------------------------------------------------------- %%
%% Run N rounds of timer tests and analyze the collected measurements.
%% Returns 'ok' or an {error, Reason} term produced by analyze_report/1.
%% The caller must be trapping exits: the collector's termination is
%% awaited as an 'EXIT' message.
big_test(N) ->
    Collector = start_collect(),
    %% Results are discarded; presumably a warm-up of the clock path.
    system_time(), system_time(), system_time(),
    %% Seed the random generator from the current time, then throw away
    %% the first few numbers.
    Seed1 = element(2, erlang:now()),
    Seed2 = Seed1 * 3,
    Seed3 = element(3, erlang:now()),
    random:seed(Seed1, Seed2, Seed3),
    random:uniform(100), random:uniform(100), random:uniform(100),
    big_loop(Collector, N, []),
    %% Fetch the report, then shut the collector down and wait for it.
    Collector ! {self(), get_report},
    Report = receive {report, R} -> R end,
    Collector ! stop,
    receive {'EXIT', Collector, normal} -> ok end,
    print_report(Report),
    analyze_report(Report).
%% Main driver loop.
%%   C    - pid of the collector process that test processes report to
%%   N    - number of rounds still to start
%%   Pids - linked worker processes (timer tests and nrev load) still alive
%% Relies on the caller trapping exits: workers terminate with reason
%% 'done' and are removed from Pids when their 'EXIT' message arrives.
%% Any other exit reason is logged and the process is removed as well.
%% Returns 'ok' once all rounds are started and every worker has exited.
big_loop(_C, 0, []) ->
%%io:format("All processes are done!~n", []),
ok;
big_loop(C, 0, Pids) ->
%%ok = io:format("Loop done, ~w processes remaining~n", [length(Pids)]),
%% No rounds left: block until the remaining processes have exited.
receive
{'EXIT', Pid, done} ->
big_loop(C, 0, lists:delete(Pid, Pids));
{'EXIT', Pid, Error} ->
?line ok = io:format("XXX Pid ~w died with reason ~p~n",
[Pid, Error]),
big_loop(C, 0, lists:delete(Pid, Pids))
end;
big_loop(C, N, Pids) ->
%% First reap any processes that are done. Reaping does not consume a
%% round (N is unchanged), so all pending 'EXIT' messages are handled
%% before the next round is started in the 'after 0' branch.
receive
{'EXIT', Pid, done} ->
big_loop(C, N, lists:delete(Pid, Pids));
{'EXIT', Pid, Error} ->
?line ok =io:format("XXX Internal error: Pid ~w died, reason ~p~n",
[Pid, Error]),
big_loop(C, N, lists:delete(Pid, Pids))
after 0 ->
%% maybe start an interval timer test (one draw in four starts one)
Pids1 = maybe_start_i_test(Pids, C, random:uniform(4)),
%% start 1-4 "after" tests
Pids2 = start_after_test(Pids1, C, random:uniform(4)),
%%Pids2=Pids1,
%% wait a little while (10 to 2000 ms, in steps of 10)
timer:sleep(random:uniform(200)*10),
%% spawn zero, one or two nrev to get some load ;-/
Pids3 = start_nrev(Pids2, random:uniform(100)),
big_loop(C, N-1, Pids3)
end.
%% Start zero, one or two linked nrev load processes, depending on N
%% (the caller passes a random number in 1..100), and return Pids with
%% the new processes prepended.
%%   N < 25 - no load process
%%   N < 75 - one process that starts almost immediately
%%   else   - one process delayed by a random multiple of 10 ms plus one
%%            that starts almost immediately
start_nrev(Pids, N) when N < 25 ->
    Pids;
start_nrev(Pids, N) when N < 75 ->
    Quick = spawn_link(timer_SUITE, do_nrev, [1]),
    [Quick | Pids];
start_nrev(Pids, _N) ->
    Delay = random:uniform(1000) * 10,
    Delayed = spawn_link(timer_SUITE, do_nrev, [Delay]),
    Quick = spawn_link(timer_SUITE, do_nrev, [1]),
    [Delayed, Quick | Pids].
%% Start N "send_after" test processes reporting to collector C and return
%% Pids with the new processes prepended. Timeouts are random multiples of
%% 100 ms; when two tests are started together the second timeout is
%% derived from the first. N is expected to be at least 1.
start_after_test(Pids, C, 1) ->
    Delay = random:uniform(100) * 100,
    Tester = s_a_t(C, Delay),
    [Tester | Pids];
start_after_test(Pids, C, 2) ->
    Delay1 = random:uniform(100) * 100,
    Delay2 = Delay1 div random:uniform(3) + 200,
    Tester1 = s_a_t(C, Delay1),
    Tester2 = s_a_t(C, Delay2),
    [Tester1, Tester2 | Pids];
start_after_test(Pids, C, N) ->
    %% More than two: start one here and recurse for the rest.
    Delay = random:uniform(100) * 100,
    Tester = s_a_t(C, Delay),
    start_after_test([Tester | Pids], C, N - 1).
%% Spawn a linked process running a_t/2 for the given timeout; returns
%% its pid.
s_a_t(C, TimeOut) ->
    Args = [C, TimeOut],
    spawn_link(timer_SUITE, a_t, Args).
%% Body of a "send_after" test process.
%% Orders a single timer:send_after/3 message, measures the time until a
%% message arrives and reports {Start, Stop, TimeOut} to collector C. If
%% the internal watchdog fires first the sample is still reported, a
%% diagnostic line is printed, and the process exits the same way.
%% Always terminates with exit(done).
a_t(C, TimeOut) ->
    start_watchdog(self(), TimeOut),
    Start = system_time(),
    timer:send_after(TimeOut, self(), now),
    Trigger = receive
                  now -> timer;
                  watchdog -> watchdog
              end,
    Stop = system_time(),
    report(C, Start, Stop, TimeOut),
    case Trigger of
        timer ->
            exit(done);
        watchdog ->
            ?line ok = io:format("Internal watchdog timeout (a), not good!!~n", []),
            exit(done)
    end.
%% Start an interval timer test only when the third argument is 1 (the
%% caller passes a random number in 1..4); otherwise return Pids as is.
%% The interval is a random multiple of 100 ms and the number of ticks
%% is at least 4.
maybe_start_i_test(Pids, C, 1) ->
    Interval = random:uniform(100) * 100,
    Ticks = random:uniform(10) + 3,
    Tester = spawn_link(timer_SUITE, i_t, [C, Interval, Ticks]),
    [Tester | Pids];
maybe_start_i_test(Pids, _C, _NoStart) ->
    Pids.
%% Body of an interval timer test process.
%% Arms a watchdog sized for the whole run (TimeOut * Times), starts a
%% timer:send_interval/2 timer delivering 'interval' to this process and
%% enters the receive loop with the tick counter at 1.
i_t(C, TimeOut, Times) ->
    start_watchdog(self(), TimeOut * Times),
    T0 = system_time(),
    {ok, TimerRef} = timer:send_interval(TimeOut, interval),
    i_wait(T0, T0, 1, TimeOut, Times, TimerRef, C).
%% Receive loop of an interval timer test process.
%%   Start   - time (system_time/0) when the interval timer was ordered
%%   Prev    - time of the previous tick (equals Start before the first)
%%   Count   - number of the tick being waited for, starting at 1
%%   TimeOut - the interval
%%   Times   - number of ticks after which the test ends
%%   Ref     - timer reference returned by timer:send_interval/2
%%   C       - collector pid
%% Every tick is reported to C. The last sample of a run is tagged
%% {final, Count}, whether the run ended normally or because the internal
%% watchdog fired. The interval timer is cancelled on every exit path,
%% including the mid-sequence watchdog path. Always terminates with
%% exit(done).
i_wait(Start, Prev, Times, TimeOut, Times, Ref, C) ->
    %% Count has reached Times: whatever arrives next ends the test.
    receive
        interval ->
            i_final(C, Times, Start, Prev, TimeOut, Ref),
            exit(done);
        watchdog ->
            i_final(C, Times, Start, Prev, TimeOut, Ref),
            ?line ok = io:format("Internal watchdog timeout (i), not good!!~n", []),
            exit(done)
    end;
i_wait(Start, Prev, Count, TimeOut, Times, Ref, C) ->
    receive
        interval ->
            Now = system_time(),
            report_interval(C, Count, Start, Prev, Now, TimeOut),
            i_wait(Start, Now, Count+1, TimeOut, Times, Ref, C);
        watchdog ->
            i_final(C, Count, Start, Prev, TimeOut, Ref),
            ?line ok = io:format("Internal watchdog timeout (j), not good!!~n", []),
            exit(done)
    end.

%% Report the last sample of a run, tagged {final, Count}, and cancel the
%% interval timer.
i_final(C, Count, Start, Prev, TimeOut, Ref) ->
    Now = system_time(),
    report_interval(C, {final,Count}, Start, Prev, Now, TimeOut),
    timer:cancel(Ref).
%% Send one "send_after" measurement to collector C as
%% {a_sample, Start, Stop, Time}. Returns the message sent.
report(C, Start, Stop, Time) ->
    Sample = {a_sample, Start, Stop, Time},
    C ! Sample.
%% Send one interval tick measurement to collector C as
%% {i_sample, Count, Start, Prev, Now, TimeOut}. Returns the message sent.
report_interval(C, Count, Start, Prev, Now, TimeOut) ->
    Sample = {i_sample, Count, Start, Prev, Now, TimeOut},
    C ! Sample.
%% ------------------------------------------------------- %%
%% internal watchdog
%% Spawn a linked watchdog that sends 'watchdog' to Pid after three times
%% the expected timeout plus one second. Returns the watchdog's pid.
start_watchdog(Pid, TimeOut) ->
    Limit = 3 * TimeOut + 1000,
    spawn_link(timer_SUITE, internal_watchdog, [Pid, Limit]).
%% Watchdog process body: wait TimeOut milliseconds using a plain
%% receive ... after (no messages are consumed), then notify Pid with
%% 'watchdog' and exit normally.
internal_watchdog(Pid, TimeOut) ->
    receive
    after TimeOut ->
        ok
    end,
    Pid ! watchdog,
    exit(normal).
%% ------------------------------------------------------- %%
%% Statistics accumulated by update/2 for one class of samples.
%%   n   - number of samples folded in
%%   max - maximum field, starts at 0
%%   min - minimum field; starts as the atom 'min', which compares greater
%%         than every number in Erlang term order and so stands for
%%         "no value yet"
%%   avg - running value, recomputed as (New + Avg) div 2 for every sample
-record(stat, {n=0,max=0,min=min,avg=0}).
%% Spawn the linked collector process with empty statistics: no samples,
%% no exact hits, fresh late and early stats and no interval samples.
%% Returns its pid.
start_collect() ->
    Counters = {0, new_update(), new_update()},
    spawn_link(timer_SUITE, collect, [0, Counters, []]).
%% Collector process loop.
%%   N       - number of a_sample messages received so far
%%   {E,A,B} - E: count of exact timeouts, A: #stat{} of late deviations,
%%             B: #stat{} of early deviations
%%   I       - list of {Count | {final,Count}, IntervDiff, Drift} entries,
%%             newest first
%% Handles measurement messages plus the control messages print_report,
%% {Pid, get_report}, reset and stop. Any other message is dropped.
collect(N, {E,A,B}, I) ->
receive
%% "send_after" sample that arrived late: record by how much.
{a_sample, Start, Stop, Time} when Stop - Start > Time ->
collect(N+1, {E,update(Stop-Start-Time,A),B}, I);
%% "send_after" sample that arrived early: record by how much.
{a_sample, Start, Stop, Time} when Stop - Start < Time ->
collect(N+1, {E,A,update(Time-Stop+Start,B)}, I);
%% Neither late nor early: an exact hit.
{a_sample, _Start, _Stop, _Time} ->
collect(N+1, {E+1,A,B}, I);
%% Interval samples. IntervDiff is the deviation of this tick from the
%% previous one; Drift is the deviation from the ideal time
%% Start + Count*TimeOut.
{i_sample, {final,Count}, Start, Prev, Now, TimeOut} ->
IntervDiff = Now - Prev - TimeOut,
Drift = Now - (Count*TimeOut) - Start,
collect(N, {E,A,B}, [{{final,Count},IntervDiff,Drift}|I]);
{i_sample, Count, Start, Prev, Now, TimeOut} ->
IntervDiff = Now - Prev - TimeOut,
Drift = Now - (Count*TimeOut) - Start,
collect(N, {E,A,B}, [{Count,IntervDiff,Drift}|I]);
%% Print the current statistics from within the collector.
print_report ->
print_report({E,A,B,I}),
collect(N,{E,A,B}, I);
%% Reply to Pid with {report, {E,A,B,I}}.
{Pid, get_report} when is_pid(Pid) ->
Pid ! {report, {E, A, B, I}},
collect(N,{E,A,B}, I);
%% Throw away everything collected so far.
reset ->
collect(0, {0,new_update(),new_update()}, []);
stop ->
exit(normal);
%% Drain unexpected messages so the mailbox cannot fill up.
_Other ->
collect(N, {E,A,B}, I)
end.
%% A fresh statistics record with all fields at their defaults.
new_update() ->
    #stat{}.
%% Fold one sample New into Stat and return the updated record.
%% The maximum and the minimum are tracked independently of each other,
%% so a sample that raises the maximum (as the very first sample does,
%% since max starts at 0) is also considered for the minimum. The initial
%% minimum is the atom 'min', which compares greater than any number, so
%% the first sample always replaces it.
%% NOTE(review): avg is a running (New + Avg) div 2, which weights recent
%% samples most heavily; it is not the arithmetic mean of all samples.
%% Kept unchanged here.
update(New, #stat{n=N, max=Max, min=Min, avg=Avg} = Stat) ->
    Stat#stat{n = N + 1,
              %% Existing value first, so it is kept on a tie.
              max = erlang:max(Max, New),
              min = erlang:min(Min, New),
              avg = (New + Avg) div 2}.
%update(New, {N,Max,Min,Avg}) when New>Max ->
% {N+1,New,Min,(New+Avg) div 2};
%update(New, {N,Max,Min,Avg}) when New<Min ->
% {N+1,Max,New,(New+Avg) div 2};
%update(New, {N,Max,Min,Avg}) ->
% {N+1,Max,Min,(New+Avg) div 2}.
%% Print a human-readable summary of a collected report:
%% the number of exact, late and early timeouts, the raw late and early
%% #stat{} records, and the {Max,Min,Avg} drift of the final interval
%% samples. Returns 'ok'.
print_report({E,LateS,EarlyS,I}) ->
    EarlyCount = EarlyS#stat.n,
    LateCount = LateS#stat.n,
    TotalCount = E + EarlyCount + LateCount,
    io:format("~nOn total of ~w timeouts, there were ~w exact, ~w "
              "late and ~w early.~n",
              [TotalCount, E, LateCount, EarlyCount]),
    io:format("Late stats (N,Max,Min,Avg): ~w~nEarly stats: ~w~n",
              [LateS, EarlyS]),
    io:format("Interval stats (Max,Min,Avg): ~w~n",
              [collect_interval_final_stats(I)]),
    ok.
%% Summarize the drift of the final sample of every interval test.
%% Only entries tagged {final, Count} contribute; all other entries are
%% skipped. Returns {Max, Min, Avg}, where Max starts at 0, Min starts as
%% the atom 'min' (greater than any number in term order) and Avg is a
%% running (Dev + Avg) div 2.
collect_interval_final_stats(I) ->
    collect_interval_final_stats(I, 0, min, 0).

%% Same as above, starting from the given Max, Min and Avg.
collect_interval_final_stats(Samples, Max0, Min0, Avg0) ->
    Step = fun({{final, _Count}, _Diff, Dev}, {Max, Min, Avg}) ->
                   %% Current extreme first, so it is kept on a tie.
                   {erlang:max(Max, Dev), erlang:min(Min, Dev), (Dev + Avg) div 2};
              (_NotFinal, Acc) ->
                   Acc
           end,
    lists:foldl(Step, {Max0, Min0, Avg0}, Samples).
%% Decide whether a collected report is acceptable.
%% Runs the early-timeout check and the abnormal-maximum check and
%% combines their results into 'ok' or a single {error, Reason}.
analyze_report({E,LateS,EarlyS,I}) ->
    EarlyCount = EarlyS#stat.n,
    LateCount = LateS#stat.n,
    {IntervMax, IntervMin, _IntervAvg} = collect_interval_final_stats(I),
    EarlyCheck = min_and_early_check(E, EarlyCount, LateCount, IntervMin),
    MaxCheck = abnormal_max_check(LateS#stat.max, IntervMax),
    res_combine(ok, [EarlyCheck, MaxCheck]).
%% Acceptance thresholds for the analysis, in the unit of system_time/0.
%%   ok_i_min - smallest acceptable final interval drift (slightly early
%%              is tolerated)
%%   ok_max   - upper bound (exclusive) for the largest late timeout
%%   ok_i_max - upper bound (exclusive) for the largest final interval drift
-define(ok_i_min, -100).
-define(ok_max, 8000).
-define(ok_i_max, 4000).
%% Check that nothing fired too early.
%% The result is 'ok' as long as Early == 0 and IntervMin >= ?ok_i_min.
%% Otherwise it is {error, Reason}, where Reason is one of
%%   {early_timeouts, Early}
%%   early_interval_timeout
%%   [{early_timeouts, Early}, early_interval_timeout]
%% so a combined failure is a flat list of the same reasons the single
%% failures use. IntervMin may be the atom 'min' when no final interval
%% sample exists; it compares greater than any number and passes.
min_and_early_check(_Exact, 0, _Late, IntervMin) when IntervMin >= ?ok_i_min ->
    ok;
min_and_early_check(_Exact, Early, _Late, IntervMin) when IntervMin >= ?ok_i_min ->
    {error, {early_timeouts, Early}};
min_and_early_check(_Exact, 0, _Late, _IntervMin) ->
    {error, early_interval_timeout};
min_and_early_check(_Exact, Early, _Late, _IntervMin) ->
    {error, [{early_timeouts, Early}, early_interval_timeout]}.
%% Check that neither the largest late timeout nor the largest final
%% interval drift is abnormally big. Returns 'ok' when LateMax is below
%% ?ok_max and IntMax is below ?ok_i_max, otherwise {error, Reason}
%% naming the offending value, or both values in a list.
abnormal_max_check(LateMax, IntMax) ->
    LateOk = LateMax < ?ok_max,
    IntervalOk = IntMax < ?ok_i_max,
    case {LateOk, IntervalOk} of
        {true, true} ->
            ok;
        {false, true} ->
            {error, {big_late_max, LateMax}};
        {true, false} ->
            {error, {big_interval_max, IntMax}};
        {false, false} ->
            {error, [{big_late_max, LateMax},{big_interval_max, IntMax}]}
    end.
%% Combine a starting result with a list of check results.
%% 'ok' results are ignored. The first error replaces an 'ok' accumulator.
%% Each further error is merged into a flat list with the newest reason
%% first.
res_combine(Res, Results) ->
    lists:foldl(fun res_merge/2, Res, Results).

%% Merge one check result into the accumulated result.
res_merge(ok, Acc) ->
    Acc;
res_merge({error, What}, ok) ->
    {error, What};
res_merge({error, E}, {error, Es}) ->
    {error, lists:flatten([E, Es])}.
%% Current time in milliseconds, derived from erlang:now/0
%% ({MegaSecs, Secs, MicroSecs}); microseconds are truncated.
system_time() ->
    {MegaSecs, Secs, MicroSecs} = erlang:now(),
    (MegaSecs * 1000000 + Secs) * 1000 + MicroSecs div 1000.
%% ------------------------------------------------------- %%
%% Load generator process body: sleep for Sleep milliseconds, naively
%% reverse a 29 character string 1000 times, then terminate with
%% exit(done) so that big_loop/3 can reap the process.
do_nrev(Sleep) ->
    timer:sleep(Sleep),
    Input = "abcdefghijklmnopqrstuvxyz1234",
    test(1000, Input),
    exit(done).
%% Run nrev/1 on L exactly N times, discarding the results; returns
%% 'true'.
test(0, _L) ->
    true;
test(N, L) ->
    _Reversed = nrev(L),
    test(N - 1, L).
%% Naive list reversal: reverse the tail, then append the head at the
%% end. Its only use in this file is as CPU load, via test/2.
nrev([Head|Tail]) ->
    ReversedTail = nrev(Tail),
    append(ReversedTail, [Head]);
nrev([]) ->
    [].
%% Hand-written list append used by nrev/1: copies the first list
%% element by element in front of the second argument.
append([], Rest) ->
    Rest;
append([Head|Tail], Rest) ->
    [Head | append(Tail, Rest)].
%% ------------------------------------------------------- %%