aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2016-07-21 19:07:16 +0200
committerRickard Green <[email protected]>2016-07-26 17:37:40 +0200
commit99a29386cb1a62bc7e9c205d897cd20017c598b4 (patch)
tree74e962b88678fd47492c37f99af05501007b2459 /erts/emulator
parent832985da5c5ca3f7635d77aaa31904d79adeb37e (diff)
downloadotp-99a29386cb1a62bc7e9c205d897cd20017c598b4.tar.gz
otp-99a29386cb1a62bc7e9c205d897cd20017c598b4.tar.bz2
otp-99a29386cb1a62bc7e9c205d897cd20017c598b4.zip
Ignore long time failures during high CPU utilization
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/test/long_timers_test.erl188
-rw-r--r--erts/emulator/test/z_SUITE.erl8
2 files changed, 162 insertions, 34 deletions
diff --git a/erts/emulator/test/long_timers_test.erl b/erts/emulator/test/long_timers_test.erl
index 7c055a31f9..90efe1292c 100644
--- a/erts/emulator/test/long_timers_test.erl
+++ b/erts/emulator/test/long_timers_test.erl
@@ -27,11 +27,16 @@
%%% Created : 21 Aug 2006 by Rickard Green <[email protected]>
%%%-------------------------------------------------------------------
+-define(HIGH_CPU_INFO, "Ignored due to high CPU utilization.").
+-define(MISSING_CPU_INFO, "Ignored due to missing CPU utilization information.").
-define(MAX_TIMEOUT, 60). % Minutes
--define(MAX_LATE_MS, 15*1000). % Milliseconds
+-define(MAX_LATE_MS, 1000). % Milliseconds
-define(REG_NAME, '___LONG___TIMERS___TEST___SERVER___').
+-define(HIGH_UTIL, 96.0).
+-define(UTIL_INTERVAL, 10000).
+
-define(DRV_NAME, timer_driver).
% First byte in communication with the timer driver
@@ -72,52 +77,149 @@ check_result() ->
receive
{'DOWN', Mon, process, _, Reason} ->
{?REG_NAME, 'DOWN', Reason};
- {result, ?REG_NAME, TORs, Start, End} ->
+ {result, ?REG_NAME, TORs, Start, End, UtilData} ->
erlang:demonitor(Mon),
receive {'DOWN', Mon, _, _, _} -> ok after 0 -> ok end,
stop_node(Node),
- check(TORs, ms((End - Start) - max_late()), ok)
+ Res = check(TORs, Start, End, UtilData, ms((End - Start) - max_late()), ok),
+ io:format("Start = ~p~n End = ~p~n UtilData = ~p~n", [Start, End, UtilData]),
+ Res
end.
+res(New, Old) when New == failed; Old == failed ->
+ failed;
+res(New, Old) when New == missing_cpu_info; Old == missing_cpu_info ->
+ missing_cpu_info;
+res(New, Old) when New == high_cpu; Old == high_cpu ->
+ high_cpu;
+res(New, _Old) ->
+ New.
+
check([#timeout_rec{timeout = Timeout,
type = Type,
timeout_diff = undefined} | TORs],
+ Start,
+ End,
+ UtilData,
NeedRes,
- _Ok) when Timeout < NeedRes ->
- io:format("~p timeout = ~p ms failed! No timeout.~n",
- [Type, Timeout]),
- check(TORs, NeedRes, failed);
+ Ok) when Timeout < NeedRes ->
+ {NewOk, HCPU} = case had_high_cpu_util(Start,
+ Timeout,
+ End - Timeout*1000,
+ UtilData) of
+ yes -> {res(high_cpu, Ok), ?HIGH_CPU_INFO};
+ no -> {res(failed, Ok), ""};
+ missing -> {res(missing_cpu_info, Ok), "FAILED", ?MISSING_CPU_INFO}
+ end,
+ io:format("~p timeout = ~p ms FAILED! No timeout. ~s~n",
+ [Type, Timeout, HCPU]),
+ check(TORs, Start, End, UtilData, NeedRes, NewOk);
check([#timeout_rec{timeout_diff = undefined} | TORs],
+ Start,
+ End,
+ UtilData,
NeedRes,
Ok) ->
- check(TORs, NeedRes, Ok);
+ check(TORs, Start, End, UtilData, NeedRes, Ok);
check([#timeout_rec{timeout = Timeout,
type = Type,
timeout_diff = {error, Reason}} | TORs],
+ Start,
+ End,
+ UtilData,
NeedRes,
_Ok) ->
- io:format("~p timeout = ~p ms failed! exit reason ~p~n",
+ io:format("~p timeout = ~p ms FAILED! exit reason ~p~n",
[Type, Timeout, Reason]),
- check(TORs, NeedRes, failed);
+ check(TORs, Start, End, UtilData, NeedRes, failed);
check([#timeout_rec{timeout = Timeout,
type = Type,
timeout_diff = TimeoutDiff} | TORs],
+ Start,
+ End,
+ UtilData,
NeedRes,
Ok) ->
- {NewOk, SuccessStr} = case ((0 =< TimeoutDiff)
- andalso (TimeoutDiff =< max_late())) of
- true -> {Ok, "succeeded"};
- false -> {failed, "FAILED"}
+ {NewOk, SuccessStr, HCPU} = case {(0 =< TimeoutDiff),
+ (TimeoutDiff =< max_late())} of
+ {true, true} ->
+ {res(ok, Ok), "succeeded", ""};
+ {false, _} ->
+ {res(failed, Ok), "FAILED", ""};
+ _ ->
+ case had_high_cpu_util(Start,
+ Timeout,
+ TimeoutDiff,
+ UtilData) of
+ yes -> {res(high_cpu, Ok), "FAILED", ?HIGH_CPU_INFO};
+ no -> {res(failed, Ok), "FAILED", ""};
+ missing -> {res(missing_cpu_info, Ok), "FAILED", ?MISSING_CPU_INFO}
+ end
end,
- io:format("~s timeout = ~s ms ~s! timeout diff = ~s.~n",
+ io:format("~s timeout = ~s ms ~s! timeout diff = ~s. ~s~n",
[type_str(Type),
time_str(Timeout),
SuccessStr,
- time_str(TimeoutDiff, erlang:convert_time_unit(1, seconds, native))]),
- check(TORs, NeedRes, NewOk);
-check([], _NeedRes, Ok) ->
+ time_str(TimeoutDiff, 1000000),
+ HCPU]),
+ check(TORs, Start, End, UtilData, NeedRes, NewOk);
+check([],_Start,_End,_UtilData,_NeedRes, Ok) ->
Ok.
+% TargetTimeout in ms, other in us.
+had_high_cpu_util(StartTime,
+ TargetTimeout,
+ TimeoutDiff,
+ UtilData) ->
+ TargetTo = StartTime + TargetTimeout*1000,
+ ActTo = TargetTo + TimeoutDiff,
+ hcpu(ActTo, TargetTo, UtilData).
+
+hcpu(_ActTo, _TargetTo, [{UT, 0} | _] = UD) ->
+ missing; %% Util is the integer zero when not supported...
+%% UT2 =:= UT1
+hcpu(ActTo, TargetTo, [{UT, _}, {UT, _} | _] = UD) ->
+ hcpu(ActTo, TargetTo, tl(UD));
+%% UT2 > UT1 > ActTo > TargetTo
+hcpu(ActTo, TargetTo, [{_UT2, _}, {UT1, _} | _] = UD) when UT1 > ActTo ->
+ hcpu(ActTo, TargetTo, tl(UD));
+%% UT2 >= ActTo > TargetTo >= UT1
+hcpu(ActTo, TargetTo,
+ [{UT2, U}, {UT1, _} | _]) when UT2 >= ActTo,
+ TargetTo >= UT1 ->
+ case U >= (((ActTo - TargetTo) / (UT2 - UT1))
+ * (?HIGH_UTIL/100.0)) of
+ true -> yes;
+ false -> no
+ end;
+%% UT2 >= ActTo >= UT1 > TargetTo
+hcpu(ActTo, TargetTo,
+ [{UT2, U}, {UT1, _} | _] = UD) when UT2 >= ActTo,
+ ActTo >= UT1,
+ UT1 > TargetTo ->
+ case U >= (((ActTo - UT1) / (UT2 - UT1))
+ * (?HIGH_UTIL/100.0)) of
+ true -> hcpu(ActTo, TargetTo, tl(UD));
+ false -> no
+ end;
+%% ActTo > UT2 >= TargetTo >= UT1
+hcpu(ActTo, TargetTo,
+ [{UT2, U}, {UT1, _} | _]) when ActTo > UT2,
+ TargetTo >= UT1 ->
+ case U >= (((UT2 - TargetTo) / (UT2 - UT1))
+ * (?HIGH_UTIL/100.0)) of
+ true -> yes;
+ false -> no
+ end;
+%% ActTo > UT2 > UT1 > TargetTo
+hcpu(ActTo, TargetTo,
+ [{UT2, U}, {UT1, _} | _] = UD) when ActTo > UT2,
+ UT1 > TargetTo ->
+ case U >= ?HIGH_UTIL of
+ true -> hcpu(ActTo, TargetTo, tl(UD));
+ false -> no
+ end.
+
type_str(receive_after) -> "receive ... after";
type_str(bif_timer) -> "BIF timer";
type_str(driver) -> "driver".
@@ -142,24 +244,24 @@ unit_str(Res) -> Res.
to_diff(Timeout, Start, Stop) ->
%% 'Timeout' in milli seconds
- %% 'Start', 'Stop', and result in native unit
- (Stop - Start) - erlang:convert_time_unit(Timeout, milli_seconds, native).
+ %% 'Start', 'Stop', and result in micro seconds
+ (Stop - Start) - Timeout*1000.
ms(Time) ->
- erlang:convert_time_unit(Time, native, milli_seconds).
+ erlang:convert_time_unit(Time, micro_seconds, milli_seconds).
max_late() ->
- erlang:convert_time_unit(?MAX_LATE_MS, milli_seconds, native).
+ erlang:convert_time_unit(?MAX_LATE_MS, milli_seconds, micro_seconds).
receive_after(Timeout) ->
- Start = erlang:monotonic_time(),
+ Start = erlang:monotonic_time(micro_seconds),
receive
{get_result, ?REG_NAME} ->
?REG_NAME ! #timeout_rec{pid = self(),
type = receive_after,
timeout = Timeout}
after Timeout ->
- Stop = erlang:monotonic_time(),
+ Stop = erlang:monotonic_time(micro_seconds),
receive
{get_result, ?REG_NAME} ->
?REG_NAME ! #timeout_rec{pid = self(),
@@ -174,7 +276,7 @@ receive_after(Timeout) ->
driver(Timeout) ->
Port = open_port({spawn, ?DRV_NAME},[]),
link(Port),
- Start = erlang:monotonic_time(),
+ Start = erlang:monotonic_time(micro_seconds),
erlang:port_command(Port, <<?START_TIMER, Timeout:32>>),
receive
{get_result, ?REG_NAME} ->
@@ -182,7 +284,7 @@ driver(Timeout) ->
type = driver,
timeout = Timeout};
{Port,{data,[?TIMER]}} ->
- Stop = erlang:monotonic_time(),
+ Stop = erlang:monotonic_time(micro_seconds),
unlink(Port),
true = erlang:port_close(Port),
receive
@@ -197,7 +299,7 @@ driver(Timeout) ->
end.
bif_timer(Timeout) ->
- Start = erlang:monotonic_time(),
+ Start = erlang:monotonic_time(micro_seconds),
Tmr = erlang:start_timer(Timeout, self(), ok),
receive
{get_result, ?REG_NAME} ->
@@ -205,7 +307,7 @@ bif_timer(Timeout) ->
type = bif_timer,
timeout = Timeout};
{timeout, Tmr, ok} ->
- Stop = erlang:monotonic_time(),
+ Stop = erlang:monotonic_time(micro_seconds),
receive
{get_result, ?REG_NAME} ->
?REG_NAME ! #timeout_rec{pid = self(),
@@ -218,13 +320,22 @@ bif_timer(Timeout) ->
end.
test(Starter, DrvDir, StartDone) ->
+ process_flag(priority, high),
erl_ddll:start(),
ok = load_driver(DrvDir, ?DRV_NAME),
process_flag(trap_exit, true),
register(?REG_NAME, self()),
{group_leader, GL} = process_info(whereis(net_kernel),group_leader),
group_leader(GL, self()),
- Start = erlang:monotonic_time(),
+ try
+ application:start(sasl),
+ application:start(os_mon)
+ catch
+ _ : _ ->
+ ok
+ end,
+ UtilData = new_util(),
+ Start = erlang:monotonic_time(micro_seconds),
TORs = lists:map(fun (Min) ->
TO = Min*60*1000,
[#timeout_rec{pid = spawn_opt(
@@ -252,16 +363,27 @@ test(Starter, DrvDir, StartDone) ->
lists:seq(1, ?MAX_TIMEOUT)),
FlatTORs = lists:flatten(TORs),
Starter ! StartDone,
- test_loop(FlatTORs, Start).
+ test_loop(FlatTORs, Start, UtilData).
+
+new_util() ->
+ new_util([]).
+
+new_util(UtilData) ->
+ Util = cpu_sup:util(),
+ Time = erlang:monotonic_time(micro_seconds),
+ [{Time, Util} | UtilData].
-test_loop(TORs, Start) ->
+test_loop(TORs, Start, UtilData) ->
receive
{get_result, ?REG_NAME, Pid} ->
- End = erlang:monotonic_time(),
- Pid ! {result, ?REG_NAME, get_test_results(TORs), Start, End},
+ End = erlang:monotonic_time(micro_seconds),
+ EndUtilData = new_util(UtilData),
+ Pid ! {result, ?REG_NAME, get_test_results(TORs), Start, End, EndUtilData},
erl_ddll:unload_driver(?DRV_NAME),
erl_ddll:stop(),
exit(bye)
+ after ?UTIL_INTERVAL ->
+ test_loop(TORs, Start, new_util(UtilData))
end.
get_test_results(TORs) ->
diff --git a/erts/emulator/test/z_SUITE.erl b/erts/emulator/test/z_SUITE.erl
index d1085c1958..ab56018373 100644
--- a/erts/emulator/test/z_SUITE.erl
+++ b/erts/emulator/test/z_SUITE.erl
@@ -191,7 +191,13 @@ node_container_refc_check(Config) when is_list(Config) ->
ok.
long_timers(Config) when is_list(Config) ->
- ok = long_timers_test:check_result().
+ case long_timers_test:check_result() of
+ ok -> ok;
+ high_cpu -> {comment, "Ignored failures due to high CPU utilization"};
+ missing_cpu_info -> {comment, "Ignored failures due to missing CPU utilization information"};
+ Fail -> ct:fail(Fail)
+ end.
+
pollset_size(Config) when is_list(Config) ->
Name = pollset_size_testcase_initial_state_holder,