aboutsummaryrefslogtreecommitdiffstats
path: root/lib/snmp/test/snmp_agent_test_lib.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/snmp/test/snmp_agent_test_lib.erl')
-rw-r--r--lib/snmp/test/snmp_agent_test_lib.erl337
1 files changed, 256 insertions, 81 deletions
diff --git a/lib/snmp/test/snmp_agent_test_lib.erl b/lib/snmp/test/snmp_agent_test_lib.erl
index 6defdadb5a..c0da47dc4c 100644
--- a/lib/snmp/test/snmp_agent_test_lib.erl
+++ b/lib/snmp/test/snmp_agent_test_lib.erl
@@ -66,7 +66,7 @@
]).
%% Internal exports
--export([wait/5, run/4]).
+-export([tc_wait/5, tc_run/4]).
-include_lib("kernel/include/file.hrl").
-include_lib("common_test/include/ct.hrl").
@@ -276,87 +276,197 @@ init_case(Config) when is_list(Config) ->
%%% configuration.
%%%--------------------------------------------------
-try_test(Mod, Func) ->
- call(get(mgr_node), ?MODULE, run, [Mod, Func, [], []]).
-
-try_test(Mod, Func, A) ->
- call(get(mgr_node), ?MODULE, run, [Mod, Func, A, []]).
-
-try_test(Mod, Func, A, Opts) ->
- call(get(mgr_node), ?MODULE, run, [Mod, Func, A, Opts]).
-
-call(N,M,F,A) ->
- ?DBG("call -> entry with~n"
- " N: ~p~n"
- " M: ~p~n"
- " F: ~p~n"
- " A: ~p~n"
- " when~n"
- " get(): ~p",
- [N,M,F,A,get()]),
- spawn(N, ?MODULE, wait, [self(),get(),M,F,A]),
+try_test(TcRunMod, TcRunFunc) ->
+ try_test(TcRunMod, TcRunFunc, []).
+
+try_test(TcRunMod, TcRunFunc, TcRunArgs) ->
+ try_test(TcRunMod, TcRunFunc, TcRunArgs, []).
+
+try_test(TcRunMod, TcRunFunc, TcRunArgs, TcRunOpts) ->
+ Node = get(mgr_node),
+ Mod = ?MODULE,
+ Func = tc_run,
+ Args = [TcRunMod, TcRunFunc, TcRunArgs, TcRunOpts],
+ tc_try(Node, Mod, Func, Args).
+
+%% We spawn a test case runner process on the manager node.
+%% The assumption is that the manager shall do something, but
+%% not all test cases have the manager perform actions.
+%% In some cases we make a rpc call back to the agent node directly
+%% and call something in the agent... (for example the info_test
+%% test case).
+%% We use link (instead of monitor) so that the test case timeout
+%% cleanup (kills) also takes effect on the test case runner
+%% process.
+
+tc_try(N, M, F, A) ->
+ ?PRINT2("tc_try -> entry with"
+ "~n N: ~p"
+ "~n M: ~p"
+ "~n F: ~p"
+ "~n A: ~p"
+ "~n when"
+ "~n get(): ~p"
+ "~n", [N,
+ M, F, A,
+ get()]),
+ case net_adm:ping(N) of
+ pong ->
+ ?PRINT2("tc_try -> ~p still running - start runner~n", [N]),
+ OldFlag = trap_exit(true), % Make sure we catch it
+ Runner = spawn_link(N, ?MODULE, tc_wait, [self(), get(), M, F, A]),
+ await_tc_runner_started(Runner, OldFlag),
+ await_tc_runner_done(Runner, OldFlag);
+ pang ->
+ ?EPRINT2("tc_try -> ~p *not* running~n", [N]),
+ skip({node_not_running, N})
+ end.
+
+%% Wait for the start acknowledgement from the tc runner process.
+%%
+%% Returns ok once {tc_runner_started, Runner} has been received.
+%% Exits with {tx_runner_start_failed, Reason} if the runner terminates
+%% before acknowledging, or with {tc_runner_start, timeout, RunnerInfo}
+%% if no ack arrives within 10 seconds (the runner is then killed).
+%% OldFlag is the trap_exit value to restore on timeout.
+await_tc_runner_started(Runner, OldFlag) ->
+    ?PRINT2("await tc-runner (~p) start ack~n", [Runner]),
     receive
+        {'EXIT', Runner, Reason} ->
+            ?EPRINT2("TC runner start failed: "
+                     "~n ~p~n", [Reason]),
+            exit({tx_runner_start_failed, Reason});
+        {tc_runner_started, Runner} ->
+            ?PRINT2("TC runner start acknowledged~n"),
+            ok
+    after 10000 -> %% We should *really* not have to wait this long, but...
+            trap_exit(OldFlag),
+            unlink_and_flush_exit(Runner),
+            %% The runner is spawned on another node (see tc_try/4), and
+            %% process_info/1 raises badarg for non-local pids. Fetch the
+            %% info on the runner's own node instead; a bounded timeout
+            %% keeps us from hanging if that node is unresponsive (the
+            %% result is then {badrpc, _}, which is only used for logging).
+            RunnerInfo = rpc:call(node(Runner),
+                                  erlang, process_info, [Runner],
+                                  5000),
+            ?EPRINT2("TC runner start timeout: "
+                     "~n ~p", [RunnerInfo]),
+            %% If we don't get a start ack within 10 seconds, we are f*ed
+            exit(Runner, kill),
+            exit({tc_runner_start, timeout, RunnerInfo})
+    end.
+
+await_tc_runner_done(Runner, OldFlag) ->
+ receive
+ {'EXIT', Runner, Reason} ->
+ %% This is not a normal (tc) failure (that is the clause below).
+ %% Instead the tc runner process crashed, for some reason. So
+ %% check if we have got any system events, and if so, skip.
+ SysEvs = snmp_test_global_sys_monitor:events(),
+ if
+ (SysEvs =:= []) ->
+ ?EPRINT2("TC runner failed: "
+ "~n ~p~n", [Reason]),
+ exit({tx_runner_failed, Reason});
+ true ->
+ ?EPRINT2("TC runner failed when we got system events: "
+ "~n Reason: ~p"
+ "~n Sys Events: ~p"
+ "~n", [Reason, SysEvs]),
+ skip([{reason, Reason}, {system_events, SysEvs}])
+ end;
+ {tc_runner_done, Runner, {'EXIT', {skip, Reason}}, Loc} ->
+ ?PRINT2("call -> done with skip: "
+ "~n Reason: ~p"
+ "~n Loc: ~p"
+ "~n", [Reason, Loc]),
+ trap_exit(OldFlag),
+ unlink_and_flush_exit(Runner),
+ put(test_server_loc, Loc),
+ skip(Reason);
+ {tc_runner_done, Runner, {'EXIT', Rn}, Loc} ->
+ ?PRINT2("call -> done with exit: "
+ "~n Rn: ~p"
+ "~n Loc: ~p"
+ "~n", [Rn, Loc]),
+ trap_exit(OldFlag),
+ unlink_and_flush_exit(Runner),
put(test_server_loc, Loc),
exit(Rn);
- {done, Ret, _Zed} ->
+ {tc_runner_done, Runner, Ret, _Zed} ->
?DBG("call -> done:"
"~n Ret: ~p"
"~n Zed: ~p", [Ret, _Zed]),
+ trap_exit(OldFlag),
+ unlink_and_flush_exit(Runner),
case Ret of
{error, Reason} ->
exit(Reason);
+ {skip, Reason} ->
+ skip(Reason);
OK ->
OK
end
end.
-wait(From, Env, M, F, A) ->
- ?DBG("wait -> entry with"
- "~n From: ~p"
- "~n Env: ~p"
- "~n M: ~p"
- "~n F: ~p"
- "~n A: ~p", [From, Env, M, F, A]),
+%% Set the trap_exit process flag of the calling process to Flag.
+%% Returns the result of erlang:process_flag/2, which callers in this
+%% module keep as the "old flag" in order to restore it later.
+trap_exit(Flag) when is_boolean(Flag) ->
+ erlang:process_flag(trap_exit, Flag).
+
+%% Unlink from Pid and discard one {'EXIT', Pid, _} message if it is
+%% already in the mailbox. The 'after 0' means we never wait for such
+%% a message to arrive. Always returns ok.
+unlink_and_flush_exit(Pid) ->
+ unlink(Pid),
+ receive
+ {'EXIT', Pid, _} ->
+ ok
+ after 0 ->
+ ok
+ end.
+
+tc_wait(From, Env, M, F, A) ->
+ ?PRINT2("tc_wait -> entry with"
+ "~n From: ~p"
+ "~n Env: ~p"
+ "~n M: ~p"
+ "~n F: ~p"
+ "~n A: ~p", [From, Env, M, F, A]),
+ From ! {tc_runner_started, self()},
lists:foreach(fun({K,V}) -> put(K,V) end, Env),
- Rn = (catch apply(M, F, A)),
- ?DBG("wait -> Rn: ~n~p", [Rn]),
- From ! {done, Rn, get(test_server_loc)},
- exit(Rn).
-
-run(Mod, Func, Args, Opts) ->
- ?DBG("run -> entry with"
- "~n Mod: ~p"
- "~n Func: ~p"
- "~n Args: ~p"
- "~n Opts: ~p", [Mod, Func, Args, Opts]),
- M = get(mib_dir),
- Dir = get(mgr_dir),
- User = snmp_misc:get_option(user, Opts, "all-rights"),
- SecLevel = snmp_misc:get_option(sec_level, Opts, noAuthNoPriv),
- EngineID = snmp_misc:get_option(engine_id, Opts, "agentEngine"),
+ ?PRINT2("tc_wait -> env set - now run tc~n"),
+ Res = (catch apply(M, F, A)),
+ ?PRINT2("tc_wait -> tc run done: "
+ "~n ~p"
+ "~n", [Res]),
+ From ! {tc_runner_done, self(), Res, get(test_server_loc)},
+ %% The point of this is that in some cases we have seen the
+ %% exit signal being "passed on" to CT, which considers any
+ %% exit a failure (even if it is {'EXIT', ok}).
+ %% So, just to be on the safe side, convert an 'ok' to a 'normal'.
+ case Res of
+ ok ->
+ exit(normal);
+ {ok, _} ->
+ exit(normal);
+ _ ->
+ exit(Res)
+ end.
+
+tc_run(Mod, Func, Args, Opts) ->
+ ?PRINT2("tc_run -> entry with"
+ "~n Mod: ~p"
+ "~n Func: ~p"
+ "~n Args: ~p"
+ "~n Opts: ~p"
+ "~n", [Mod, Func, Args, Opts]),
+ (catch snmp_test_mgr:stop()), % If we had a running mgr from a failed case
+ M = get(mib_dir),
+ Dir = get(mgr_dir),
+ User = snmp_misc:get_option(user, Opts, "all-rights"),
+ SecLevel = snmp_misc:get_option(sec_level, Opts, noAuthNoPriv),
+ EngineID = snmp_misc:get_option(engine_id, Opts, "agentEngine"),
CtxEngineID = snmp_misc:get_option(context_engine_id, Opts, EngineID),
- Community = snmp_misc:get_option(community, Opts, "all-rights"),
- ?DBG("run -> start crypto app",[]),
- _CryptoRes = ?CRYPTO_START(),
- ?DBG("run -> Crypto: ~p", [_CryptoRes]),
- catch snmp_test_mgr:stop(), % If we had a running mgr from a failed case
- StdM = join(code:priv_dir(snmp), "mibs") ++ "/",
- Vsn = get(vsn),
- ?DBG("run -> config:"
- "~n M: ~p"
- "~n Vsn: ~p"
- "~n Dir: ~p"
- "~n User: ~p"
- "~n SecLevel: ~p"
- "~n EngineID: ~p"
- "~n CtxEngineID: ~p"
- "~n Community: ~p"
- "~n StdM: ~p",
- [M,Vsn,Dir,User,SecLevel,EngineID,CtxEngineID,Community,StdM]),
+ Community = snmp_misc:get_option(community, Opts, "all-rights"),
+ ?DBG("tc_run -> start crypto app",[]),
+ _CryptoRes = ?CRYPTO_START(),
+ ?DBG("tc_run -> Crypto: ~p", [_CryptoRes]),
+ StdM = join(code:priv_dir(snmp), "mibs") ++ "/",
+ Vsn = get(vsn),
+ ?PRINT2("tc_run -> config:"
+ "~n M: ~p"
+ "~n Vsn: ~p"
+ "~n Dir: ~p"
+ "~n User: ~p"
+ "~n SecLevel: ~p"
+ "~n EngineID: ~p"
+ "~n CtxEngineID: ~p"
+ "~n Community: ~p"
+ "~n StdM: ~p"
+ "~n", [M,Vsn,Dir,User,SecLevel,EngineID,CtxEngineID,Community,StdM]),
case snmp_test_mgr:start([%% {agent, snmp_test_lib:hostname()},
{packet_server_debug, true},
{debug, true},
@@ -376,24 +486,45 @@ run(Mod, Func, Args, Opts) ->
{mibs, mibs(StdM, M)}]) of
{ok, _Pid} ->
case (catch apply(Mod, Func, Args)) of
+ {'EXIT', {skip, Reason}} ->
+ ?EPRINT2("apply skip detected: "
+ "~n ~p", [Reason]),
+ (catch snmp_test_mgr:stop()),
+ ?SKIP(Reason);
{'EXIT', Reason} ->
- catch snmp_test_mgr:stop(),
- ?FAIL({apply_failed, {Mod, Func, Args}, Reason});
+ %% We have hosts (mostly *very* slooow VMs) that
+ %% can timeout anything. Since we are basically
+ %% testing communication, we must therefore check
+ %% for system events at every failure. Grrr!
+ SysEvs = snmp_test_global_sys_monitor:events(),
+ (catch snmp_test_mgr:stop()),
+ if
+ (SysEvs =:= []) ->
+ ?EPRINT2("TC runner failed: "
+ "~n ~p~n", [Reason]),
+ ?FAIL({apply_failed, {Mod, Func, Args}, Reason});
+ true ->
+ ?EPRINT2("apply exit catched when we got system events: "
+ "~n Reason: ~p"
+ "~n Sys Events: ~p"
+ "~n", [Reason, SysEvs]),
+ ?SKIP([{reason, Reason}, {system_events, SysEvs}])
+ end;
Res ->
- catch snmp_test_mgr:stop(),
+ (catch snmp_test_mgr:stop()),
Res
end;
{error, Reason} ->
?EPRINT2("Failed starting (test) manager: "
"~n ~p", [Reason]),
- catch snmp_test_mgr:stop(),
+ (catch snmp_test_mgr:stop()),
?line ?FAIL({mgr_start_error, Reason});
Err ->
?EPRINT2("Failed starting (test) manager: "
"~n ~p", [Err]),
- catch snmp_test_mgr:stop(),
+ (catch snmp_test_mgr:stop()),
?line ?FAIL({mgr_start_failure, Err})
end.
@@ -907,10 +1038,22 @@ expect2(Mod, Line, F) ->
%% ----------------------------------------------------------------------
-get_timeout() ->
- get_timeout(os:type()).
+-define(BASE_REQ_TIMEOUT, 3500).
-get_timeout(_) -> 3500.
+get_timeout() ->
+ %% Try to figure out how "fast" a machine is.
+ %% We assume that the number of schedulers
+ %% (which depends on the number of cores)
+ %% affects the performance of the host...
+ %% This is obviously not enough. The network
+ %% also matters, as does the clock freq of the CPU, ...
+ %% But it's better than what we had before...
+ case erlang:system_info(schedulers) of
+ N when is_integer(N) ->
+ ?BASE_REQ_TIMEOUT + timer:seconds(10 div N);
+ _ ->
+ ?BASE_REQ_TIMEOUT
+ end.
receive_pdu(To) ->
receive
@@ -1083,6 +1226,18 @@ do_expect(trap, Enterp, Generic, Specific, ExpVBs, To) ->
{PureE, Generic, Specific, ExpVBs},
{Ent2, G2, Spec2, VBs}}};
+ {error, timeout} = Error ->
+ SysEvs = snmp_test_global_sys_monitor:events(),
+ io_format_expect("[expecting trap] got timeout when system events:"
+ "~n ~p", [SysEvs]),
+ if
+ (SysEvs =:= []) ->
+ Error;
+ true ->
+ skip({system_events, SysEvs})
+ end;
+
+
Error ->
Error
end.
@@ -1184,7 +1339,7 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To)
io_format_expect("received unexpected pdu with (11) "
"~n Type: ~p"
"~n ReqId: ~p"
- "~n Errot status: ~p"
+ "~n Error status: ~p"
"~n Error index: ~p",
[Type2, ReqId, Err2, Idx2]),
{error,
@@ -1247,7 +1402,7 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To)
io_format_expect("received unexpected pdu with (15) "
"~n Type: ~p"
"~n ReqId: ~p"
- "~n Errot status: ~p"
+ "~n Error status: ~p"
"~n Error index: ~p"
"~n Varbinds: ~p",
[Type2, ReqId, Err2, Idx2, VBs2]),
@@ -1257,10 +1412,23 @@ do_expect2(Check, Type, Err, Idx, ExpVBs, To)
{Type2, Err2, Idx2, VBs2},
ReqId}};
- Error ->
- io_format_expect("received error (16): "
+
+ {error, timeout} = Error ->
+ SysEvs = snmp_test_global_sys_monitor:events(),
+ io_format_expect("got timeout (16) when system events:"
+ "~n ~p", [SysEvs]),
+ if
+ (SysEvs =:= []) ->
+ Error;
+ true ->
+ skip({system_events, SysEvs})
+ end;
+
+
+ Error ->
+ io_format_expect("received error (17): "
"~n Error: ~p", [Error]),
- Error
+ Error
end.
@@ -1378,12 +1546,15 @@ start_node(Name) ->
""
end,
%% Do not use start_link!!! (the proc that calls this one is tmp)
- ?DBG("start_node -> Args: ~p~n",[Args]),
- A = Args ++ " -pa " ++ Pa,
+ ?DBG("start_node -> Args: ~p~n", [Args]),
+ A = Args ++ " -pa " ++ Pa ++
+ " -s " ++ atom_to_list(snmp_test_sys_monitor) ++ " start" ++
+ " -s global sync",
case (catch ?START_NODE(Name, A)) of
{ok, Node} ->
%% Tell the test_server to not clean up things it never started.
?DBG("start_node -> Node: ~p",[Node]),
+ global:sync(),
{ok, Node};
Else ->
?ERR("start_node -> failed with(other): Else: ~p",[Else]),
@@ -1701,6 +1872,10 @@ rpc(Node, F, A) ->
join(Dir, File) ->
filename:join(Dir, File).
+
+%% Terminate the calling process with exit reason {skip, R}.
+%% Never returns.
+skip(R) ->
+ exit({skip, R}).
+
%% await_pdu(To) ->
%% await_response(To, pdu).
%%