aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnders Svensson <[email protected]>2017-04-04 17:17:21 +0200
committerAnders Svensson <[email protected]>2017-06-11 16:30:36 +0200
commit008b12c642bdcfb0fa96543da2d5142502b210d7 (patch)
tree185e69a4c84964daf86b4ce498c38e204d77e8e4
parenta47ab3e0021a3d27936f16b464112c2f08a2d2f0 (diff)
downloadotp-008b12c642bdcfb0fa96543da2d5142502b210d7.tar.gz
otp-008b12c642bdcfb0fa96543da2d5142502b210d7.tar.bz2
otp-008b12c642bdcfb0fa96543da2d5142502b210d7.zip
Rework gen_sctp suite to demonstrate remaining problems
In particular, that transmission can be very slow. The problem appears to be linked to sndbuf/recbuf, but even with buffers that are large enough to hold all messages being sent, turnaround times can still vary by hundreds of milliseconds in a reasonable test environment. Use multiple streams and a sender process to more closely mirror the usage in diameter_sctp, but neither is the source of the problems.
-rw-r--r--lib/diameter/test/diameter_gen_sctp_SUITE.erl437
1 files changed, 250 insertions, 187 deletions
diff --git a/lib/diameter/test/diameter_gen_sctp_SUITE.erl b/lib/diameter/test/diameter_gen_sctp_SUITE.erl
index 79db39ca45..457d9a0a85 100644
--- a/lib/diameter/test/diameter_gen_sctp_SUITE.erl
+++ b/lib/diameter/test/diameter_gen_sctp_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2010-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2010-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -33,8 +33,8 @@
end_per_suite/1]).
%% testcases
--export([send_not_from_controlling_process/1,
- send_from_multiple_clients/1, send_from_multiple_clients/0,
+-export([send_one_from_many/1, send_one_from_many/0,
+ send_many_from_one/1, send_many_from_one/0,
receive_what_was_sent/1]).
-include_lib("kernel/include/inet_sctp.hrl").
@@ -45,16 +45,24 @@
%% Open sockets on the loopback address.
-define(ADDR, {127,0,0,1}).
-%% Snooze, nap, siesta.
--define(SLEEP(T), receive after T -> ok end).
-
%% An indescribably long number of milliseconds after which everthing
%% that should have happened has.
-define(FOREVER, 2000).
+%% How many milliseconds to tolerate between the fastest and slowest
+%% turnaround times.
+-define(VARIANCE, 100).
+
%% The first byte in each message we send as a simple guard against
%% not receiving what was sent.
--define(MAGIC, 42).
+-define(MAGIC, 0).
+
+%% Requested number of inbound/outbound streams.
+-define(STREAMS, 5).
+
+%% Success for send_multiple. Match in each testcase rather than in
+%% send_multiple itself for a better failure in common_test.
+-define(OK, {_, true, _, [true, true], [], _}).
%% ===========================================================================
@@ -62,8 +70,8 @@ suite() ->
[{timetrap, {seconds, 10}}].
all() ->
- [send_not_from_controlling_process,
- send_from_multiple_clients,
+ [send_one_from_many,
+ send_many_from_one,
receive_what_was_sent].
init_per_suite(Config) ->
@@ -81,130 +89,37 @@ end_per_suite(_Config) ->
%% ===========================================================================
-%% send_not_from_controlling_process/1
-%%
-%% This testcase failing shows gen_sctp:send/4 hanging when called
-%% outside the controlling process of the socket in question.
-
-send_not_from_controlling_process(_) ->
- Pids = send_not_from_controlling_process(),
- ?SLEEP(?FOREVER),
- try
- [] = [{P,I} || P <- Pids, I <- [process_info(P)], I /= undefined]
- after
- lists:foreach(fun(P) -> exit(P, kill) end, Pids)
- end.
-
-%% send_not_from_controlling_process/0
-%%
-%% Returns the pids of three spawned processes: a listening process, a
-%% connecting process and a sending process.
-%%
-%% The expected behaviour is that all three processes exit:
-%%
-%% - The listening process exits upon receiving an SCTP message
-%% sent by the sending process.
-%% - The connecting process exits upon listening process exit.
-%% - The sending process exits upon gen_sctp:send/4 return.
-%%
-%% The observed behaviour is that all three processes remain alive
-%% indefinitely:
-%%
-%% - The listening process never receives the SCTP message sent
-%% by the sending process.
-%% - The connecting process has an inet_reply message in its mailbox
-%% as a consequence of the call to gen_sctp:send/4 call from the
-%% sending process.
-%% - The call to gen_sctp:send/4 in the sending process doesn't return,
-%% hanging in prim_inet:getopts/2.
-
-send_not_from_controlling_process() ->
- FPid = self(),
- {L, MRef} = spawn_monitor(fun() -> listen(FPid) end),
- receive
- {?MODULE, C, S} ->
- demonitor(MRef, [flush]),
- [L,C,S];
- {'DOWN', MRef, process, _, _} = T ->
- error(T)
- end.
-
-%% listen/1
-
-listen(FPid) ->
- {ok, Sock} = open(),
- ok = gen_sctp:listen(Sock, true),
- {ok, PortNr} = inet:port(Sock),
- LPid = self(),
- spawn(fun() -> connect1(PortNr, FPid, LPid) end), %% connecting process
- Id = assoc(Sock),
- recv(Sock, Id).
-
-%% connect1/3
-
-connect1(PortNr, FPid, LPid) ->
- {ok, Sock} = open(),
- ok = gen_sctp:connect_init(Sock, ?ADDR, PortNr, []),
- Id = assoc(Sock),
- FPid ! {?MODULE,
- self(),
- spawn(fun() -> send(Sock, Id) end)}, %% sending process
- MRef = monitor(process, LPid),
- down(MRef). %% Waits with this as current_function.
-
-%% down/1
-
-down(MRef) ->
- receive {'DOWN', MRef, process, _, Reason} -> Reason end.
-
-%% send/2
-
-send(Sock, Id) ->
- ok = gen_sctp:send(Sock, Id, 0, <<0:32>>).
-
-%% ===========================================================================
-
-%% send_from_multiple_clients/0
+%% send_one_from_many/0
%%
%% Demonstrates sluggish delivery of messages.
-send_from_multiple_clients() ->
- [{timetrap, {seconds, 60}}].
+send_one_from_many() ->
+ [{timetrap, {seconds, 30}}].
-send_from_multiple_clients(_) ->
- {S, Rs} = T = send_from_multiple_clients(8, 1024),
- Max = ?FOREVER*1000,
- {false, [], _} = {Max < S,
- Rs -- [OI || {O,_} = OI <- Rs, is_integer(O)],
- T}.
+send_one_from_many(_) ->
+ ?OK = send_multiple(128, 1, 1024).
-%% send_from_multiple_clients/2
+%% send_one_from_many/2
%%
%% Opens a listening socket and then spawns a specified number of
-%% processes, each of which connects to the listening socket. Each
-%% connecting process then sends a message, whose size in bytes is
-%% passed as an argument, the listening process sends a reply
-%% containing the time at which the message was received, and the
-%% connecting process then exits upon reception of this reply.
+%% processes, each of which connects, sends a message, receives a
+%% reply, and exits.
%%
%% Returns the elapsed time for all connecting process to exit
-%% together with a list of exit reasons for the connecting processes.
-%% In the successful case a connecting process exits with the
-%% outbound/inbound transit times for the sent/received message as
-%% reason.
+%% together with a list of exit reasons. In the successful case a
+%% connecting process exits with the outbound/inbound transit times
+%% for the sent/received message as reason.
%%
%% The observed behaviour is that some outbound messages (that is,
%% from a connecting process to the listening process) can take an
%% unexpectedly long time to complete their journey. The more
-%% connecting processes, the longer the possible delay it seems.
-%%
-%% eg. (With F = fun send_from_multiple_clients/2.)
+%% connecting processes, the longer it can take it seems.
%%
-%% 5> F(2, 1024).
+%% eg. 5> send_one_from_many(2, 1024).
%% {875,[{128,116},{113,139}]}
-%% 6> F(4, 1024).
+%% 6> send_one_from_many(4, 1024).
%% {2995290,[{2994022,250},{2994071,80},{200,130},{211,113}]}
-%% 7> F(8, 1024).
+%% 7> send_one_from_many(8, 1024).
%% {8997461,[{8996161,116},
%% {2996471,86},
%% {2996278,116},
@@ -213,7 +128,7 @@ send_from_multiple_clients(_) ->
%% {213,159},
%% {373,173},
%% {376,118}]}
-%% 8> F(8, 1024).
+%% 8> send_one_from_many(8, 1024).
%% {21001891,[{20999968,128},
%% {8997891,172},
%% {8997927,91},
@@ -223,120 +138,261 @@ send_from_multiple_clients(_) ->
%% {117,98},
%% {149,125}]}
%%
-%% This turns out to have been due to SCTP resends as a consequence of
-%% the listener having an insufficient recbuf. Increasing the size
-%% solves the problem.
-%%
-
-send_from_multiple_clients(N, Sz)
- when is_integer(N), 0 < N, is_integer(Sz), 0 < Sz ->
- timer:tc(fun listen/2, [N, <<?MAGIC, 0:Sz/unit:8>>]).
-
-%% listen/2
-listen(N, Bin) ->
+send_multiple(Clients, Msgs, Sz)
+ when is_integer(Clients), 0 < Clients,
+ is_integer(Msgs), 0 < Msgs,
+ is_integer(Sz), 0 < Sz ->
+ T0 = diameter_lib:now(),
+ {S, Res} = timer:tc(fun listen/3, [Clients, Msgs, Sz]),
+ report(T0, Res),
+ Ts = lists:append(Res),
+ Outgoing = [DT || {_,{_,_,DT},{_,_,_},_} <- Ts],
+ Incoming = [DT || {_,{_,_,_},{_,_,DT},_} <- Ts],
+ Diffs = [lists:max(L) - lists:min(L) || L <- [Outgoing, Incoming]],
+ {S,
+ S < ?FOREVER*1000,
+ Diffs,
+ [D < V || V <- [?VARIANCE*1000], D <- Diffs],
+ [T || T <- Ts, [] == [T || {_,{_,_,_},{_,_,_},_} <- [T]]],
+ Res}.
+
+%% listen/3
+
+listen(Clients, Msgs, Sz) ->
{ok, Sock} = open(),
ok = gen_sctp:listen(Sock, true),
{ok, PortNr} = inet:port(Sock),
%% Spawn a middleman that in turn spawns N connecting processes,
%% collects a list of exit reasons and then exits with the list as
- %% reason. loop/3 returns when we receive this list from the
+ %% reason. accept/2 returns when we receive this list from the
%% middleman's 'DOWN'.
Self = self(),
- Fun = fun() -> exit(connect2(Self, PortNr, Bin)) end,
- {_, MRef} = spawn_monitor(fun() -> exit(fold(N, Fun)) end),
- loop(Sock, MRef, Bin).
+ Fun = fun() -> exit(client(Self, PortNr, Msgs, Sz)) end, %% start clients
+ {_, MRef} = spawn_monitor(fun() -> exit(clients(Clients, Fun)) end),
+ accept_loop(Sock, MRef).
-%% fold/2
+%% fclients/2
%%
%% Spawn N processes and collect their exit reasons in a list.
-fold(N, Fun) ->
+clients(N, Fun) ->
start(N, Fun),
acc(N, []).
+%% start/2
+
start(0, _) ->
ok;
+
start(N, Fun) ->
spawn_monitor(Fun),
start(N-1, Fun).
+%% acc/2
+
acc(0, Acc) ->
Acc;
+
acc(N, Acc) ->
receive
{'DOWN', _MRef, process, _, RC} ->
acc(N-1, [RC | Acc])
end.
-%% loop/3
+%% accept_loop/2
-loop(Sock, MRef, Bin) ->
+accept_loop(Sock, MRef) ->
+ ok = inet:setopts(Sock, [{active, once}]),
receive
- ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = Id}], B})
+ ?SCTP(Sock, {_, #sctp_assoc_change{state = comm_up,
+ outbound_streams = OS,
+ assoc_id = Id}}) ->
+ Self = self(),
+ TPid = spawn(fun() -> assoc(monitor(process, Self), Id, OS) end),
+ NewSock = peeloff(Sock, Id, TPid),
+ TPid ! {peeloff, NewSock},
+ accept_loop(Sock, MRef);
+ ?SCTP(Sock, _) ->
+ accept_loop(Sock, MRef);
+ {'DOWN', MRef, process, _, Reason} ->
+ Reason;
+ T ->
+ error(T)
+ end.
+
+%% assoc/3
+%%
+%% Server process that answers incoming messages as long as the parent
+%% lives.
+
+assoc(MRef, Id, OS)
+ when is_reference(MRef) ->
+ {peeloff, Sock} = receive T -> T end,
+ recv_loop(Sock, Id, sender(Sock, Id, OS), MRef).
+
+%% recv_loop/4
+
+recv_loop(Sock, Id, Pid, MRef) ->
+ ok = inet:setopts(Sock, [{active, once}]),
+ receive
+ ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], B})
when is_binary(B) ->
- Sz = size(Bin),
- {Sz, Bin} = {size(B), B}, %% assert
- ok = send(Sock, Id, mark(Bin)),
- loop(Sock, MRef, Bin);
+ T2 = diameter_lib:now(),
+ I = Id, %% assert
+ <<?MAGIC, Bin/binary>> = B, %% assert
+ {[_,_,_,Sz] = L, Bytes} = unmark(Bin),
+ Sz = size(Bin) - Bytes, %% assert
+ <<_:Bytes/binary, Body:Sz/binary>> = Bin,
+ send(Pid, [T2|L], Body), %% answer
+ recv_loop(Sock, Id, Pid, MRef);
?SCTP(Sock, _) ->
- loop(Sock, MRef, Bin);
+ recv_loop(Sock, Id, Pid, MRef);
{'DOWN', MRef, process, _, Reason} ->
- Reason
+ Reason;
+ T ->
+ error(T)
end.
-%% connect2/3
+%% send/3
-connect2(Pid, PortNr, Bin) ->
- monitor(process, Pid),
+send(Pid, Header, Body) ->
+ Pid ! {send, Header, Body}.
+
+%% sender/3
+%%
+%% Start a process that sends, so as not to block the controlling process.
+sender(Sock, Id, OS) ->
+ Pid = self(),
+ spawn(fun() -> send_loop(Sock, Id, OS, 1, monitor(process, Pid)) end).
+
+%% send_loop/5
+
+send_loop(Sock, Id, OS, N, MRef) ->
+ receive
+ {send, L, Body} ->
+ Stream = N rem OS,
+ ok = send(Sock, Id, Stream, mark(Body, [N, Stream | L])),
+ send_loop(Sock, Id, OS, N+1, MRef);
+ {'DOWN', MRef, process, _, _} = T ->
+ T;
+ T ->
+ error(T)
+ end.
+
+%% peeloff/3
+
+peeloff(LSock, Id, TPid) ->
+ {ok, Sock} = gen_sctp:peeloff(LSock, Id),
+ ok = gen_sctp:controlling_process(Sock, TPid),
+ Sock.
+
+%% client/4
+
+client(Pid, PortNr, Msgs, Sz) ->
+ monitor(process, Pid),
{ok, Sock} = open(),
ok = gen_sctp:connect_init(Sock, ?ADDR, PortNr, []),
- Id = assoc(Sock),
+ recv_loop(Sock, Msgs, Sz).
- %% T1 = time before send
- %% T2 = time after listening process received our message
- %% T3 = time after reply is received
+%% recv_loop/3
- T1 = diameter_lib:now(),
- ok = send(Sock, Id, Bin),
- T2 = unmark(recv(Sock, Id)),
- T3 = diameter_lib:now(),
- {diameter_lib:micro_diff(T2, T1), %% Outbound
- diameter_lib:micro_diff(T3, T2)}. %% Inbound
+recv_loop(_, 0, T) ->
+ [_,_|Acc] = T,
+ Acc;
-%% recv/2
+recv_loop(Sock, Msgs, T) ->
+ ok = inet:setopts(Sock, [{active, once}]),
+ {I, NewT} = recv(Sock, Msgs, T, receive X -> X end),
+ recv_loop(Sock, Msgs - I, NewT).
-recv(Sock, Id) ->
- receive
- ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], Bin})
- when is_binary(Bin) ->
- Id = I, %% assert
- Bin;
- ?SCTP(S, _) ->
- Sock = S, %% assert
- recv(Sock, Id);
- T ->
- exit(T)
+%% recv/4
+
+recv(Sock, Msgs, Sz, ?SCTP(Sock, {_, #sctp_assoc_change{} = A})) ->
+ #sctp_assoc_change{state = comm_up, %% assert
+ assoc_id = Id,
+ outbound_streams = OS}
+ = A,
+ true = is_integer(Sz), %% assert
+ send_n(Msgs, sender(Sock, Id, OS), Sz),
+ {0, [Id, OS]};
+
+recv(Sock, _, T, ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = Id}], Bin})) ->
+ T4 = diameter_lib:now(),
+ [Id, OS | Acc] = T,
+ {1, [Id, OS, stat(T4, Bin) | Acc]};
+
+recv(Sock, _, T, ?SCTP(Sock, _)) ->
+ {0, [_,_|_] = T};
+
+recv(_, _, _, T) ->
+ error(T).
+
+%% send_n/3
+%%
+%% Send messages to the server from dedicated processes.
+
+send_n(0, _, _) ->
+ ok;
+
+send_n(N, Pid, Sz) ->
+ M = rand:uniform(255),
+ send(Pid, [Sz], binary:copy(<<M>>, Sz)),
+ send_n(N-1, Pid, Sz).
+
+%% send/4
+
+send(Sock, Id, Stream, Bin) ->
+ case gen_sctp:send(Sock, Id, Stream, <<?MAGIC, Bin/binary>>) of
+ {error, eagain} ->
+ send(Sock, Id, Stream, Bin);
+ RC ->
+ RC
end.
-%% send/3
+%% stat/2
-send(Sock, Id, Bin) ->
- gen_sctp:send(Sock, Id, 0, Bin).
+stat(T4, <<?MAGIC, Bin/binary>>) ->
+ %% T1 = time at send
+ %% T2 = time at reception by server
+ %% T3 = time at reception by server's sender
+ %% T4 = time at reception of answer
-%% mark/1
+ {[T3,NI,SI,T2,T1,NO,SO,Sz], Bytes} = unmark(Bin),
-mark(Bin) ->
- Info = term_to_binary(diameter_lib:now()),
- <<Info/binary, Bin/binary>>.
+ Sz = size(Bin) - Bytes, %% assert
+ {T1,
+ {NO, SO, diameter_lib:micro_diff(T2, T1)}, %% Outbound
+ {NI, SI, diameter_lib:micro_diff(T4, T3)}, %% Inbound
+ T4}.
+
+%% mark/2
+
+mark(Bin, T) ->
+ Info = term_to_binary([diameter_lib:now() | T]),
+ <<Info/binary, Bin/binary>>.
+
%% unmark/1
unmark(Bin) ->
- binary_to_term(Bin).
+ T = binary_to_term(Bin),
+ {T, size(term_to_binary(T))}.
+
+%% ===========================================================================
+
+%% send_many_from_one/0
+%%
+%% Demonstrates sluggish delivery of messages.
+
+send_many_from_one() ->
+ [{timetrap, {seconds, 30}}].
+
+send_many_from_one(_) ->
+ ?OK = send_multiple(1, 128, 1024).
%% ===========================================================================
@@ -345,7 +401,7 @@ unmark(Bin) ->
%% Demonstrates reception of a message that differs from that sent.
receive_what_was_sent(_Config) ->
- send_from_multiple_clients(1, 1024*32). %% fails
+ ?OK = send_multiple(1, 1, 1024*32).
%% ===========================================================================
@@ -357,16 +413,23 @@ open() ->
%% open/1
open(Opts) ->
- gen_sctp:open([{ip, ?ADDR}, {port, 0}, {active, true}, binary,
+ gen_sctp:open([{ip, ?ADDR}, {port, 0}, {active, false}, binary,
+ {sctp_initmsg, #sctp_initmsg{num_ostreams = ?STREAMS,
+ max_instreams = ?STREAMS}},
{recbuf, 1 bsl 16}, {sndbuf, 1 bsl 16}
| Opts]).
-%% assoc/1
+%% report/2
-assoc(Sock) ->
- receive
- ?SCTP(Sock, {_, #sctp_assoc_change{state = S,
- assoc_id = Id}}) ->
- comm_up = S, %% assert
- Id
- end.
+report(T0, Ts) ->
+ ct:pal("~p~n", [lists:sort([sort([{diameter_lib:micro_diff(T1,T0),
+ OT,
+ IT,
+ diameter_lib:micro_diff(T4,T0)}
+ || {T1,OT,IT,T4} <- L])
+ || L <- Ts])]).
+
+%% sort/1
+
+sort(L) ->
+ lists:sort(fun({_,{N,_,_},_,_}, {_,{M,_,_},_,_}) -> N =< M end, L).