From 008b12c642bdcfb0fa96543da2d5142502b210d7 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Tue, 4 Apr 2017 17:17:21 +0200 Subject: Rework gen_sctp suite to demonstrate remaining problems In particular, that transmission can be very slow. The problem appears to be linked to sndbuf/recbuf, but even with buffers that are large enough to hold all messages being sent, turnaround times can still vary by hundreds of milliseconds in a reasonable test environment. Use multiple streams and a sender process to more closely mirror the usage in diameter_sctp, but neither is the source of the problems. --- lib/diameter/test/diameter_gen_sctp_SUITE.erl | 437 +++++++++++++++----------- 1 file changed, 250 insertions(+), 187 deletions(-) (limited to 'lib/diameter/test/diameter_gen_sctp_SUITE.erl') diff --git a/lib/diameter/test/diameter_gen_sctp_SUITE.erl b/lib/diameter/test/diameter_gen_sctp_SUITE.erl index 79db39ca45..457d9a0a85 100644 --- a/lib/diameter/test/diameter_gen_sctp_SUITE.erl +++ b/lib/diameter/test/diameter_gen_sctp_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2010-2016. All Rights Reserved. +%% Copyright Ericsson AB 2010-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -33,8 +33,8 @@ end_per_suite/1]). %% testcases --export([send_not_from_controlling_process/1, - send_from_multiple_clients/1, send_from_multiple_clients/0, +-export([send_one_from_many/1, send_one_from_many/0, + send_many_from_one/1, send_many_from_one/0, receive_what_was_sent/1]). -include_lib("kernel/include/inet_sctp.hrl"). @@ -45,16 +45,24 @@ %% Open sockets on the loopback address. -define(ADDR, {127,0,0,1}). -%% Snooze, nap, siesta. --define(SLEEP(T), receive after T -> ok end). - %% An indescribably long number of milliseconds after which everthing %% that should have happened has. -define(FOREVER, 2000). +%% How many milliseconds to tolerate between the fastest and slowest +%% turnaround times. +-define(VARIANCE, 100). + %% The first byte in each message we send as a simple guard against %% not receiving what was sent. --define(MAGIC, 42). +-define(MAGIC, 0). + +%% Requested number of inbound/outbound streams. +-define(STREAMS, 5). + +%% Success for send_multiple. Match in each testcase rather than in +%% send_multiple itself for a better failure in common_test. +-define(OK, {_, true, _, [true, true], [], _}). %% =========================================================================== @@ -62,8 +70,8 @@ suite() -> [{timetrap, {seconds, 10}}]. all() -> - [send_not_from_controlling_process, - send_from_multiple_clients, + [send_one_from_many, + send_many_from_one, receive_what_was_sent]. init_per_suite(Config) -> @@ -81,130 +89,37 @@ end_per_suite(_Config) -> %% =========================================================================== -%% send_not_from_controlling_process/1 -%% -%% This testcase failing shows gen_sctp:send/4 hanging when called -%% outside the controlling process of the socket in question. - -send_not_from_controlling_process(_) -> - Pids = send_not_from_controlling_process(), - ?SLEEP(?FOREVER), - try - [] = [{P,I} || P <- Pids, I <- [process_info(P)], I /= undefined] - after - lists:foreach(fun(P) -> exit(P, kill) end, Pids) - end. - -%% send_not_from_controlling_process/0 -%% -%% Returns the pids of three spawned processes: a listening process, a -%% connecting process and a sending process. -%% -%% The expected behaviour is that all three processes exit: -%% -%% - The listening process exits upon receiving an SCTP message -%% sent by the sending process. -%% - The connecting process exits upon listening process exit. -%% - The sending process exits upon gen_sctp:send/4 return. -%% -%% The observed behaviour is that all three processes remain alive -%% indefinitely: -%% -%% - The listening process never receives the SCTP message sent -%% by the sending process. -%% - The connecting process has an inet_reply message in its mailbox -%% as a consequence of the call to gen_sctp:send/4 call from the -%% sending process. -%% - The call to gen_sctp:send/4 in the sending process doesn't return, -%% hanging in prim_inet:getopts/2. - -send_not_from_controlling_process() -> - FPid = self(), - {L, MRef} = spawn_monitor(fun() -> listen(FPid) end), - receive - {?MODULE, C, S} -> - demonitor(MRef, [flush]), - [L,C,S]; - {'DOWN', MRef, process, _, _} = T -> - error(T) - end. - -%% listen/1 - -listen(FPid) -> - {ok, Sock} = open(), - ok = gen_sctp:listen(Sock, true), - {ok, PortNr} = inet:port(Sock), - LPid = self(), - spawn(fun() -> connect1(PortNr, FPid, LPid) end), %% connecting process - Id = assoc(Sock), - recv(Sock, Id). - -%% connect1/3 - -connect1(PortNr, FPid, LPid) -> - {ok, Sock} = open(), - ok = gen_sctp:connect_init(Sock, ?ADDR, PortNr, []), - Id = assoc(Sock), - FPid ! {?MODULE, - self(), - spawn(fun() -> send(Sock, Id) end)}, %% sending process - MRef = monitor(process, LPid), - down(MRef). %% Waits with this as current_function. - -%% down/1 - -down(MRef) -> - receive {'DOWN', MRef, process, _, Reason} -> Reason end. - -%% send/2 - -send(Sock, Id) -> - ok = gen_sctp:send(Sock, Id, 0, <<0:32>>). - -%% =========================================================================== - -%% send_from_multiple_clients/0 +%% send_one_from_many/0 %% %% Demonstrates sluggish delivery of messages. -send_from_multiple_clients() -> - [{timetrap, {seconds, 60}}]. +send_one_from_many() -> + [{timetrap, {seconds, 30}}]. -send_from_multiple_clients(_) -> - {S, Rs} = T = send_from_multiple_clients(8, 1024), - Max = ?FOREVER*1000, - {false, [], _} = {Max < S, - Rs -- [OI || {O,_} = OI <- Rs, is_integer(O)], - T}. +send_one_from_many(_) -> + ?OK = send_multiple(128, 1, 1024). -%% send_from_multiple_clients/2 +%% send_one_from_many/2 %% %% Opens a listening socket and then spawns a specified number of -%% processes, each of which connects to the listening socket. Each -%% connecting process then sends a message, whose size in bytes is -%% passed as an argument, the listening process sends a reply -%% containing the time at which the message was received, and the -%% connecting process then exits upon reception of this reply. +%% processes, each of which connects, sends a message, receives a +%% reply, and exits. %% %% Returns the elapsed time for all connecting process to exit -%% together with a list of exit reasons for the connecting processes. -%% In the successful case a connecting process exits with the -%% outbound/inbound transit times for the sent/received message as -%% reason. +%% together with a list of exit reasons. In the successful case a +%% connecting process exits with the outbound/inbound transit times +%% for the sent/received message as reason. %% %% The observed behaviour is that some outbound messages (that is, %% from a connecting process to the listening process) can take an %% unexpectedly long time to complete their journey. The more -%% connecting processes, the longer the possible delay it seems. -%% -%% eg. (With F = fun send_from_multiple_clients/2.) +%% connecting processes, the longer it can take it seems. %% -%% 5> F(2, 1024). +%% eg. 5> send_one_from_many(2, 1024). %% {875,[{128,116},{113,139}]} -%% 6> F(4, 1024). +%% 6> send_one_from_many(4, 1024). %% {2995290,[{2994022,250},{2994071,80},{200,130},{211,113}]} -%% 7> F(8, 1024). +%% 7> send_one_from_many(8, 1024). %% {8997461,[{8996161,116}, %% {2996471,86}, %% {2996278,116}, @@ -213,7 +128,7 @@ send_from_multiple_clients(_) -> %% {213,159}, %% {373,173}, %% {376,118}]} -%% 8> F(8, 1024). +%% 8> send_one_from_many(8, 1024). %% {21001891,[{20999968,128}, %% {8997891,172}, %% {8997927,91}, @@ -223,120 +138,261 @@ send_from_multiple_clients(_) -> %% {117,98}, %% {149,125}]} %% -%% This turns out to have been due to SCTP resends as a consequence of -%% the listener having an insufficient recbuf. Increasing the size -%% solves the problem. -%% - -send_from_multiple_clients(N, Sz) - when is_integer(N), 0 < N, is_integer(Sz), 0 < Sz -> - timer:tc(fun listen/2, [N, <>]). - -%% listen/2 -listen(N, Bin) -> +send_multiple(Clients, Msgs, Sz) + when is_integer(Clients), 0 < Clients, + is_integer(Msgs), 0 < Msgs, + is_integer(Sz), 0 < Sz -> + T0 = diameter_lib:now(), + {S, Res} = timer:tc(fun listen/3, [Clients, Msgs, Sz]), + report(T0, Res), + Ts = lists:append(Res), + Outgoing = [DT || {_,{_,_,DT},{_,_,_},_} <- Ts], + Incoming = [DT || {_,{_,_,_},{_,_,DT},_} <- Ts], + Diffs = [lists:max(L) - lists:min(L) || L <- [Outgoing, Incoming]], + {S, + S < ?FOREVER*1000, + Diffs, + [D < V || V <- [?VARIANCE*1000], D <- Diffs], + [T || T <- Ts, [] == [T || {_,{_,_,_},{_,_,_},_} <- [T]]], + Res}. + +%% listen/3 + +listen(Clients, Msgs, Sz) -> {ok, Sock} = open(), ok = gen_sctp:listen(Sock, true), {ok, PortNr} = inet:port(Sock), %% Spawn a middleman that in turn spawns N connecting processes, %% collects a list of exit reasons and then exits with the list as - %% reason. loop/3 returns when we receive this list from the + %% reason. accept/2 returns when we receive this list from the %% middleman's 'DOWN'. Self = self(), - Fun = fun() -> exit(connect2(Self, PortNr, Bin)) end, - {_, MRef} = spawn_monitor(fun() -> exit(fold(N, Fun)) end), - loop(Sock, MRef, Bin). + Fun = fun() -> exit(client(Self, PortNr, Msgs, Sz)) end, %% start clients + {_, MRef} = spawn_monitor(fun() -> exit(clients(Clients, Fun)) end), + accept_loop(Sock, MRef). -%% fold/2 +%% fclients/2 %% %% Spawn N processes and collect their exit reasons in a list. -fold(N, Fun) -> +clients(N, Fun) -> start(N, Fun), acc(N, []). +%% start/2 + start(0, _) -> ok; + start(N, Fun) -> spawn_monitor(Fun), start(N-1, Fun). +%% acc/2 + acc(0, Acc) -> Acc; + acc(N, Acc) -> receive {'DOWN', _MRef, process, _, RC} -> acc(N-1, [RC | Acc]) end. -%% loop/3 +%% accept_loop/2 -loop(Sock, MRef, Bin) -> +accept_loop(Sock, MRef) -> + ok = inet:setopts(Sock, [{active, once}]), receive - ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = Id}], B}) + ?SCTP(Sock, {_, #sctp_assoc_change{state = comm_up, + outbound_streams = OS, + assoc_id = Id}}) -> + Self = self(), + TPid = spawn(fun() -> assoc(monitor(process, Self), Id, OS) end), + NewSock = peeloff(Sock, Id, TPid), + TPid ! {peeloff, NewSock}, + accept_loop(Sock, MRef); + ?SCTP(Sock, _) -> + accept_loop(Sock, MRef); + {'DOWN', MRef, process, _, Reason} -> + Reason; + T -> + error(T) + end. + +%% assoc/3 +%% +%% Server process that answers incoming messages as long as the parent +%% lives. + +assoc(MRef, Id, OS) + when is_reference(MRef) -> + {peeloff, Sock} = receive T -> T end, + recv_loop(Sock, Id, sender(Sock, Id, OS), MRef). + +%% recv_loop/4 + +recv_loop(Sock, Id, Pid, MRef) -> + ok = inet:setopts(Sock, [{active, once}]), + receive + ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], B}) when is_binary(B) -> - Sz = size(Bin), - {Sz, Bin} = {size(B), B}, %% assert - ok = send(Sock, Id, mark(Bin)), - loop(Sock, MRef, Bin); + T2 = diameter_lib:now(), + I = Id, %% assert + <> = B, %% assert + {[_,_,_,Sz] = L, Bytes} = unmark(Bin), + Sz = size(Bin) - Bytes, %% assert + <<_:Bytes/binary, Body:Sz/binary>> = Bin, + send(Pid, [T2|L], Body), %% answer + recv_loop(Sock, Id, Pid, MRef); ?SCTP(Sock, _) -> - loop(Sock, MRef, Bin); + recv_loop(Sock, Id, Pid, MRef); {'DOWN', MRef, process, _, Reason} -> - Reason + Reason; + T -> + error(T) end. -%% connect2/3 +%% send/3 -connect2(Pid, PortNr, Bin) -> - monitor(process, Pid), +send(Pid, Header, Body) -> + Pid ! {send, Header, Body}. + +%% sender/3 +%% +%% Start a process that sends, so as not to block the controlling process. +sender(Sock, Id, OS) -> + Pid = self(), + spawn(fun() -> send_loop(Sock, Id, OS, 1, monitor(process, Pid)) end). + +%% send_loop/5 + +send_loop(Sock, Id, OS, N, MRef) -> + receive + {send, L, Body} -> + Stream = N rem OS, + ok = send(Sock, Id, Stream, mark(Body, [N, Stream | L])), + send_loop(Sock, Id, OS, N+1, MRef); + {'DOWN', MRef, process, _, _} = T -> + T; + T -> + error(T) + end. + +%% peeloff/3 + +peeloff(LSock, Id, TPid) -> + {ok, Sock} = gen_sctp:peeloff(LSock, Id), + ok = gen_sctp:controlling_process(Sock, TPid), + Sock. + +%% client/4 + +client(Pid, PortNr, Msgs, Sz) -> + monitor(process, Pid), {ok, Sock} = open(), ok = gen_sctp:connect_init(Sock, ?ADDR, PortNr, []), - Id = assoc(Sock), + recv_loop(Sock, Msgs, Sz). - %% T1 = time before send - %% T2 = time after listening process received our message - %% T3 = time after reply is received +%% recv_loop/3 - T1 = diameter_lib:now(), - ok = send(Sock, Id, Bin), - T2 = unmark(recv(Sock, Id)), - T3 = diameter_lib:now(), - {diameter_lib:micro_diff(T2, T1), %% Outbound - diameter_lib:micro_diff(T3, T2)}. %% Inbound +recv_loop(_, 0, T) -> + [_,_|Acc] = T, + Acc; -%% recv/2 +recv_loop(Sock, Msgs, T) -> + ok = inet:setopts(Sock, [{active, once}]), + {I, NewT} = recv(Sock, Msgs, T, receive X -> X end), + recv_loop(Sock, Msgs - I, NewT). -recv(Sock, Id) -> - receive - ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], Bin}) - when is_binary(Bin) -> - Id = I, %% assert - Bin; - ?SCTP(S, _) -> - Sock = S, %% assert - recv(Sock, Id); - T -> - exit(T) +%% recv/4 + +recv(Sock, Msgs, Sz, ?SCTP(Sock, {_, #sctp_assoc_change{} = A})) -> + #sctp_assoc_change{state = comm_up, %% assert + assoc_id = Id, + outbound_streams = OS} + = A, + true = is_integer(Sz), %% assert + send_n(Msgs, sender(Sock, Id, OS), Sz), + {0, [Id, OS]}; + +recv(Sock, _, T, ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = Id}], Bin})) -> + T4 = diameter_lib:now(), + [Id, OS | Acc] = T, + {1, [Id, OS, stat(T4, Bin) | Acc]}; + +recv(Sock, _, T, ?SCTP(Sock, _)) -> + {0, [_,_|_] = T}; + +recv(_, _, _, T) -> + error(T). + +%% send_n/3 +%% +%% Send messages to the server from dedicated processes. + +send_n(0, _, _) -> + ok; + +send_n(N, Pid, Sz) -> + M = rand:uniform(255), + send(Pid, [Sz], binary:copy(<>, Sz)), + send_n(N-1, Pid, Sz). + +%% send/4 + +send(Sock, Id, Stream, Bin) -> + case gen_sctp:send(Sock, Id, Stream, <>) of + {error, eagain} -> + send(Sock, Id, Stream, Bin); + RC -> + RC end. -%% send/3 +%% stat/2 -send(Sock, Id, Bin) -> - gen_sctp:send(Sock, Id, 0, Bin). +stat(T4, <>) -> + %% T1 = time at send + %% T2 = time at reception by server + %% T3 = time at reception by server's sender + %% T4 = time at reception of answer -%% mark/1 + {[T3,NI,SI,T2,T1,NO,SO,Sz], Bytes} = unmark(Bin), -mark(Bin) -> - Info = term_to_binary(diameter_lib:now()), - <>. + Sz = size(Bin) - Bytes, %% assert + {T1, + {NO, SO, diameter_lib:micro_diff(T2, T1)}, %% Outbound + {NI, SI, diameter_lib:micro_diff(T4, T3)}, %% Inbound + T4}. + +%% mark/2 + +mark(Bin, T) -> + Info = term_to_binary([diameter_lib:now() | T]), + <>. + %% unmark/1 unmark(Bin) -> - binary_to_term(Bin). + T = binary_to_term(Bin), + {T, size(term_to_binary(T))}. + +%% =========================================================================== + +%% send_many_from_one/0 +%% +%% Demonstrates sluggish delivery of messages. + +send_many_from_one() -> + [{timetrap, {seconds, 30}}]. + +send_many_from_one(_) -> + ?OK = send_multiple(1, 128, 1024). %% =========================================================================== @@ -345,7 +401,7 @@ unmark(Bin) -> %% Demonstrates reception of a message that differs from that sent. receive_what_was_sent(_Config) -> - send_from_multiple_clients(1, 1024*32). %% fails + ?OK = send_multiple(1, 1, 1024*32). %% =========================================================================== @@ -357,16 +413,23 @@ open() -> %% open/1 open(Opts) -> - gen_sctp:open([{ip, ?ADDR}, {port, 0}, {active, true}, binary, + gen_sctp:open([{ip, ?ADDR}, {port, 0}, {active, false}, binary, + {sctp_initmsg, #sctp_initmsg{num_ostreams = ?STREAMS, + max_instreams = ?STREAMS}}, {recbuf, 1 bsl 16}, {sndbuf, 1 bsl 16} | Opts]). -%% assoc/1 +%% report/2 -assoc(Sock) -> - receive - ?SCTP(Sock, {_, #sctp_assoc_change{state = S, - assoc_id = Id}}) -> - comm_up = S, %% assert - Id - end. +report(T0, Ts) -> + ct:pal("~p~n", [lists:sort([sort([{diameter_lib:micro_diff(T1,T0), + OT, + IT, + diameter_lib:micro_diff(T4,T0)} + || {T1,OT,IT,T4} <- L]) + || L <- Ts])]). + +%% sort/1 + +sort(L) -> + lists:sort(fun({_,{N,_,_},_,_}, {_,{M,_,_},_,_}) -> N =< M end, L). -- cgit v1.2.3 From 618acfe5dcb0aa3d8ec0101704cd8fc774ac6c90 Mon Sep 17 00:00:00 2001 From: Anders Svensson Date: Mon, 10 Apr 2017 15:28:43 +0200 Subject: Deal with SCTP association id quirk on Solaris In particular, that the association id received in messages on a one-to-one socket after peeloff may be different from the id received on the listen socket at comm_up. This seems odd, since it's then not possible to send until the id is discovered by reception of an SCTP message containing it, but it's unclear if this is a bug or a feature, or if it's specific to certain platforms. Treat it as a feature in this commit, and get the association id as mentioned, an incoming CER being expected before anything is sent. Commit da3e5d67 has more history. --- lib/diameter/test/diameter_gen_sctp_SUITE.erl | 58 ++++++++++++++++++--------- 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'lib/diameter/test/diameter_gen_sctp_SUITE.erl') diff --git a/lib/diameter/test/diameter_gen_sctp_SUITE.erl b/lib/diameter/test/diameter_gen_sctp_SUITE.erl index 457d9a0a85..d76d2bdbd3 100644 --- a/lib/diameter/test/diameter_gen_sctp_SUITE.erl +++ b/lib/diameter/test/diameter_gen_sctp_SUITE.erl @@ -228,33 +228,49 @@ accept_loop(Sock, MRef) -> %% Server process that answers incoming messages as long as the parent %% lives. -assoc(MRef, Id, OS) +assoc(MRef, _Id, OS) when is_reference(MRef) -> {peeloff, Sock} = receive T -> T end, - recv_loop(Sock, Id, sender(Sock, Id, OS), MRef). + recv_loop(Sock, false, sender(Sock, false, OS), MRef). %% recv_loop/4 recv_loop(Sock, Id, Pid, MRef) -> ok = inet:setopts(Sock, [{active, once}]), - receive - ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], B}) - when is_binary(B) -> - T2 = diameter_lib:now(), - I = Id, %% assert - <> = B, %% assert - {[_,_,_,Sz] = L, Bytes} = unmark(Bin), - Sz = size(Bin) - Bytes, %% assert - <<_:Bytes/binary, Body:Sz/binary>> = Bin, - send(Pid, [T2|L], Body), %% answer - recv_loop(Sock, Id, Pid, MRef); - ?SCTP(Sock, _) -> - recv_loop(Sock, Id, Pid, MRef); - {'DOWN', MRef, process, _, Reason} -> - Reason; - T -> - error(T) - end. + recv(Sock, Id, Pid, MRef, receive T -> T end). + +%% recv/5 + +%% Association id can change on a peeloff socket on some versions of +%% Solaris. +recv(Sock, + false, + Pid, + MRef, + ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = Id}], _}) + = T) -> + Pid ! {assoc_id, Id}, + recv(Sock, Id, Pid, MRef, T); + +recv(Sock, Id, Pid, MRef, ?SCTP(Sock, {[#sctp_sndrcvinfo{assoc_id = I}], B})) + when is_binary(B) -> + T2 = diameter_lib:now(), + Id = I, %% assert + <> = B, %% assert + {[_,_,_,Sz] = L, Bytes} = unmark(Bin), + Sz = size(Bin) - Bytes, %% assert + <<_:Bytes/binary, Body:Sz/binary>> = Bin, + send(Pid, [T2|L], Body), %% answer + recv_loop(Sock, Id, Pid, MRef); + +recv(Sock, Id, Pid, MRef, ?SCTP(Sock, _)) -> + recv_loop(Sock, Id, Pid, MRef); + +recv(_, _, _, MRef, {'DOWN', MRef, process, _, Reason}) -> + Reason; + +recv(_, _, _, _, T) -> + error(T). %% send/3 @@ -273,6 +289,8 @@ sender(Sock, Id, OS) -> send_loop(Sock, Id, OS, N, MRef) -> receive + {assoc_id, I} -> + send_loop(Sock, I, OS, N, MRef); {send, L, Body} -> Stream = N rem OS, ok = send(Sock, Id, Stream, mark(Body, [N, Stream | L])), -- cgit v1.2.3