diff options
author | Anders Svensson <[email protected]> | 2017-08-08 16:20:00 +0200 |
---|---|---|
committer | Anders Svensson <[email protected]> | 2017-08-10 11:16:00 +0200 |
commit | d1f4369afb9fb9453ebe8868d88ae299d908a2e4 (patch) | |
tree | 1a56476d3a70e00e85447854bdd508b70bfa3b1b | |
parent | ca68fe8c449a170f64fd9b41b710ac67966be2ee (diff) | |
download | otp-d1f4369afb9fb9453ebe8868d88ae299d908a2e4.tar.gz otp-d1f4369afb9fb9453ebe8868d88ae299d908a2e4.tar.bz2 otp-d1f4369afb9fb9453ebe8868d88ae299d908a2e4.zip |
Avoid unnecessary copying of binaries in diameter_tcp
Since messages are accumulated by appending binaries as of three
commits back, the accumulated binary is prone to being copied, as
discussed in the Efficiency Guide. Matching the Message Length header
as bytes are being accumulated is one cause of this, so work around it
by splitting the binary and extracting the length without a match.
This doesn't feel like something that should be necessary: that matching
a binary would cause an append to copy isn't obvious. The first attempt
at simplifying the accumulation was simply to append an incoming binary
to the current fragment, match against <<_, Len:24, _/binary>> to
extract the length, and then test if there are enough bytes for a
message. This lead to horrible performance (response times for 2 MB
messages approximately 100 times worse than previously), and it wasn't
at all obvious that the reason was the accumulated binary being copied
with each append as a result of the match. Using split_binary avoids the
match context that forces the copying.
-rw-r--r-- | lib/diameter/src/transport/diameter_tcp.erl | 38 |
1 files changed, 25 insertions, 13 deletions
diff --git a/lib/diameter/src/transport/diameter_tcp.erl b/lib/diameter/src/transport/diameter_tcp.erl index bda92d02f6..6252dbddfb 100644 --- a/lib/diameter/src/transport/diameter_tcp.erl +++ b/lib/diameter/src/transport/diameter_tcp.erl @@ -748,8 +748,7 @@ acc(Head, Bin) -> %% Extract a message for which we have all bytes. acc1(Len, Bin) when Len =< byte_size(Bin) -> - <<Msg:Len/binary, Rest/binary>> = Bin, - {Msg, Rest}; + split_binary(Bin, Len); %% Wait for more packets. acc1(Len, Bin) -> @@ -757,17 +756,30 @@ acc1(Len, Bin) -> %% acc/1 -%% The Message Length isn't even sufficient for a header. Chances are -%% things will go south from here but if we're lucky then the bytes we -%% have extend to an intended message boundary and we can recover by -%% simply receiving them. Make it so. -acc(<<_:1/binary, Len:24, _/binary>> = Bin) - when Len < 20 -> - {Bin, <<>>}; - -%% Know the message length. -acc(<<_:1/binary, Len:24, _/binary>> = Bin) -> - acc1(Len, Bin); +%% Don't match on Bin since this results in it being copied at the +%% next append according to the Efficiency Guide. This is also the +%% reason that the Len is extracted and maintained when accumulating +%% messages. The simplest implementation is just to accumulate a +%% binary and match <<_, Len:24, _/binary>> each time the length is +%% required, but the performance of this decays quadratically with the +%% message length, since the binary is then copied with each append of +%% additional bytes from gen_tcp. + +acc(Bin) + when 3 < byte_size(Bin) -> + {Head, _} = split_binary(Bin, 4), + [_,A,B,C] = binary_to_list(Head), + Len = (A bsl 16) bor (B bsl 8) bor C, + if Len < 20 -> + %% Message length isn't sufficient for a Diameter Header. + %% Chances are things will go south from here but if we're + %% lucky then the bytes we have extend to an intended + %% message boundary and we can recover by simply receiving + %% them. Make it so. + {Bin, <<>>}; + true -> + acc1(Len, Bin) + end; %% Not even 4 bytes yet. acc(Bin) -> |