From fd9711d9495e4ddcd41eda7a284dfc7f37c11f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Thu, 4 Jan 2024 15:15:41 +0100 Subject: Rework and improve the decompress stream handler The read buffer was changed into an iovec to avoid doing too many binary concatenations and allocations. Decompression happens transparently: when decoding gzip, the content-encoding header is removed (we only decode when "gzip" is the only encoding so nothing remains). We always add a content_decoded key to the Req object. This key contains a list of codings that were decoded, in the reverse order in which they were decoded. Currently it can only be empty or contain <<"gzip">> but future improvements or user handlers may see it contain more values. The option to disable decompression was renamed to decompress_enabled and defaults to true. It is no longer possible to enable/disable decompression in the middle of reading the body: this ensures that the data we pass forward is always valid. Various smaller improvements were made to the code, tests and manual pages. --- src/cowboy_decompress_h.erl | 146 ++++++++++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/cowboy_decompress_h.erl b/src/cowboy_decompress_h.erl index ffbec25..d13601b 100644 --- a/src/cowboy_decompress_h.erl +++ b/src/cowboy_decompress_h.erl @@ -1,3 +1,18 @@ +%% Copyright (c) 2024, jdamanalo +%% Copyright (c) 2024, Loïc Hoguin +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + -module(cowboy_decompress_h). -behavior(cowboy_stream). @@ -9,21 +24,27 @@ -record(state, { next :: any(), + enabled :: boolean(), ratio_limit :: non_neg_integer() | undefined, - ignore = false :: boolean(), compress = undefined :: undefined | gzip, inflate = undefined :: undefined | zlib:zstream(), is_reading = false :: boolean(), - read_body_buffer = <<>> :: binary(), + + %% We use a list of binaries to avoid doing unnecessary + %% memory allocations when inflating. We convert to binary + %% when we propagate the data. The data must be reversed + %% before converting to binary or inflating: this is done + %% via the buffer_to_binary/buffer_to_iovec functions. + read_body_buffer = [] :: [binary()], read_body_is_fin = nofin :: nofin | {fin, non_neg_integer()} }). -spec init(cowboy_stream:streamid(), cowboy_req:req(), cowboy:opts()) -> {cowboy_stream:commands(), #state{}}. -init(StreamID, Req, Opts) -> +init(StreamID, Req0, Opts) -> + Enabled = maps:get(decompress_enabled, Opts, true), RatioLimit = maps:get(decompress_ratio_limit, Opts, 20), - Ignore = maps:get(decompress_ignore, Opts, false), - State = check_req(Req), + {Req, State} = check_and_update_req(Req0), Inflate = case State#state.compress of undefined -> undefined; @@ -33,48 +54,46 @@ init(StreamID, Req, Opts) -> Z end, {Commands, Next} = cowboy_stream:init(StreamID, Req, Opts), - fold(Commands, State#state{next=Next, ratio_limit=RatioLimit, ignore=Ignore, - inflate=Inflate}). + fold(Commands, State#state{next=Next, enabled=Enabled, + ratio_limit=RatioLimit, inflate=Inflate}). 
-spec data(cowboy_stream:streamid(), cowboy_stream:fin(), cowboy_req:resp_body(), State) -> {cowboy_stream:commands(), State} when State::#state{}. data(StreamID, IsFin, Data, State=#state{next=Next0, inflate=undefined}) -> {Commands, Next} = cowboy_stream:data(StreamID, IsFin, Data, Next0), fold(Commands, State#state{next=Next, read_body_is_fin=IsFin}); -data(StreamID, IsFin, Data, State=#state{next=Next0, ignore=true, read_body_buffer=Buffer}) -> +data(StreamID, IsFin, Data, State=#state{next=Next0, enabled=false, read_body_buffer=Buffer}) -> {Commands, Next} = cowboy_stream:data(StreamID, IsFin, - << Buffer/binary, Data/binary >>, Next0), + buffer_to_binary([Data|Buffer]), Next0), fold(Commands, State#state{next=Next, read_body_is_fin=IsFin}); data(StreamID, IsFin, Data, State0=#state{next=Next0, ratio_limit=RatioLimit, - inflate=Z, is_reading=true, read_body_buffer=Buffer0}) -> - Buffer = << Buffer0/binary, Data/binary >>, - case inflate(Z, RatioLimit, Buffer) of - {error, Type} -> - Status = case Type of - data -> 400; - size -> 413 + inflate=Z, is_reading=true, read_body_buffer=Buffer}) -> + case inflate(Z, RatioLimit, buffer_to_iovec([Data|Buffer])) of + {error, ErrorType} -> + zlib:close(Z), + Status = case ErrorType of + data_error -> 400; + size_error -> 413 end, Commands = [ {error_response, Status, #{<<"content-length">> => <<"0">>}, <<>>}, stop ], - fold(Commands, State0#state{inflate=undefined}); + fold(Commands, State0#state{inflate=undefined, read_body_buffer=[]}); {ok, Inflated} -> State = case IsFin of nofin -> State0; fin -> - zlib:inflateEnd(Z), zlib:close(Z), State0#state{inflate=undefined} end, {Commands, Next} = cowboy_stream:data(StreamID, IsFin, Inflated, Next0), - fold(Commands, State#state{next=Next, read_body_buffer= <<>>, + fold(Commands, State#state{next=Next, read_body_buffer=[], read_body_is_fin=IsFin}) end; -data(_, IsFin, Data, State=#state{read_body_buffer=Buffer0}) -> - Buffer = << Buffer0/binary, Data/binary >>, - {[], 
State#state{read_body_buffer=Buffer, read_body_is_fin=IsFin}}. +data(_, IsFin, Data, State=#state{read_body_buffer=Buffer}) -> + {[], State#state{read_body_buffer=[Data|Buffer], read_body_is_fin=IsFin}}. -spec info(cowboy_stream:streamid(), any(), State) -> {cowboy_stream:commands(), State} when State::#state{}. @@ -86,12 +105,19 @@ info(StreamID, Info={CommandTag, _, _, _, _}, State=#state{next=Next0, read_body {Commands0, Next1} = cowboy_stream:info(StreamID, Info, Next0), {Commands, Next} = data(StreamID, IsFin, <<>>, State#state{next=Next1, is_reading=true}), fold(Commands ++ Commands0, Next); -info(StreamID, Info={set_options, Opts}, State=#state{next=Next0, - ignore=Ignore0, ratio_limit=RatioLimit0}) -> - Ignore = maps:get(decompress_ignore, Opts, Ignore0), +info(StreamID, Info={set_options, Opts}, State0=#state{next=Next0, + enabled=Enabled0, ratio_limit=RatioLimit0, is_reading=IsReading}) -> + Enabled = maps:get(decompress_enabled, Opts, Enabled0), RatioLimit = maps:get(decompress_ratio_limit, Opts, RatioLimit0), {Commands, Next} = cowboy_stream:info(StreamID, Info, Next0), - fold(Commands, State#state{next=Next, ignore=Ignore, ratio_limit=RatioLimit}); + %% We can't change the enabled setting after we start reading, + %% otherwise the data becomes garbage. Changing the setting + %% is not treated as an error, it is just ignored. + State = case IsReading of + true -> State0; + false -> State0#state{enabled=Enabled} + end, + fold(Commands, State#state{next=Next, ratio_limit=RatioLimit}); info(StreamID, Info, State=#state{next=Next0}) -> {Commands, Next} = cowboy_stream:info(StreamID, Info, Next0), fold(Commands, State#state{next=Next}). @@ -112,31 +138,49 @@ early_error(StreamID, Reason, PartialReq, Resp, Opts) -> %% Internal. -check_req(Req) -> +%% Check whether the request needs content decoding, and if it does +%% whether it fits our criteria for decoding. We also update the +%% Req to indicate whether content was decoded. 
+%% +%% We always set the content_decoded value in the Req because it +%% indicates whether content decoding was attempted. +%% +%% A malformed content-encoding header results in no decoding. +check_and_update_req(Req=#{headers := Headers}) -> + ContentDecoded = maps:get(content_decoded, Req, []), try cowboy_req:parse_header(<<"content-encoding">>, Req) of - undefined -> - #state{compress=undefined}; - Encodings -> - case [E || E=(<<"gzip">>) <- Encodings] of - [] -> - #state{compress=undefined}; - _ -> - #state{compress=gzip} - end - catch - _:_ -> - #state{compress=undefined} + %% We only automatically decompress when gzip is the only + %% encoding used. Since it's the only encoding used, we + %% can remove the header entirely before passing the Req + %% forward. + [<<"gzip">>] -> + {Req#{ + headers => maps:remove(<<"content-encoding">>, Headers), + content_decoded => [<<"gzip">>|ContentDecoded] + }, #state{compress=gzip}}; + _ -> + {Req#{content_decoded => ContentDecoded}, + #state{compress=undefined}} + catch _:_ -> + {Req#{content_decoded => ContentDecoded}, + #state{compress=undefined}} end. +buffer_to_iovec(Buffer) -> + lists:reverse(Buffer). + +buffer_to_binary(Buffer) -> + iolist_to_binary(lists:reverse(Buffer)). + fold(Commands, State) -> fold(Commands, State, []). 
fold([], State, Acc) -> {lists:reverse(Acc), State}; -fold([{response, Status, Headers0, Body}|Tail], State=#state{ignore=false}, Acc) -> +fold([{response, Status, Headers0, Body}|Tail], State=#state{enabled=true}, Acc) -> Headers = add_accept_encoding(Headers0), fold(Tail, State, [{response, Status, Headers, Body}|Acc]); -fold([{headers, Status, Headers0} | Tail], State=#state{ignore=false}, Acc) -> +fold([{headers, Status, Headers0} | Tail], State=#state{enabled=true}, Acc) -> Headers = add_accept_encoding(Headers0), fold(Tail, State, [{headers, Status, Headers}|Acc]); fold([Command|Tail], State, Acc) -> @@ -146,7 +190,7 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) -> try cow_http_hd:parse_accept_encoding(iolist_to_binary(AcceptEncoding)) of List -> case lists:keyfind(<<"gzip">>, 1, List) of - %% gzip is excluded but this handler is not ignored; we replace. + %% gzip is excluded but this handler is enabled; we replace. {_, 0} -> Replaced = lists:keyreplace(<<"gzip">>, 1, List, {<<"gzip">>, 1000}), Codings = build_accept_encoding(Replaced), @@ -167,18 +211,20 @@ add_accept_encoding(Headers=#{<<"accept-encoding">> := AcceptEncoding}) -> end end catch _:_ -> + %% The accept-encoding header is invalid. Probably empty. We replace it with ours. Headers#{<<"accept-encoding">> => <<"gzip">>} end; add_accept_encoding(Headers) -> Headers#{<<"accept-encoding">> => <<"gzip">>}. -%% From cowlib, maybe expose? +%% @todo From cowlib, maybe expose? qvalue_to_iodata(0) -> <<"0">>; qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)]; qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)]; qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)]; qvalue_to_iodata(1000) -> <<"1">>. +%% @todo Should be added to Cowlib. build_accept_encoding([{ContentCoding, Q}|Tail]) -> Weight = iolist_to_binary(qvalue_to_iodata(Q)), Acc = <