aboutsummaryrefslogblamecommitdiffstats
path: root/src/cowboy_compress_h.erl
blob: 57d3dab9764ea93eed81d76bc64ca362aee403dc (plain) (tree)




















                                                                           
                         


                      
                                                   
                                                 

                                                          





                                                                     
                                                                    
                                                                                      
                                                                    


                                               






















                                                                                        





                                                                            


                                                           

                                                                  
                 
                                                                  












                                                                              


                                                  




                                                                      


                                                          



                                                   
                                            




                                    
                                           
                                                                          
                                                         
                                                                  


                                                                                    

                                                   
                                                                          


                                                       
                                                                  
                                                                                           

                                                                                             
                                                                                        





                                                            
                                                                         

                                                                            
                                                                       




                                                                     




                                                                          
                                                                           

                                                               







                                                                                   

                                                                                        
                                                                      


                                         








                                                                   


                                   















                                                                         
                                     

                                                                               
                           






                                                                        
                                    
                                                    






                                                   















                                                                                                
 




                                                           

                                                                
                                          
                                             






                                                                                

                                                                                





                                                            











                                                                   
%% Copyright (c) 2017, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(cowboy_compress_h).
-behavior(cowboy_stream).

-export([init/3]).
-export([data/4]).
-export([info/3]).
-export([terminate/3]).
-export([early_error/5]).

-record(state, {
	next :: any(),
	threshold :: non_neg_integer() | undefined,
	compress = undefined :: undefined | gzip,
	deflate = undefined :: undefined | zlib:zstream(),
	deflate_flush = sync :: none | sync
}).

-spec init(cowboy_stream:streamid(), cowboy_req:req(), cowboy:opts())
	-> {cowboy_stream:commands(), #state{}}.
init(StreamID, Req, Opts) ->
	State0 = check_req(Req),
	CompressThreshold = maps:get(compress_threshold, Opts, 300),
	DeflateFlush = buffering_to_zflush(maps:get(compress_buffering, Opts, false)),
	{Commands0, Next} = cowboy_stream:init(StreamID, Req, Opts),
	fold(Commands0, State0#state{next=Next,
		threshold=CompressThreshold,
		deflate_flush=DeflateFlush}).

-spec data(cowboy_stream:streamid(), cowboy_stream:fin(), cowboy_req:resp_body(), State)
	-> {cowboy_stream:commands(), State} when State::#state{}.
data(StreamID, IsFin, Data, State0=#state{next=Next0}) ->
	{Commands0, Next} = cowboy_stream:data(StreamID, IsFin, Data, Next0),
	fold(Commands0, State0#state{next=Next}).

-spec info(cowboy_stream:streamid(), any(), State)
	-> {cowboy_stream:commands(), State} when State::#state{}.
info(StreamID, Info, State0=#state{next=Next0}) ->
	{Commands0, Next} = cowboy_stream:info(StreamID, Info, Next0),
	fold(Commands0, State0#state{next=Next}).

-spec terminate(cowboy_stream:streamid(), cowboy_stream:reason(), #state{}) -> any().
terminate(StreamID, Reason, #state{next=Next, deflate=Z}) ->
	%% Clean the zlib:stream() in case something went wrong.
	%% In the normal scenario the stream is already closed.
	case Z of
		undefined -> ok;
		_ -> zlib:close(Z)
	end,
	cowboy_stream:terminate(StreamID, Reason, Next).

-spec early_error(cowboy_stream:streamid(), cowboy_stream:reason(),
	cowboy_stream:partial_req(), Resp, cowboy:opts()) -> Resp
	when Resp::cowboy_stream:resp_command().
early_error(StreamID, Reason, PartialReq, Resp, Opts) ->
	cowboy_stream:early_error(StreamID, Reason, PartialReq, Resp, Opts).

%% Internal.

%% Check if the client supports decoding of gzip responses.
%%
%% A malformed accept-encoding header is ignored (no compression).
check_req(Req) ->
	try cowboy_req:parse_header(<<"accept-encoding">>, Req) of
		%% Client doesn't support any compression algorithm.
		undefined ->
			#state{compress=undefined};
		Encodings ->
			%% We only support gzip so look for it specifically.
			%% @todo A recipient SHOULD consider "x-gzip" to be
			%% equivalent to "gzip". (RFC7230 4.2.3)
			case [E || E={<<"gzip">>, Q} <- Encodings, Q =/= 0] of
				[] ->
					#state{compress=undefined};
				_ ->
					#state{compress=gzip}
			end
	catch
		_:_ ->
			#state{compress=undefined}
	end.

%% Do not compress responses that contain the content-encoding header.
check_resp_headers(#{<<"content-encoding">> := _}, State) ->
	State#state{compress=undefined};
%% Do not compress responses that contain the etag header.
check_resp_headers(#{<<"etag">> := _}, State) ->
	State#state{compress=undefined};
check_resp_headers(_, State) ->
	State.

fold(Commands, State=#state{compress=undefined}) ->
	fold_vary_only(Commands, State, []);
fold(Commands, State) ->
	fold(Commands, State, []).

fold([], State, Acc) ->
	{lists:reverse(Acc), State};
%% We do not compress full sendfile bodies.
fold([Response={response, _, _, {sendfile, _, _, _}}|Tail], State, Acc) ->
	fold(Tail, State, [vary_response(Response)|Acc]);
%% We compress full responses directly, unless they are lower than
%% the configured threshold or we find we are not able to by looking at the headers.
fold([Response0={response, _, Headers, Body}|Tail],
		State0=#state{threshold=CompressThreshold}, Acc) ->
	case check_resp_headers(Headers, State0) of
		State=#state{compress=undefined} ->
			fold(Tail, State, [vary_response(Response0)|Acc]);
		State1 ->
			BodyLength = iolist_size(Body),
			if
				BodyLength =< CompressThreshold ->
					fold(Tail, State1, [vary_response(Response0)|Acc]);
				true ->
					{Response, State} = gzip_response(Response0, State1),
					fold(Tail, State, [vary_response(Response)|Acc])
			end
	end;
%% Check headers and initiate compression...
fold([Response0={headers, _, Headers}|Tail], State0, Acc) ->
	case check_resp_headers(Headers, State0) of
		State=#state{compress=undefined} ->
			fold(Tail, State, [vary_headers(Response0)|Acc]);
		State1 ->
			{Response, State} = gzip_headers(Response0, State1),
			fold(Tail, State, [vary_headers(Response)|Acc])
	end;
%% then compress each data commands individually.
fold([Data0={data, _, _}|Tail], State0=#state{compress=gzip}, Acc) ->
	{Data, State} = gzip_data(Data0, State0),
	fold(Tail, State, [Data|Acc]);
%% When trailers are sent we need to end the compression.
%% This results in an extra data command being sent.
fold([Trailers={trailers, _}|Tail], State0=#state{compress=gzip}, Acc) ->
	{{data, fin, Data}, State} = gzip_data({data, fin, <<>>}, State0),
	fold(Tail, State, [Trailers, {data, nofin, Data}|Acc]);
%% All the options from this handler can be updated for the current stream.
%% The set_options command must be propagated as-is regardless.
fold([SetOptions={set_options, Opts}|Tail], State=#state{
		threshold=CompressThreshold0, deflate_flush=DeflateFlush0}, Acc) ->
	CompressThreshold = maps:get(compress_threshold, Opts, CompressThreshold0),
	DeflateFlush = case Opts of
		#{compress_buffering := CompressBuffering} ->
			buffering_to_zflush(CompressBuffering);
		_ ->
			DeflateFlush0
	end,
	fold(Tail, State#state{threshold=CompressThreshold, deflate_flush=DeflateFlush},
		[SetOptions|Acc]);
%% Otherwise, we have an unrelated command or compression is disabled.
fold([Command|Tail], State, Acc) ->
	fold(Tail, State, [Command|Acc]).

fold_vary_only([], State, Acc) ->
	{lists:reverse(Acc), State};
fold_vary_only([Response={response, _, _, _}|Tail], State, Acc) ->
	fold_vary_only(Tail, State, [vary_response(Response)|Acc]);
fold_vary_only([Response={headers, _, _}|Tail], State, Acc) ->
	fold_vary_only(Tail, State, [vary_headers(Response)|Acc]);
fold_vary_only([Command|Tail], State, Acc) ->
	fold_vary_only(Tail, State, [Command|Acc]).

buffering_to_zflush(true) -> none;
buffering_to_zflush(false) -> sync.

gzip_response({response, Status, Headers, Body}, State) ->
	%% We can't call zlib:gzip/1 because it does an
	%% iolist_to_binary(GzBody) at the end to return
	%% a binary(). Therefore the code here is largely
	%% a duplicate of the code of that function.
	Z = zlib:open(),
	GzBody = try
		%% 31 = 16+?MAX_WBITS from zlib.erl
		%% @todo It might be good to allow them to be configured?
		zlib:deflateInit(Z, default, deflated, 31, 8, default),
		Gz = zlib:deflate(Z, Body, finish),
		zlib:deflateEnd(Z),
		Gz
	after
		zlib:close(Z)
	end,
	{{response, Status, Headers#{
		<<"content-length">> => integer_to_binary(iolist_size(GzBody)),
		<<"content-encoding">> => <<"gzip">>
	}, GzBody}, State}.

gzip_headers({headers, Status, Headers0}, State) ->
	Z = zlib:open(),
	%% We use the same arguments as when compressing the body fully.
	%% @todo It might be good to allow them to be configured?
	zlib:deflateInit(Z, default, deflated, 31, 8, default),
	Headers = maps:remove(<<"content-length">>, Headers0),
	{{headers, Status, Headers#{
		<<"content-encoding">> => <<"gzip">>
	}}, State#state{deflate=Z}}.

vary_response({response, Status, Headers, Body}) ->
	{response, Status, vary(Headers), Body}.

vary_headers({headers, Status, Headers}) ->
	{headers, Status, vary(Headers)}.

%% We must add content-encoding to vary if it's not already there.
vary(Headers=#{<<"vary">> := Vary}) ->
	try cow_http_hd:parse_vary(iolist_to_binary(Vary)) of
		'*' -> Headers;
		List ->
			case lists:member(<<"accept-encoding">>, List) of
				true -> Headers;
				false -> Headers#{<<"vary">> => [Vary, <<", accept-encoding">>]}
			end
	catch _:_ ->
		%% The vary header is invalid. Probably empty. We replace it with ours.
		Headers#{<<"vary">> => <<"accept-encoding">>}
	end;
vary(Headers) ->
	Headers#{<<"vary">> => <<"accept-encoding">>}.

%% It is not possible to combine zlib and the sendfile
%% syscall as far as I can tell, because the zlib format
%% includes a checksum at the end of the stream. We have
%% to read the file in memory, making this not suitable for
%% large files.
gzip_data({data, nofin, Sendfile={sendfile, _, _, _}},
		State=#state{deflate=Z, deflate_flush=Flush}) ->
	{ok, Data0} = read_file(Sendfile),
	Data = zlib:deflate(Z, Data0, Flush),
	{{data, nofin, Data}, State};
gzip_data({data, fin, Sendfile={sendfile, _, _, _}}, State=#state{deflate=Z}) ->
	{ok, Data0} = read_file(Sendfile),
	Data = zlib:deflate(Z, Data0, finish),
	zlib:deflateEnd(Z),
	zlib:close(Z),
	{{data, fin, Data}, State#state{deflate=undefined}};
gzip_data({data, nofin, Data0}, State=#state{deflate=Z, deflate_flush=Flush}) ->
	Data = zlib:deflate(Z, Data0, Flush),
	{{data, nofin, Data}, State};
gzip_data({data, fin, Data0}, State=#state{deflate=Z}) ->
	Data = zlib:deflate(Z, Data0, finish),
	zlib:deflateEnd(Z),
	zlib:close(Z),
	{{data, fin, Data}, State#state{deflate=undefined}}.

read_file({sendfile, Offset, Bytes, Path}) ->
	{ok, IoDevice} = file:open(Path, [read, raw, binary]),
	try
		_ = case Offset of
			0 -> ok;
			_ -> file:position(IoDevice, {bof, Offset})
		end,
		file:read(IoDevice, Bytes)
	after
		file:close(IoDevice)
	end.