From 399b6a16b4a571e293437dcc8f85808f83b32ff6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Fri, 2 Nov 2018 12:36:51 +0100 Subject: Better handle content negotiation when accept contains charsets Thanks to Philip Witty for help figuring this out. --- doc/src/manual/cowboy_rest.asciidoc | 20 +++++ src/cowboy_rest.erl | 89 +++++++++++++++++----- .../charset_in_content_types_provided_h.erl | 22 ++++++ ...harset_in_content_types_provided_implicit_h.erl | 24 ++++++ ...ntent_types_provided_implicit_no_callback_h.erl | 21 +++++ test/rest_handler_SUITE.erl | 74 ++++++++++++++++++ 6 files changed, 231 insertions(+), 19 deletions(-) create mode 100644 test/handlers/charset_in_content_types_provided_h.erl create mode 100644 test/handlers/charset_in_content_types_provided_implicit_h.erl create mode 100644 test/handlers/charset_in_content_types_provided_implicit_no_callback_h.erl diff --git a/doc/src/manual/cowboy_rest.asciidoc b/doc/src/manual/cowboy_rest.asciidoc index 1b33bb8..19f4820 100644 --- a/doc/src/manual/cowboy_rest.asciidoc +++ b/doc/src/manual/cowboy_rest.asciidoc @@ -272,6 +272,16 @@ Cowboy treats all parameters as case sensitive, except for the should therefore always provide the charset as a lowercase binary string. +When a charset is given in the media type parameters in the +accept header, Cowboy will do some additional checks to confirm +that it can use this charset. When the wildcard is used then Cowboy +will immediately call `charsets_provided` to confirm the charset +is acceptable. If the callback is undefined Cowboy assumes any +charset is acceptable. When the wildcard is not used and the charset +given in the accept header matches one of the configured media +types Cowboy will use that charset and skip the `charsets_provided` +step entirely. + Cowboy will add the negotiated `media_type` to the Req object after this step completes: @@ -284,6 +294,16 @@ req() :: #{ // @todo Case sensitivity of parsed mime content? +Cowboy may also add the negotiated `charset` to the Req object +after this step completes: + +[source,erlang] +---- +req() :: #{ + charset => binary() %% lowercase; case insensitive +} +---- + === delete_completed [source,erlang] diff --git a/src/cowboy_rest.erl b/src/cowboy_rest.erl index 2780a90..63f3d99 100644 --- a/src/cowboy_rest.erl +++ b/src/cowboy_rest.erl @@ -245,7 +245,7 @@ language_a :: undefined | binary(), %% Charset. - charsets_p = [] :: [binary()], + charsets_p = undefined :: undefined | [binary()], charset_a :: undefined | binary(), %% Whether the resource exists. @@ -503,16 +503,55 @@ match_media_type(Req, State, Accept, match_media_type(Req, State, Accept, [_Any|Tail], MediaType) -> match_media_type(Req, State, Accept, Tail, MediaType). -match_media_type_params(Req, State, _Accept, - [Provided = {{TP, STP, '*'}, _Fun}|_Tail], - {{_TA, _STA, Params_A}, _QA, _APA}) -> - PMT = {TP, STP, Params_A}, - languages_provided(Req#{media_type => PMT}, - State#state{content_type_a=Provided}); match_media_type_params(Req, State, Accept, - [Provided = {PMT = {_TP, _STP, Params_P}, _Fun}|Tail], + [Provided = {{TP, STP, '*'}, _Fun}|Tail], + MediaType = {{TA, _STA, Params_A0}, _QA, _APA}) -> + case lists:keytake(<<"charset">>, 1, Params_A0) of + {value, {_, Charset}, Params_A} when TA =:= <<"text">> -> + %% When we match against a wildcard, the media type is text + %% and has a charset parameter, we call charsets_provided + %% and check that the charset is provided. If the callback + %% is not exported, we accept inconditionally but ignore + %% the given charset so as to not send a wrong value back. + case call(Req, State, charsets_provided) of + no_call -> + languages_provided(Req#{media_type => {TP, STP, Params_A0}}, + State#state{content_type_a=Provided}); + {stop, Req2, HandlerState} -> + terminate(Req2, State#state{handler_state=HandlerState}); + {Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler -> + switch_handler(Switch, Req2, HandlerState); + {CP, Req2, HandlerState} -> + State2 = State#state{handler_state=HandlerState, charsets_p=CP}, + case lists:member(Charset, CP) of + false -> + match_media_type(Req2, State2, Accept, Tail, MediaType); + true -> + languages_provided(Req2#{media_type => {TP, STP, Params_A}}, + State2#state{content_type_a=Provided, + charset_a=Charset}) + end + end; + _ -> + languages_provided(Req#{media_type => {TP, STP, Params_A0}}, + State#state{content_type_a=Provided}) + end; +match_media_type_params(Req, State, Accept, + [Provided = {PMT = {TP, STP, Params_P0}, Fun}|Tail], MediaType = {{_TA, _STA, Params_A}, _QA, _APA}) -> - case lists:sort(Params_P) =:= lists:sort(Params_A) of + case lists:sort(Params_P0) =:= lists:sort(Params_A) of + true when TP =:= <<"text">> -> + %% When a charset was provided explicitly in both the charset header + %% and the media types provided and the negotiation is successful, + %% we keep the charset and don't call charsets_provided. This only + %% applies to text media types, however. + {Charset, Params_P} = case lists:keytake(<<"charset">>, 1, Params_P0) of + false -> {undefined, Params_P0}; + {value, {_, Charset0}, Params_P1} -> {Charset0, Params_P1} + end, + languages_provided(Req#{media_type => {TP, STP, Params_P}}, + State#state{content_type_a={{TP, STP, Params_P}, Fun}, + charset_a=Charset}); true -> languages_provided(Req#{media_type => PMT}, State#state{content_type_a=Provided}); @@ -587,6 +626,26 @@ set_language(Req, State=#state{language_a=Language}) -> %% charsets_provided should return a list of binary values indicating %% which charsets are accepted by the resource. +%% +%% A charset may have been selected while negotiating the accept header. +%% There's no need to select one again. +charsets_provided(Req, State=#state{charset_a=Charset}) + when Charset =/= undefined -> + set_content_type(Req, State); +%% If charsets_p is defined, use it instead of calling charsets_provided +%% again. We also call this clause during normal execution to avoid +%% duplicating code. +charsets_provided(Req, State=#state{charsets_p=[]}) -> + not_acceptable(Req, State); +charsets_provided(Req, State=#state{charsets_p=CP}) + when CP =/= undefined -> + case cowboy_req:parse_header(<<"accept-charset">>, Req) of + undefined -> + set_content_type(Req, State#state{charset_a=hd(CP)}); + AcceptCharset0 -> + AcceptCharset = prioritize_charsets(AcceptCharset0), + choose_charset(Req, State, AcceptCharset) + end; charsets_provided(Req, State) -> case call(Req, State, charsets_provided) of no_call -> @@ -595,17 +654,8 @@ charsets_provided(Req, State) -> terminate(Req2, State#state{handler_state=HandlerState}); {Switch, Req2, HandlerState} when element(1, Switch) =:= switch_handler -> switch_handler(Switch, Req2, HandlerState); - {[], Req2, HandlerState} -> - not_acceptable(Req2, State#state{handler_state=HandlerState}); {CP, Req2, HandlerState} -> - State2 = State#state{handler_state=HandlerState, charsets_p=CP}, - case cowboy_req:parse_header(<<"accept-charset">>, Req2) of - undefined -> - set_content_type(Req2, State2#state{charset_a=hd(CP)}); - AcceptCharset -> - AcceptCharset2 = prioritize_charsets(AcceptCharset), - choose_charset(Req2, State2, AcceptCharset2) - end + charsets_provided(Req2, State#state{handler_state=HandlerState, charsets_p=CP}) end. %% The special value "*", if present in the Accept-Charset field, @@ -690,6 +740,7 @@ variances(Req, State=#state{content_types_p=CTP, [_|_] -> [<<"accept-language">>|Variances] end, Variances3 = case CP of + undefined -> Variances2; [] -> Variances2; [_] -> Variances2; [_|_] -> [<<"accept-charset">>|Variances2] diff --git a/test/handlers/charset_in_content_types_provided_h.erl b/test/handlers/charset_in_content_types_provided_h.erl new file mode 100644 index 0000000..4774407 --- /dev/null +++ b/test/handlers/charset_in_content_types_provided_h.erl @@ -0,0 +1,22 @@ +%% This module has a media type provided with an explicit charset. + +-module(charset_in_content_types_provided_h). + +-export([init/2]). +-export([content_types_provided/2]). +-export([charsets_provided/2]). +-export([get_text_plain/2]). + +init(Req, Opts) -> + {cowboy_rest, Req, Opts}. + +content_types_provided(Req, State) -> + {[ + {{<<"text">>, <<"plain">>, [{<<"charset">>, <<"utf-8">>}]}, get_text_plain} + ], Req, State}. + +charsets_provided(Req, State) -> + {[<<"utf-16">>, <<"iso-8861-1">>], Req, State}. + +get_text_plain(Req, State) -> + {<<"This is REST!">>, Req, State}. diff --git a/test/handlers/charset_in_content_types_provided_implicit_h.erl b/test/handlers/charset_in_content_types_provided_implicit_h.erl new file mode 100644 index 0000000..fcdd0c8 --- /dev/null +++ b/test/handlers/charset_in_content_types_provided_implicit_h.erl @@ -0,0 +1,24 @@ +%% This module has a media type provided with a wildcard +%% and a list of charsets that is limited. + +-module(charset_in_content_types_provided_implicit_h). + +-export([init/2]). +-export([content_types_provided/2]). +-export([charsets_provided/2]). +-export([get_text_plain/2]). + +init(Req, Opts) -> + {cowboy_rest, Req, Opts}. + +content_types_provided(Req, State) -> + {[ + {{<<"text">>, <<"plain">>, '*'}, get_text_plain} + ], Req, State}. + +charsets_provided(Req, State) -> + {[<<"utf-8">>, <<"utf-16">>], Req, State}. + +get_text_plain(Req, State) -> + {<<"This is REST!">>, Req, State}. + diff --git a/test/handlers/charset_in_content_types_provided_implicit_no_callback_h.erl b/test/handlers/charset_in_content_types_provided_implicit_no_callback_h.erl new file mode 100644 index 0000000..ae17af3 --- /dev/null +++ b/test/handlers/charset_in_content_types_provided_implicit_no_callback_h.erl @@ -0,0 +1,21 @@ +%% This module has a media type provided with a wildcard +%% and lacks a charsets_provided callback. + +-module(charset_in_content_types_provided_implicit_no_callback_h). + +-export([init/2]). +-export([content_types_provided/2]). +-export([get_text_plain/2]). + +init(Req, Opts) -> + {cowboy_rest, Req, Opts}. + +content_types_provided(Req, State) -> + {[ + {{<<"text">>, <<"plain">>, '*'}, get_text_plain} + ], Req, State}. + +get_text_plain(Req, State) -> + {<<"This is REST!">>, Req, State}. + + diff --git a/test/rest_handler_SUITE.erl b/test/rest_handler_SUITE.erl index a9884b5..491c7bb 100644 --- a/test/rest_handler_SUITE.erl +++ b/test/rest_handler_SUITE.erl @@ -39,6 +39,12 @@ end_per_group(Name, _) -> init_dispatch(_) -> cowboy_router:compile([{'_', [ {"/", rest_hello_h, []}, + {"/charset_in_content_types_provided", + charset_in_content_types_provided_h, []}, + {"/charset_in_content_types_provided_implicit", + charset_in_content_types_provided_implicit_h, []}, + {"/charset_in_content_types_provided_implicit_no_callback", + charset_in_content_types_provided_implicit_no_callback_h, []}, {"/provide_callback_missing", provide_callback_missing_h, []}, {"/switch_handler", switch_handler_h, run}, {"/switch_handler_opts", switch_handler_h, hibernate} @@ -74,6 +80,74 @@ error_on_malformed_if_none_match(Config) -> {response, _, 400, _} = gun:await(ConnPid, Ref), ok. +charset_in_content_types_provided(Config) -> + doc("When a charset is matched explictly in content_types_provided, " + "that charset is used and the charsets_provided callback is ignored."), + ConnPid = gun_open(Config), + Ref = gun:get(ConnPid, "/charset_in_content_types_provided", [ + {<<"accept">>, <<"text/plain;charset=utf-8">>}, + {<<"accept-charset">>, <<"utf-16, utf-8;q=0.5">>}, + {<<"accept-encoding">>, <<"gzip">>} + ]), + {response, _, 200, Headers} = gun:await(ConnPid, Ref), + {_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers), + ok. + +charset_in_content_types_provided_implicit_match(Config) -> + doc("When a charset is matched implicitly in content_types_provided, " + "the charsets_provided callback is used to determine if the media " + "type will match."), + ConnPid = gun_open(Config), + Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [ + {<<"accept">>, <<"text/plain;charset=utf-16">>}, + {<<"accept-encoding">>, <<"gzip">>} + ]), + {response, _, 200, Headers} = gun:await(ConnPid, Ref), + {_, <<"text/plain; charset=utf-16">>} = lists:keyfind(<<"content-type">>, 1, Headers), + ok. + +charset_in_content_types_provided_implicit_nomatch(Config) -> + doc("When a charset is matched implicitly in content_types_provided, " + "the charsets_provided callback is used to determine if the media " + "type will match. If it doesn't, try the next media type."), + ConnPid = gun_open(Config), + Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [ + {<<"accept">>, <<"text/plain;charset=utf-32, text/plain">>}, + {<<"accept-encoding">>, <<"gzip">>} + ]), + {response, _, 200, Headers} = gun:await(ConnPid, Ref), + %% We end up with the first charset listed in charsets_provided. + {_, <<"text/plain; charset=utf-8">>} = lists:keyfind(<<"content-type">>, 1, Headers), + ok. + +charset_in_content_types_provided_implicit_nomatch_error(Config) -> + doc("When a charset is matched implicitly in content_types_provided, " + "the charsets_provided callback is used to determine if the media " + "type will match. If it doesn't, and there's no other media type, " + "a 406 is returned."), + ConnPid = gun_open(Config), + Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit", [ + {<<"accept">>, <<"text/plain;charset=utf-32">>}, + {<<"accept-encoding">>, <<"gzip">>} + ]), + {response, _, 406, _} = gun:await(ConnPid, Ref), + ok. + +charset_in_content_types_provided_implicit_no_callback(Config) -> + doc("When a charset is matched implicitly in content_types_provided, " + "and the charsets_provided callback is not exported, the media " + "type will match but the charset will be ignored like all other " + "parameters."), + ConnPid = gun_open(Config), + Ref = gun:get(ConnPid, "/charset_in_content_types_provided_implicit_no_callback", [ + {<<"accept">>, <<"text/plain;charset=utf-32">>}, + {<<"accept-encoding">>, <<"gzip">>} + ]), + {response, _, 200, Headers} = gun:await(ConnPid, Ref), + %% The charset is ignored as if it was any other parameter. + {_, <<"text/plain">>} = lists:keyfind(<<"content-type">>, 1, Headers), + ok. + provide_callback_missing(Config) -> doc("A 500 response must be sent when the ProvideCallback can't be called."), ConnPid = gun_open(Config), -- cgit v1.2.3