From c605c4fa408272f98b78e06577fb7c446b7ea2e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?=
Date: Wed, 26 Oct 2011 04:07:08 +0200
Subject: Add 'Accept' header parsing

Rework the cowboy_http_req:parse_header/2 function while I was at it.
---
 src/cowboy_http.erl           | 221 +++++++++++++++++++++++++++++++++++++++++-
 src/cowboy_http_protocol.erl  |   2 +-
 src/cowboy_http_req.erl       |  65 +++++++------
 src/cowboy_http_websocket.erl |   2 +-
 4 files changed, 256 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl
index 8648b86..5036d63 100644
--- a/src/cowboy_http.erl
+++ b/src/cowboy_http.erl
@@ -16,7 +16,8 @@
 -module(cowboy_http).
 
 %% Parsing.
--export([list/2, nonempty_list/2, token/2, token_ci/2]).
+-export([list/2, nonempty_list/2,
+	media_range/2, token/2, token_ci/2, quoted_string/2]).
 
 %% Interpretation.
 -export([connection_to_atom/1]).
@@ -63,6 +64,144 @@ list(Data, Fun, Acc) ->
 				end)
 		end).
 
+%% @doc Parse a media range: type "/" subtype, then optional ";"-separated params.
+-spec media_range(binary(), fun()) -> any().
+media_range(Data, Fun) ->
+	whitespace(Data,
+		fun (<<>>) -> {error, badarg};
+			(Rest) -> media_range_type(Rest, Fun)
+		end).
+
+-spec media_range_type(binary(), fun()) -> any().
+media_range_type(Data, Fun) ->
+	token_ci(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Type) -> whitespace(Rest,
+				fun (<< $/, Rest2/bits >>) -> whitespace(Rest2,
+						fun (<<>>) -> {error, badarg};
+							(Rest3) -> media_range_subtype(Rest3, Fun, Type)
+						end);
+					(_Rest2) -> {error, badarg}
+				end)
+		end).
+
+-spec media_range_subtype(binary(), fun(), binary()) -> any().
+media_range_subtype(Data, Fun, Type) ->
+	token_ci(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, SubType) -> media_range_params(Rest, Fun, Type, SubType, [])
+		end).
+
+-spec media_range_params(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}]) -> any().
+media_range_params(Data, Fun, Type, SubType, Acc) ->
+	whitespace(Data,
+		fun (<< $;, Rest/bits >>) ->
+				whitespace(Rest,
+					fun (Rest2) ->
+						media_range_param_attr(Rest2, Fun, Type, SubType, Acc)
+					end);
+			(Rest) -> Fun(Rest, {{Type, SubType, lists:reverse(Acc)}, 1000, []})
+		end).
+
+-spec media_range_param_attr(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}]) -> any().
+media_range_param_attr(Data, Fun, Type, SubType, Acc) ->
+	token_ci(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Attr) ->
+				whitespace(Rest,
+					fun (<< $=, Rest2/bits >>) ->
+							whitespace(Rest2,
+								fun (<<>>) -> {error, badarg};
+									(Rest3) ->
+										media_range_param_value(Rest3, Fun,
+											Type, SubType, Acc, Attr)
+								end);
+						(_Rest2) ->
+							{error, badarg}
+					end)
+		end).
+
+-spec media_range_param_value(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], binary()) -> any().
+media_range_param_value(Data, Fun, Type, SubType, Acc, <<"q">>) ->
+	quality(Data,
+		fun (Rest, Quality) ->
+			accept_ext(Rest, Fun, Type, SubType, Acc, Quality, [])
+		end);
+media_range_param_value(Data = << $", _/bits >>, Fun,
+		Type, SubType, Acc, Attr) ->
+	quoted_string(Data,
+		fun (Rest, Value) ->
+			media_range_params(Rest, Fun,
+				Type, SubType, [{Attr, Value}|Acc])
+		end);
+media_range_param_value(Data, Fun, Type, SubType, Acc, Attr) ->
+	token(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Value) ->
+				media_range_params(Rest, Fun,
+					Type, SubType, [{Attr, Value}|Acc])
+		end).
+
+-spec accept_ext(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()]) -> any().
+accept_ext(Data, Fun, Type, SubType, Params, Quality, Acc) ->
+	whitespace(Data,
+		fun (<< $;, Rest/bits >>) ->
+				whitespace(Rest,
+					fun (Rest2) ->
+						accept_ext_attr(Rest2, Fun,
+							Type, SubType, Params, Quality, Acc)
+					end);
+			(Rest) ->
+				Fun(Rest, {{Type, SubType, lists:reverse(Params)},
+					Quality, lists:reverse(Acc)})
+		end).
+
+-spec accept_ext_attr(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()]) -> any().
+accept_ext_attr(Data, Fun, Type, SubType, Params, Quality, Acc) ->
+	token_ci(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Attr) ->
+				whitespace(Rest,
+					fun (<< $=, Rest2/bits >>) ->
+							whitespace(Rest2,
+								fun (<<>>) -> {error, badarg};
+									(Rest3) ->
+										accept_ext_value(Rest3, Fun,
+											Type, SubType, Params,
+											Quality, Acc, Attr)
+								end);
+						(Rest2) ->
+							accept_ext(Rest2, Fun,
+								Type, SubType, Params,
+								Quality, [Attr|Acc])
+					end)
+		end).
+
+-spec accept_ext_value(binary(), fun(), binary(), binary(),
+	[{binary(), binary()}], 0..1000,
+	[{binary(), binary()} | binary()], binary()) -> any().
+accept_ext_value(Data = << $", _/bits >>, Fun,
+		Type, SubType, Params, Quality, Acc, Attr) ->
+	quoted_string(Data,
+		fun (Rest, Value) ->
+			accept_ext(Rest, Fun,
+				Type, SubType, Params, Quality, [{Attr, Value}|Acc])
+		end);
+accept_ext_value(Data, Fun, Type, SubType, Params, Quality, Acc, Attr) ->
+	token(Data,
+		fun (_Rest, <<>>) -> {error, badarg};
+			(Rest, Value) ->
+				accept_ext(Rest, Fun,
+					Type, SubType, Params, Quality, [{Attr, Value}|Acc])
+		end).
+
 %% @doc Skip whitespace.
 -spec whitespace(binary(), fun()) -> any().
 whitespace(<< C, Rest/bits >>, Fun)
@@ -99,6 +238,48 @@ token(<< C, Rest/bits >>, Fun, Case = ci, Acc) ->
 token(<< C, Rest/bits >>, Fun, Case, Acc) ->
 	token(Rest, Fun, Case, << Acc/binary, C >>).
 
+%% @doc Parse a quoted string, unescaping backslash-escaped characters.
+-spec quoted_string(binary(), fun()) -> any().
+quoted_string(<< $", Rest/bits >>, Fun) ->
+	quoted_string(Rest, Fun, <<>>).
+
+-spec quoted_string(binary(), fun(), binary()) -> any().
+quoted_string(<<>>, _Fun, _Acc) ->
+	{error, badarg};
+quoted_string(<< $", Rest/bits >>, Fun, Acc) ->
+	Fun(Rest, Acc);
+quoted_string(<< $\\, C, Rest/bits >>, Fun, Acc) ->
+	quoted_string(Rest, Fun, << Acc/binary, C >>);
+quoted_string(<< C, Rest/bits >>, Fun, Acc) ->
+	quoted_string(Rest, Fun, << Acc/binary, C >>).
+
+%% @doc Parse a quality value ("0" to "1.000") into an integer from 0 to 1000.
+-spec quality(binary(), fun()) -> any().
+quality(<< $0, $., Rest/bits >>, Fun) ->
+	quality(Rest, Fun, 0, 100);
+quality(<< $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 0);
+quality(<< $1, $., $0, $0, $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, $., $0, $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, $., $0, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(<< $1, Rest/bits >>, Fun) ->
+	Fun(Rest, 1000);
+quality(_Data, _Fun) ->
+	{error, badarg}.
+
+-spec quality(binary(), fun(), integer(), 0 | 1 | 10 | 100) -> any().
+quality(Data, Fun, Q, 0) ->
+	Fun(Data, Q);
+quality(<< C, Rest/bits >>, Fun, Q, M)
+		when C =:= $0; C =:= $1; C =:= $2; C =:= $3; C =:= $4;
+			C =:= $5; C =:= $6; C =:= $7; C =:= $8; C =:= $9 ->
+	quality(Rest, Fun, Q + (C - $0) * M, M div 10);
+quality(Data, Fun, Q, _M) ->
+	Fun(Data, Q).
+
 %% Interpretation.
 
 %% @doc Walk through a tokens list and return whether
@@ -135,6 +316,44 @@ nonempty_token_list_test_() ->
 	],
 	[{V, fun() -> R = nonempty_list(V, fun token/2) end} || {V, R} <- Tests].
 
+media_range_list_test_() ->
+	%% {Header value, Expected list of {MediaRange, Quality, AcceptExt}}
+	Tests = [
+		{<<"audio/*; q=0.2, audio/basic">>, [
+			{{<<"audio">>, <<"*">>, []}, 200, []},
+			{{<<"audio">>, <<"basic">>, []}, 1000, []}
+		]},
+		{<<"text/plain; q=0.5, text/html, "
+			"text/x-dvi; q=0.8, text/x-c">>, [
+			{{<<"text">>, <<"plain">>, []}, 500, []},
+			{{<<"text">>, <<"html">>, []}, 1000, []},
+			{{<<"text">>, <<"x-dvi">>, []}, 800, []},
+			{{<<"text">>, <<"x-c">>, []}, 1000, []}
+		]},
+		{<<"text/*, text/html, text/html;level=1, */*">>, [
+			{{<<"text">>, <<"*">>, []}, 1000, []},
+			{{<<"text">>, <<"html">>, []}, 1000, []},
+			{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
+			{{<<"*">>, <<"*">>, []}, 1000, []}
+		]},
+		{<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
+			"text/html;level=2;q=0.4, */*;q=0.5">>, [
+			{{<<"text">>, <<"*">>, []}, 300, []},
+			{{<<"text">>, <<"html">>, []}, 700, []},
+			{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
+			{{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
+			{{<<"*">>, <<"*">>, []}, 500, []}
+		]},
+		{<<"text/html;level=1;quoted=\"hi hi hi\";"
+			"q=0.123;standalone;complex=gits, text/plain">>, [
+			{{<<"text">>, <<"html">>,
+				[{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
+			[<<"standalone">>, {<<"complex">>, <<"gits">>}]},
+			{{<<"text">>, <<"plain">>, []}, 1000, []}
+		]}
+	],
+	[{V, fun() -> R = list(V, fun media_range/2) end} || {V, R} <- Tests].
+
 connection_to_atom_test_() ->
 	%% {Tokens, Result}
 	Tests = [
diff --git a/src/cowboy_http_protocol.erl b/src/cowboy_http_protocol.erl
index b91101a..b0ee590 100644
--- a/src/cowboy_http_protocol.erl
+++ b/src/cowboy_http_protocol.erl
@@ -172,7 +172,7 @@ header({http_header, _I, 'Host', _R, _V}, Req, State) ->
 header({http_header, _I, 'Connection', _R, Connection},
 		Req=#http_req{headers=Headers}, State) ->
 	Req2 = Req#http_req{headers=[{'Connection', Connection}|Headers]},
-	{tokens, ConnTokens, Req3}
+	{ConnTokens, Req3}
 		= cowboy_http_req:parse_header('Connection', Req2),
 	ConnAtom = cowboy_http:connection_to_atom(ConnTokens),
 	parse_header(Req3#http_req{connection=ConnAtom}, State);
diff --git a/src/cowboy_http_req.erl b/src/cowboy_http_req.erl
index 539c7f0..fb126e5 100644
--- a/src/cowboy_http_req.erl
+++ b/src/cowboy_http_req.erl
@@ -191,47 +191,50 @@ headers(Req) ->
 %% returned is used as a return value.
 %% @see parse_header/3
 -spec parse_header(http_header(), #http_req{})
-	-> {tokens, [binary()], #http_req{}}
-	| {undefined, binary(), #http_req{}}
-	| {error, badarg}.
-parse_header('Connection', Req) ->
-	parse_header('Connection', Req, []);
-parse_header(Name, Req) ->
-	parse_header(Name, Req, undefined).
+	-> {any(), #http_req{}} | {undefined, any(), #http_req{}} | {error, badarg}.
+parse_header(Name, Req=#http_req{p_headers=PHeaders}) ->
+	case lists:keyfind(Name, 1, PHeaders) of
+		false -> parse_header(Name, Req, parse_header_default(Name));
+		{Name, Value} -> {Value, Req}
+	end.
+
+%% @doc Default values for semantic header parsing.
+-spec parse_header_default(http_header()) -> any().
+parse_header_default('Accept') -> [];
+parse_header_default('Connection') -> [];
+parse_header_default(_Name) -> undefined.
 
 %% @doc Semantically parse headers.
 %%
-%% When the header is known, a named tuple is returned containing
-%% {Type, P, Req} with Type being the type of value found in P.
-%% For example, the header 'Connection' is a list of tokens, therefore
-%% the value returned will be a list of binary values and Type will be
-%% 'tokens'.
-%%
-%% When the header is known but not found, the tuple {Type, Default, Req}
-%% is returned instead.
-%%
-%% When the header is unknown, the value is returned directly as an
-%% 'undefined' tagged tuple.
+%% When the header is unknown, {undefined, Value, Req} is returned with Value unparsed.
 -spec parse_header(http_header(), #http_req{}, any())
-	-> {tokens, [binary()], #http_req{}}
-	| {undefined, binary(), #http_req{}}
-	| {error, badarg}.
-parse_header(Name, Req=#http_req{p_headers=PHeaders}, Default)
-		when Name =:= 'Connection' ->
+	-> {any(), #http_req{}} | {undefined, any(), #http_req{}} | {error, badarg}.
+parse_header(Name, Req, Default) when Name =:= 'Accept' ->
+	parse_header(Name, Req, Default,
+		fun (Value) ->
+			cowboy_http:list(Value, fun cowboy_http:media_range/2)
+		end);
+parse_header(Name, Req, Default) when Name =:= 'Connection' ->
+	parse_header(Name, Req, Default,
+		fun (Value) ->
+			cowboy_http:nonempty_list(Value, fun cowboy_http:token_ci/2)
+		end);
+parse_header(Name, Req, Default) ->
+	{Value, Req2} = header(Name, Req, Default),
+	{undefined, Value, Req2}.
+
+parse_header(Name, Req=#http_req{p_headers=PHeaders}, Default, Fun) ->
 	case header(Name, Req) of
-		{undefined, Req2} -> {tokens, Default, Req2};
+		{undefined, Req2} ->
+			{Default, Req2#http_req{p_headers=[{Name, Default}|PHeaders]}};
 		{Value, Req2} ->
-			case cowboy_http:nonempty_list(Value, fun cowboy_http:token_ci/2) of
+			case Fun(Value) of
 				{error, badarg} ->
 					{error, badarg};
 				P ->
-					{tokens, P, Req2#http_req{
-						p_headers=[{Name, P}|PHeaders]}}
+					{P, Req2#http_req{p_headers=[{Name, P}|PHeaders]}}
 			end
-	end;
-parse_header(Name, Req, Default) ->
-	{Value, Req2} = header(Name, Req, Default),
-	{undefined, Value, Req2}.
+	end.
 
 %% @equiv cookie(Name, Req, undefined)
 -spec cookie(binary(), #http_req{})
diff --git a/src/cowboy_http_websocket.erl b/src/cowboy_http_websocket.erl
index a5bc5f8..136160e 100644
--- a/src/cowboy_http_websocket.erl
+++ b/src/cowboy_http_websocket.erl
@@ -77,7 +77,7 @@ upgrade(ListenerPid, Handler, Opts, Req) ->
 %% @todo Upgrade is a list of products and should be parsed as such.
 -spec websocket_upgrade(#state{}, #http_req{}) -> {ok, #state{}, #http_req{}}.
 websocket_upgrade(State, Req) ->
-	{tokens, ConnTokens, Req2}
+	{ConnTokens, Req2}
 		= cowboy_http_req:parse_header('Connection', Req),
 	true = lists:member(<<"upgrade">>, ConnTokens),
 	{WS, Req3} = cowboy_http_req:header('Upgrade', Req2),
-- 
cgit v1.2.3