From 664ada1c19c277bcab250c1d38d579672c02a628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Sat, 3 Nov 2018 14:40:02 +0100 Subject: Make cow_http_hd:parse_accept/1 lowercase charsets Charsets are case insensitive. This is now in line with parse_accept_charset/1 and parse_content_type/1. --- src/cow_http_hd.erl | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl index 1e8faaf..87b4f12 100644 --- a/src/cow_http_hd.erl +++ b/src/cow_http_hd.erl @@ -241,8 +241,22 @@ media_range_param_sep(<< C, R/bits >>, Acc, T, S, P) when ?IS_WS(C) -> media_ran media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_WS(C) -> media_range_before_param(R, Acc, T, S, P); media_range_before_param(<< $q, $=, R/bits >>, Acc, T, S, P) -> media_range_weight(R, Acc, T, S, P); +media_range_before_param(<< "charset=", $", R/bits >>, Acc, T, S, P) -> media_range_charset_quoted(R, Acc, T, S, P, <<>>); +media_range_before_param(<< "charset=", R/bits >>, Acc, T, S, P) -> media_range_charset(R, Acc, T, S, P, <<>>); media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_TOKEN(C) -> ?LOWER(media_range_param, R, Acc, T, S, P, <<>>). +media_range_charset_quoted(<< $", R/bits >>, Acc, T, S, P, V) -> + media_range_param_sep(R, Acc, T, S, [{<<"charset">>, V}|P]); +media_range_charset_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, V) when ?IS_VCHAR_OBS(C) -> + ?LOWER(media_range_charset_quoted, R, Acc, T, S, P, V); +media_range_charset_quoted(<< C, R/bits >>, Acc, T, S, P, V) when ?IS_VCHAR_OBS(C) -> + ?LOWER(media_range_charset_quoted, R, Acc, T, S, P, V). + +media_range_charset(<< C, R/bits >>, Acc, T, S, P, V) when ?IS_TOKEN(C) -> + ?LOWER(media_range_charset, R, Acc, T, S, P, V); +media_range_charset(R, Acc, T, S, P, V) -> + media_range_param_sep(R, Acc, T, S, [{<<"charset">>, V}|P]). + media_range_param(<< $=, $", R/bits >>, Acc, T, S, P, K) -> media_range_quoted(R, Acc, T, S, P, K, <<>>); media_range_param(<< $=, C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << C >>); media_range_param(<< C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) -> ?LOWER(media_range_param, R, Acc, T, S, P, K). @@ -299,15 +313,24 @@ accept_value(R, Acc, T, S, P, Q, E, K, V) -> accept_ext_sep(R, Acc, T, S, P, Q, accept_ext() -> oneof([token(), parameter()]). -accept_params() -> +accept_exts() -> frequency([ {90, []}, {10, small_list(accept_ext())} ]). +accept_param() -> + frequency([ + {90, parameter()}, + {10, {<<"charset">>, oneof([token(), quoted_string()]), <<>>, <<>>}} + ]). + +accept_params() -> + small_list(accept_param()). + accept() -> ?LET({T, S, P, W, E}, - {token(), token(), small_list(parameter()), weight(), accept_params()}, + {token(), token(), accept_params(), weight(), accept_exts()}, {T, S, P, W, E, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P], case W of @@ -328,7 +351,10 @@ prop_parse_accept() -> << _, Accept/binary >> = iolist_to_binary([[$,, A] || {_, _, _, _, _, A} <- L]), ResL = parse_accept(Accept), CheckedL = [begin - ExpectedP = [{?LOWER(K), unquote(V)} || {K, V, _, _} <- P], + ExpectedP = [case ?LOWER(K) of + <<"charset">> -> {<<"charset">>, ?LOWER(unquote(V))}; + LowK -> {LowK, unquote(V)} + end || {K, V, _, _} <- P], ExpectedE = [case Ext of {K, V, _, _} -> {?LOWER(K), unquote(V)}; K -> ?LOWER(K) @@ -385,6 +411,9 @@ parse_accept_test_() -> {{<<"image">>, <<"jpeg">>, []}, 1000, []}, {{<<"*">>, <<"*">>, []}, 200, []}, {{<<"*">>, <<"*">>, []}, 200, []} + ]}, + {<<"text/plain; charset=UTF-8">>, [ + {{<<"text">>, <<"plain">>, [{<<"charset">>, <<"utf-8">>}]}, 1000, []} ]} ], [{V, fun() -> R = parse_accept(V) end} || {V, R} <- Tests]. -- cgit v1.2.3