From 4b43d063843891153be8f6caebaa2ad5aba8d04d Mon Sep 17 00:00:00 2001 From: Ali Sabil Date: Sun, 8 Apr 2012 11:51:51 +0200 Subject: Make charsets parsing more relaxed Certain user agents send invalid Accept-Charset headers, like the following: "ISO-8859-1;utf-8;q=0.7,*;q=0.7" The user agent with which this behavior was observed presented itself with the User-Agent string: "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9) Gecko/2008052906 Firefox/3.0", although this doesn't appear to be correct: the request might have been mangled by a transparent proxy. --- src/cowboy_http.erl | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/cowboy_http.erl b/src/cowboy_http.erl index 2f4f982..7bea8e0 100644 --- a/src/cowboy_http.erl +++ b/src/cowboy_http.erl @@ -269,7 +269,15 @@ maybe_qparam(Data, Fun) -> fun (<< $;, Rest/binary >>) -> whitespace(Rest, fun (Rest2) -> - qparam(Rest2, Fun) + %% This is a non-strict parsing clause required by some user agents + %% that use the wrong delimiter putting a charset where a qparam is + %% expected. + try qparam(Rest2, Fun) of + Result -> Result + catch + error:function_clause -> + Fun(<<",", Rest2/binary>>, 1000) + end end); (Rest) -> Fun(Rest, 1000) @@ -879,6 +887,12 @@ nonempty_charset_list_test_() -> {<<"iso-8859-5, unicode-1-1;q=0.8">>, [ {<<"iso-8859-5">>, 1000}, {<<"unicode-1-1">>, 800} + ]}, + %% Some user agents send this invalid value for the Accept-Charset header + {<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [ + {<<"iso-8859-1">>, 1000}, + {<<"utf-8">>, 700}, + {<<"*">>, 700} ]} ], [{V, fun() -> R = nonempty_list(V, fun conneg/2) end} || {V, R} <- Tests]. -- cgit v1.2.3