aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngela Andin <[email protected]>2017-05-15 11:20:34 +0200
committerGitHub <[email protected]>2017-05-15 11:20:34 +0200
commitf1f81f0039466d203f66c4010fb94266bf8a7e9f (patch)
tree157356bae0db9b529209f6dfae13e5a36a33a332
parentb18f2c3428a52ad648281fc57ffee3d12a1caa0e (diff)
parenta42858f00ba09759d5f723895e52cd89c77165ac (diff)
downloadotp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.tar.gz
otp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.tar.bz2
otp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.zip
Merge pull request #1322 from tsloughter/binary_uri_encode
Add unicode binary support to http_uri functions OTP-14404
-rw-r--r--lib/inets/doc/src/http_uri.xml19
-rw-r--r--lib/inets/src/http_lib/http_uri.erl77
-rw-r--r--lib/inets/src/http_lib/http_util.erl4
-rw-r--r--lib/inets/test/uri_SUITE.erl104
4 files changed, 176 insertions, 28 deletions
diff --git a/lib/inets/doc/src/http_uri.xml b/lib/inets/doc/src/http_uri.xml
index 696b7dfa31..acf0d2163a 100644
--- a/lib/inets/doc/src/http_uri.xml
+++ b/lib/inets/doc/src/http_uri.xml
@@ -45,6 +45,7 @@
this module:</p>
<p><c>boolean() = true | false</c></p>
<p><c>string()</c> = list of ASCII characters</p>
+ <p><c>unicode_binary()</c> = binary() with characters encoded in the UTF-8 coding standard</p>
</section>
@@ -53,22 +54,22 @@
<p>Type definitions that are related to URI:</p>
<taglist>
- <tag><c>uri() = string()</c></tag>
+ <tag><c>uri() = string() | unicode:unicode_binary()</c></tag>
<item><p>Syntax according to the URI definition in RFC 3986,
for example, "http://www.erlang.org/"</p></item>
- <tag><c>user_info() = string()</c></tag>
+ <tag><c>user_info() = string() | unicode:unicode_binary()</c></tag>
<item><p></p></item>
<tag><c>scheme() = atom()</c></tag>
<item><p>Example: http, https</p></item>
- <tag><c>host() = string()</c></tag>
+ <tag><c>host() = string() | unicode:unicode_binary()</c></tag>
<item><p></p></item>
<tag><c>port() = pos_integer()</c></tag>
<item><p></p></item>
- <tag><c>path() = string()</c></tag>
+ <tag><c>path() = string() | unicode:unicode_binary()</c></tag>
<item><p>Represents a file path or directory path</p></item>
- <tag><c>query() = string()</c></tag>
+ <tag><c>query() = string() | unicode:unicode_binary()</c></tag>
<item><p></p></item>
- <tag><c>fragment() = string()</c></tag>
+ <tag><c>fragment() = string() | unicode:unicode_binary()</c></tag>
<item><p></p></item>
</taglist>
@@ -83,7 +84,7 @@
<fsummary>Decodes a hexadecimal encoded URI.</fsummary>
<type>
- <v>HexEncodedURI = string() - A possibly hexadecimal encoded URI</v>
+ <v>HexEncodedURI = string() | unicode:unicode_binary() - A possibly hexadecimal encoded URI</v>
<v>URI = uri()</v>
</type>
@@ -98,7 +99,7 @@
<fsummary>Encodes a hexadecimal encoded URI.</fsummary>
<type>
<v>URI = uri()</v>
- <v>HexEncodedURI = string() - Hexadecimal encoded URI</v>
+ <v>HexEncodedURI = string() | unicode:unicode_binary() - Hexadecimal encoded URI</v>
</type>
<desc>
@@ -145,7 +146,7 @@
<p>Scheme validation fun is to be defined as follows:</p>
<code>
-fun(SchemeStr :: string()) ->
+fun(SchemeStr :: string() | unicode:unicode_binary()) ->
valid | {error, Reason :: term()}.
</code>
diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl
index cb3e107ccf..4568d165e7 100644
--- a/lib/inets/src/http_lib/http_uri.erl
+++ b/lib/inets/src/http_lib/http_uri.erl
@@ -102,16 +102,23 @@ parse(AbsURI, Opts) ->
reserved() ->
sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?,
- $#, $[, $], $<, $>, $\", ${, $}, $|,
+ $#, $[, $], $<, $>, $\", ${, $}, $|, %"
$\\, $', $^, $%, $ ]).
-encode(URI) ->
+encode(URI) when is_list(URI) ->
Reserved = reserved(),
- lists:append([uri_encode(Char, Reserved) || Char <- URI]).
+ lists:append([uri_encode(Char, Reserved) || Char <- URI]);
+encode(URI) when is_binary(URI) ->
+ Reserved = reserved(),
+ << <<(uri_encode_binary(Char, Reserved))/binary>> || <<Char>> <= URI >>.
-decode(String) ->
- do_decode(String).
+decode(String) when is_list(String) ->
+ do_decode(String);
+decode(String) when is_binary(String) ->
+ do_decode_binary(String).
+do_decode([$+|Rest]) ->
+ [$ |do_decode(Rest)];
do_decode([$%,Hex1,Hex2|Rest]) ->
[hex2dec(Hex1)*16+hex2dec(Hex2)|do_decode(Rest)];
do_decode([First|Rest]) ->
@@ -119,6 +126,14 @@ do_decode([First|Rest]) ->
do_decode([]) ->
[].
+do_decode_binary(<<$+, Rest/bits>>) ->
+ <<$ , (do_decode_binary(Rest))/binary>>;
+do_decode_binary(<<$%, Hex:2/binary, Rest/bits>>) ->
+ <<(binary_to_integer(Hex, 16)), (do_decode_binary(Rest))/binary>>;
+do_decode_binary(<<First:1/binary, Rest/bits>>) ->
+ <<First/binary, (do_decode_binary(Rest))/binary>>;
+do_decode_binary(<<>>) ->
+ <<>>.
%%%========================================================================
%%% Internal functions
@@ -162,9 +177,30 @@ extract_scheme(Str, Opts) ->
{error, Error}
end;
_ ->
- {ok, list_to_atom(http_util:to_lower(Str))}
+ {ok, to_atom(http_util:to_lower(Str))}
end.
+to_atom(S) when is_list(S) ->
+ list_to_atom(S);
+to_atom(S) when is_binary(S) ->
+ binary_to_atom(S, unicode).
+
+parse_uri_rest(Scheme, DefaultPort, <<"//", URIPart/binary>>, Opts) ->
+ {Authority, PathQueryFragment} =
+ split_uri(URIPart, "[/?#]", {URIPart, <<"">>}, 1, 0),
+ {RawPath, QueryFragment} =
+ split_uri(PathQueryFragment, "[?#]", {PathQueryFragment, <<"">>}, 1, 0),
+ {Query, Fragment} =
+ split_uri(QueryFragment, "#", {QueryFragment, <<"">>}, 1, 0),
+ {UserInfo, HostPort} = split_uri(Authority, "@", {<<"">>, Authority}, 1, 1),
+ {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts),
+ Path = path(RawPath),
+ case lists:keyfind(fragment, 1, Opts) of
+ {fragment, true} ->
+ {ok, {Scheme, UserInfo, Host, Port, Path, Query, Fragment}};
+ _ ->
+ {ok, {Scheme, UserInfo, Host, Port, Path, Query}}
+ end;
parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) ->
{Authority, PathQueryFragment} =
split_uri(URIPart, "[/?#]", {URIPart, ""}, 1, 0),
@@ -185,6 +221,11 @@ parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) ->
%% In this version of the function, we no longer need
%% the Scheme argument, but just in case...
+parse_host_port(_Scheme, DefaultPort, <<"[", HostPort/binary>>, Opts) -> %ipv6
+ {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, <<"">>}, 1, 1),
+ Host2 = maybe_ipv6_host_with_brackets(Host, Opts),
+ {_, Port} = split_uri(ColonPort, ":", {<<"">>, DefaultPort}, 0, 1),
+ {Host2, int_port(Port)};
parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6
{Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1),
Host2 = maybe_ipv6_host_with_brackets(Host, Opts),
@@ -198,12 +239,19 @@ parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) ->
split_uri(UriPart, SplitChar, NoMatchResult, SkipLeft, SkipRight) ->
case re:run(UriPart, SplitChar, [{capture, first}]) of
{match, [{Match, _}]} ->
- {string:substr(UriPart, 1, Match + 1 - SkipLeft),
- string:substr(UriPart, Match + 1 + SkipRight, length(UriPart))};
+ {string:slice(UriPart, 0, Match + 1 - SkipLeft),
+ string:slice(UriPart, Match + SkipRight, string:length(UriPart))};
nomatch ->
NoMatchResult
end.
+maybe_ipv6_host_with_brackets(Host, Opts) when is_binary(Host) ->
+ case lists:keysearch(ipv6_host_with_brackets, 1, Opts) of
+ {value, {ipv6_host_with_brackets, true}} ->
+ <<"[", Host/binary, "]">>;
+ _ ->
+ Host
+ end;
maybe_ipv6_host_with_brackets(Host, Opts) ->
case lists:keysearch(ipv6_host_with_brackets, 1, Opts) of
{value, {ipv6_host_with_brackets, true}} ->
@@ -212,15 +260,18 @@ maybe_ipv6_host_with_brackets(Host, Opts) ->
Host
end.
-
int_port(Port) when is_integer(Port) ->
Port;
+int_port(Port) when is_binary(Port) ->
+ binary_to_integer(Port);
int_port(Port) when is_list(Port) ->
list_to_integer(Port);
%% This is the case where no port was found and there was no default port
int_port(no_default_port) ->
throw({error, no_default_port}).
+path(<<"">>) ->
+ <<"/">>;
path("") ->
"/";
path(Path) ->
@@ -234,6 +285,14 @@ uri_encode(Char, Reserved) ->
[Char]
end.
+uri_encode_binary(Char, Reserved) ->
+ case sets:is_element(Char, Reserved) of
+ true ->
+ << $%, (integer_to_binary(Char, 16))/binary >>;
+ false ->
+ <<Char>>
+ end.
+
hex2dec(X) when (X>=$0) andalso (X=<$9) -> X-$0;
hex2dec(X) when (X>=$A) andalso (X=<$F) -> X-$A+10;
hex2dec(X) when (X>=$a) andalso (X=<$f) -> X-$a+10.
diff --git a/lib/inets/src/http_lib/http_util.erl b/lib/inets/src/http_lib/http_util.erl
index 20c342dd66..78b0aa2885 100644
--- a/lib/inets/src/http_lib/http_util.erl
+++ b/lib/inets/src/http_lib/http_util.erl
@@ -35,10 +35,10 @@
%%% Internal application API
%%%=========================================================================
to_upper(Str) ->
- string:to_upper(Str).
+ string:uppercase(Str).
to_lower(Str) ->
- string:to_lower(Str).
+ string:lowercase(Str).
%% Example: Mon, 09-Dec-2002 13:46:00 GMT
convert_netscapecookie_date([_D,_A,_Y, $,, $ ,
diff --git a/lib/inets/test/uri_SUITE.erl b/lib/inets/test/uri_SUITE.erl
index b26c645821..d055af59e3 100644
--- a/lib/inets/test/uri_SUITE.erl
+++ b/lib/inets/test/uri_SUITE.erl
@@ -25,6 +25,7 @@
-module(uri_SUITE).
+-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include("inets_test_lib.hrl").
@@ -50,7 +51,8 @@ all() ->
fragments,
escaped,
hexed_query,
- scheme_validation
+ scheme_validation,
+ encode_decode
].
%%--------------------------------------------------------------------
@@ -73,7 +75,10 @@ end_per_testcase(_Case, _Config) ->
ipv4(Config) when is_list(Config) ->
{ok, {http,[],"127.0.0.1",80,"/foobar.html",[]}} =
- http_uri:parse("http://127.0.0.1/foobar.html").
+ http_uri:parse("http://127.0.0.1/foobar.html"),
+
+ {ok, {http,<<>>,<<"127.0.0.1">>,80,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://127.0.0.1/foobar.html">>).
ipv6(Config) when is_list(Config) ->
{ok, {http,[],"2010:836B:4179::836B:4179",80,"/foobar.html",[]}} =
@@ -89,24 +94,52 @@ ipv6(Config) when is_list(Config) ->
[{foo, false}]),
{error,
{malformed_url, _, "http://2010:836B:4179::836B:4179/foobar.html"}} =
- http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html").
+ http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html"),
+
+ {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>),
+ {ok, {http,<<>>,<<"[2010:836B:4179::836B:4179]">>,80,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>,
+ [{ipv6_host_with_brackets, true}]),
+ {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>,
+ [{ipv6_host_with_brackets, false}]),
+ {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>,
+ [{foo, false}]),
+ {error,
+ {malformed_url, _, <<"http://2010:836B:4179::836B:4179/foobar.html">>}} =
+ http_uri:parse(<<"http://2010:836B:4179::836B:4179/foobar.html">>).
host(Config) when is_list(Config) ->
{ok, {http,[],"localhost",8888,"/foobar.html",[]}} =
- http_uri:parse("http://localhost:8888/foobar.html").
+ http_uri:parse("http://localhost:8888/foobar.html"),
+
+ {ok, {http,<<>>,<<"localhost">>,8888,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://localhost:8888/foobar.html">>).
userinfo(Config) when is_list(Config) ->
{ok, {http,"nisse:foobar","localhost",8888,"/foobar.html",[]}} =
- http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html").
+ http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html"),
+
+ {ok, {http,<<"nisse:foobar">>,<<"localhost">>,8888,<<"/foobar.html">>,<<>>}} =
+ http_uri:parse(<<"http://nisse:foobar@localhost:8888/foobar.html">>).
scheme(Config) when is_list(Config) ->
{error, no_scheme} = http_uri:parse("localhost/foobar.html"),
{error, {malformed_url, _, _}} =
- http_uri:parse("localhost:8888/foobar.html").
+ http_uri:parse("localhost:8888/foobar.html"),
+
+ {error, no_scheme} = http_uri:parse(<<"localhost/foobar.html">>),
+ {error, {malformed_url, _, _}} =
+ http_uri:parse(<<"localhost:8888/foobar.html">>).
queries(Config) when is_list(Config) ->
{ok, {http,[],"localhost",8888,"/foobar.html","?foo=bar&foobar=42"}} =
- http_uri:parse("http://localhost:8888/foobar.html?foo=bar&foobar=42").
+ http_uri:parse("http://localhost:8888/foobar.html?foo=bar&foobar=42"),
+
+ {ok, {http,<<>>,<<"localhost">>,8888,<<"/foobar.html">>,<<"?foo=bar&foobar=42">>}} =
+ http_uri:parse(<<"http://localhost:8888/foobar.html?foo=bar&foobar=42">>).
fragments(Config) when is_list(Config) ->
{ok, {http,[],"localhost",80,"/",""}} =
@@ -142,6 +175,41 @@ fragments(Config) when is_list(Config) ->
http_uri:parse("http://localhost?query#", [{fragment,true}]),
{ok, {http,[],"localhost",80,"/path","?query","#"}} =
http_uri:parse("http://localhost/path?query#", [{fragment,true}]),
+
+
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost#fragment">>),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost/path#fragment">>),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>}} =
+ http_uri:parse(<<"http://localhost?query#fragment">>),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>}} =
+ http_uri:parse(<<"http://localhost/path?query#fragment">>),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"#fragment">>}} =
+ http_uri:parse(<<"http://localhost#fragment">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"#fragment">>}} =
+ http_uri:parse(<<"http://localhost/path#fragment">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"#fragment">>}} =
+ http_uri:parse(<<"http://localhost?query#fragment">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"#fragment">>}} =
+ http_uri:parse(<<"http://localhost/path?query#fragment">>,
+ [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost/path">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost?query">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"">>}} =
+ http_uri:parse(<<"http://localhost/path?query">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"#">>}} =
+ http_uri:parse(<<"http://localhost#">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"#">>}} =
+ http_uri:parse(<<"http://localhost/path#">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"#">>}} =
+ http_uri:parse(<<"http://localhost?query#">>, [{fragment,true}]),
+ {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"#">>}} =
+ http_uri:parse(<<"http://localhost/path?query#">>, [{fragment,true}]),
ok.
escaped(Config) when is_list(Config) ->
@@ -152,7 +220,16 @@ escaped(Config) when is_list(Config) ->
{ok, {http,[],"www.somedomain.com",80,"/%25abc",[]}} =
http_uri:parse("http://www.somedomain.com/%25abc"),
{ok, {http,[],"www.somedomain.com",80,"/%25abc", "?foo=bar"}} =
- http_uri:parse("http://www.somedomain.com/%25abc?foo=bar").
+ http_uri:parse("http://www.somedomain.com/%25abc?foo=bar"),
+
+ {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%2Eabc">>,<<>>}} =
+ http_uri:parse(<<"http://www.somedomain.com/%2Eabc">>),
+ {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%252Eabc">>,<<>>}} =
+ http_uri:parse(<<"http://www.somedomain.com/%252Eabc">>),
+ {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%25abc">>,<<>>}} =
+ http_uri:parse(<<"http://www.somedomain.com/%25abc">>),
+ {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%25abc">>, <<"?foo=bar">>}} =
+ http_uri:parse(<<"http://www.somedomain.com/%25abc?foo=bar">>).
hexed_query(doc) ->
[{doc, "Solves OTP-6191"}];
@@ -196,6 +273,17 @@ scheme_validation(Config) when is_list(Config) ->
http_uri:parse("https://localhost#fragment",
[{scheme_validation_fun, none}]).
+encode_decode(Config) when is_list(Config) ->
+ ?assertEqual("foo%20bar", http_uri:encode("foo bar")),
+ ?assertEqual(<<"foo%20bar">>, http_uri:encode(<<"foo bar">>)),
+
+ ?assertEqual("foo bar", http_uri:decode("foo+bar")),
+ ?assertEqual(<<"foo bar">>, http_uri:decode(<<"foo+bar">>)),
+ ?assertEqual("foo bar", http_uri:decode("foo%20bar")),
+ ?assertEqual(<<"foo bar">>, http_uri:decode(<<"foo%20bar">>)),
+ ?assertEqual("foo\r\n", http_uri:decode("foo%0D%0A")),
+ ?assertEqual(<<"foo\r\n">>, http_uri:decode(<<"foo%0D%0A">>)).
+
%%--------------------------------------------------------------------
%% Internal Functions ------------------------------------------------