diff options
author | Ingela Andin <[email protected]> | 2017-05-15 11:20:34 +0200 |
---|---|---|
committer | GitHub <[email protected]> | 2017-05-15 11:20:34 +0200 |
commit | f1f81f0039466d203f66c4010fb94266bf8a7e9f (patch) | |
tree | 157356bae0db9b529209f6dfae13e5a36a33a332 | |
parent | b18f2c3428a52ad648281fc57ffee3d12a1caa0e (diff) | |
parent | a42858f00ba09759d5f723895e52cd89c77165ac (diff) | |
download | otp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.tar.gz otp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.tar.bz2 otp-f1f81f0039466d203f66c4010fb94266bf8a7e9f.zip |
Merge pull request #1322 from tsloughter/binary_uri_encode
Add unicode binary support to http_uri functions
OTP-14404
-rw-r--r-- | lib/inets/doc/src/http_uri.xml | 19 | ||||
-rw-r--r-- | lib/inets/src/http_lib/http_uri.erl | 77 | ||||
-rw-r--r-- | lib/inets/src/http_lib/http_util.erl | 4 | ||||
-rw-r--r-- | lib/inets/test/uri_SUITE.erl | 104 |
4 files changed, 176 insertions, 28 deletions
diff --git a/lib/inets/doc/src/http_uri.xml b/lib/inets/doc/src/http_uri.xml index 696b7dfa31..acf0d2163a 100644 --- a/lib/inets/doc/src/http_uri.xml +++ b/lib/inets/doc/src/http_uri.xml @@ -45,6 +45,7 @@ this module:</p> <p><c>boolean() = true | false</c></p> <p><c>string()</c> = list of ASCII characters</p> + <p><c>unicode_binary()</c> = binary() with characters encoded in the UTF-8 coding standard</p> </section> @@ -53,22 +54,22 @@ <p>Type definitions that are related to URI:</p> <taglist> - <tag><c>uri() = string()</c></tag> + <tag><c>uri() = string() | unicode:unicode_binary()</c></tag> <item><p>Syntax according to the URI definition in RFC 3986, for example, "http://www.erlang.org/"</p></item> - <tag><c>user_info() = string()</c></tag> + <tag><c>user_info() = string() | unicode:unicode_binary()</c></tag> <item><p></p></item> <tag><c>scheme() = atom()</c></tag> <item><p>Example: http, https</p></item> - <tag><c>host() = string()</c></tag> + <tag><c>host() = string() | unicode:unicode_binary()</c></tag> <item><p></p></item> <tag><c>port() = pos_integer()</c></tag> <item><p></p></item> - <tag><c>path() = string()</c></tag> + <tag><c>path() = string() | unicode:unicode_binary()</c></tag> <item><p>Represents a file path or directory path</p></item> - <tag><c>query() = string()</c></tag> + <tag><c>query() = string() | unicode:unicode_binary()</c></tag> <item><p></p></item> - <tag><c>fragment() = string()</c></tag> + <tag><c>fragment() = string() | unicode:unicode_binary()</c></tag> <item><p></p></item> </taglist> @@ -83,7 +84,7 @@ <fsummary>Decodes a hexadecimal encoded URI.</fsummary> <type> - <v>HexEncodedURI = string() - A possibly hexadecimal encoded URI</v> + <v>HexEncodedURI = string() | unicode:unicode_binary() - A possibly hexadecimal encoded URI</v> <v>URI = uri()</v> </type> @@ -98,7 +99,7 @@ <fsummary>Encodes a hexadecimal encoded URI.</fsummary> <type> <v>URI = uri()</v> - <v>HexEncodedURI = string() - Hexadecimal encoded URI</v> + <v>HexEncodedURI = string() | unicode:unicode_binary() - Hexadecimal encoded URI</v> </type> <desc> @@ -145,7 +146,7 @@ <p>Scheme validation fun is to be defined as follows:</p> <code> -fun(SchemeStr :: string()) -> +fun(SchemeStr :: string() | unicode:unicode_binary()) -> valid | {error, Reason :: term()}. </code> diff --git a/lib/inets/src/http_lib/http_uri.erl b/lib/inets/src/http_lib/http_uri.erl index cb3e107ccf..4568d165e7 100644 --- a/lib/inets/src/http_lib/http_uri.erl +++ b/lib/inets/src/http_lib/http_uri.erl @@ -102,16 +102,23 @@ parse(AbsURI, Opts) -> reserved() -> sets:from_list([$;, $:, $@, $&, $=, $+, $,, $/, $?, - $#, $[, $], $<, $>, $\", ${, $}, $|, + $#, $[, $], $<, $>, $\", ${, $}, $|, %" $\\, $', $^, $%, $ ]). -encode(URI) -> +encode(URI) when is_list(URI) -> Reserved = reserved(), - lists:append([uri_encode(Char, Reserved) || Char <- URI]). + lists:append([uri_encode(Char, Reserved) || Char <- URI]); +encode(URI) when is_binary(URI) -> + Reserved = reserved(), + << <<(uri_encode_binary(Char, Reserved))/binary>> || <<Char>> <= URI >>. -decode(String) -> - do_decode(String). +decode(String) when is_list(String) -> + do_decode(String); +decode(String) when is_binary(String) -> + do_decode_binary(String). +do_decode([$+|Rest]) -> + [$ |do_decode(Rest)]; do_decode([$%,Hex1,Hex2|Rest]) -> [hex2dec(Hex1)*16+hex2dec(Hex2)|do_decode(Rest)]; do_decode([First|Rest]) -> @@ -119,6 +126,14 @@ do_decode([First|Rest]) -> do_decode([]) -> []. +do_decode_binary(<<$+, Rest/bits>>) -> + <<$ , (do_decode_binary(Rest))/binary>>; +do_decode_binary(<<$%, Hex:2/binary, Rest/bits>>) -> + <<(binary_to_integer(Hex, 16)), (do_decode_binary(Rest))/binary>>; +do_decode_binary(<<First:1/binary, Rest/bits>>) -> + <<First/binary, (do_decode_binary(Rest))/binary>>; +do_decode_binary(<<>>) -> + <<>>. %%%======================================================================== %%% Internal functions @@ -162,9 +177,30 @@ extract_scheme(Str, Opts) -> {error, Error} end; _ -> - {ok, list_to_atom(http_util:to_lower(Str))} + {ok, to_atom(http_util:to_lower(Str))} end. +to_atom(S) when is_list(S) -> + list_to_atom(S); +to_atom(S) when is_binary(S) -> + binary_to_atom(S, unicode). + +parse_uri_rest(Scheme, DefaultPort, <<"//", URIPart/binary>>, Opts) -> + {Authority, PathQueryFragment} = + split_uri(URIPart, "[/?#]", {URIPart, <<"">>}, 1, 0), + {RawPath, QueryFragment} = + split_uri(PathQueryFragment, "[?#]", {PathQueryFragment, <<"">>}, 1, 0), + {Query, Fragment} = + split_uri(QueryFragment, "#", {QueryFragment, <<"">>}, 1, 0), + {UserInfo, HostPort} = split_uri(Authority, "@", {<<"">>, Authority}, 1, 1), + {Host, Port} = parse_host_port(Scheme, DefaultPort, HostPort, Opts), + Path = path(RawPath), + case lists:keyfind(fragment, 1, Opts) of + {fragment, true} -> + {ok, {Scheme, UserInfo, Host, Port, Path, Query, Fragment}}; + _ -> + {ok, {Scheme, UserInfo, Host, Port, Path, Query}} + end; parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> {Authority, PathQueryFragment} = split_uri(URIPart, "[/?#]", {URIPart, ""}, 1, 0), @@ -185,6 +221,11 @@ parse_uri_rest(Scheme, DefaultPort, "//" ++ URIPart, Opts) -> %% In this version of the function, we no longer need %% the Scheme argument, but just in case... +parse_host_port(_Scheme, DefaultPort, <<"[", HostPort/binary>>, Opts) -> %ipv6 + {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, <<"">>}, 1, 1), + Host2 = maybe_ipv6_host_with_brackets(Host, Opts), + {_, Port} = split_uri(ColonPort, ":", {<<"">>, DefaultPort}, 0, 1), + {Host2, int_port(Port)}; parse_host_port(_Scheme, DefaultPort, "[" ++ HostPort, Opts) -> %ipv6 {Host, ColonPort} = split_uri(HostPort, "\\]", {HostPort, ""}, 1, 1), Host2 = maybe_ipv6_host_with_brackets(Host, Opts), @@ -198,12 +239,19 @@ parse_host_port(_Scheme, DefaultPort, HostPort, _Opts) -> split_uri(UriPart, SplitChar, NoMatchResult, SkipLeft, SkipRight) -> case re:run(UriPart, SplitChar, [{capture, first}]) of {match, [{Match, _}]} -> - {string:substr(UriPart, 1, Match + 1 - SkipLeft), - string:substr(UriPart, Match + 1 + SkipRight, length(UriPart))}; + {string:slice(UriPart, 0, Match + 1 - SkipLeft), + string:slice(UriPart, Match + SkipRight, string:length(UriPart))}; nomatch -> NoMatchResult end. +maybe_ipv6_host_with_brackets(Host, Opts) when is_binary(Host) -> + case lists:keysearch(ipv6_host_with_brackets, 1, Opts) of + {value, {ipv6_host_with_brackets, true}} -> + <<"[", Host/binary, "]">>; + _ -> + Host + end; maybe_ipv6_host_with_brackets(Host, Opts) -> case lists:keysearch(ipv6_host_with_brackets, 1, Opts) of {value, {ipv6_host_with_brackets, true}} -> @@ -212,15 +260,18 @@ maybe_ipv6_host_with_brackets(Host, Opts) -> Host end. - int_port(Port) when is_integer(Port) -> Port; +int_port(Port) when is_binary(Port) -> + binary_to_integer(Port); int_port(Port) when is_list(Port) -> list_to_integer(Port); %% This is the case where no port was found and there was no default port int_port(no_default_port) -> throw({error, no_default_port}). +path(<<"">>) -> + <<"/">>; path("") -> "/"; path(Path) -> @@ -234,6 +285,14 @@ uri_encode(Char, Reserved) -> [Char] end. +uri_encode_binary(Char, Reserved) -> + case sets:is_element(Char, Reserved) of + true -> + << $%, (integer_to_binary(Char, 16))/binary >>; + false -> + <<Char>> + end. + hex2dec(X) when (X>=$0) andalso (X=<$9) -> X-$0; hex2dec(X) when (X>=$A) andalso (X=<$F) -> X-$A+10; hex2dec(X) when (X>=$a) andalso (X=<$f) -> X-$a+10. diff --git a/lib/inets/src/http_lib/http_util.erl b/lib/inets/src/http_lib/http_util.erl index 20c342dd66..78b0aa2885 100644 --- a/lib/inets/src/http_lib/http_util.erl +++ b/lib/inets/src/http_lib/http_util.erl @@ -35,10 +35,10 @@ %%% Internal application API %%%========================================================================= to_upper(Str) -> - string:to_upper(Str). + string:uppercase(Str). to_lower(Str) -> - string:to_lower(Str). + string:lowercase(Str). %% Example: Mon, 09-Dec-2002 13:46:00 GMT convert_netscapecookie_date([_D,_A,_Y, $,, $ , diff --git a/lib/inets/test/uri_SUITE.erl b/lib/inets/test/uri_SUITE.erl index b26c645821..d055af59e3 100644 --- a/lib/inets/test/uri_SUITE.erl +++ b/lib/inets/test/uri_SUITE.erl @@ -25,6 +25,7 @@ -module(uri_SUITE). +-include_lib("eunit/include/eunit.hrl"). -include_lib("common_test/include/ct.hrl"). -include("inets_test_lib.hrl"). @@ -50,7 +51,8 @@ all() -> fragments, escaped, hexed_query, - scheme_validation + scheme_validation, + encode_decode ]. %%-------------------------------------------------------------------- @@ -73,7 +75,10 @@ end_per_testcase(_Case, _Config) -> ipv4(Config) when is_list(Config) -> {ok, {http,[],"127.0.0.1",80,"/foobar.html",[]}} = - http_uri:parse("http://127.0.0.1/foobar.html"). + http_uri:parse("http://127.0.0.1/foobar.html"), + + {ok, {http,<<>>,<<"127.0.0.1">>,80,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://127.0.0.1/foobar.html">>). ipv6(Config) when is_list(Config) -> {ok, {http,[],"2010:836B:4179::836B:4179",80,"/foobar.html",[]}} = @@ -89,24 +94,52 @@ ipv6(Config) when is_list(Config) -> [{foo, false}]), {error, {malformed_url, _, "http://2010:836B:4179::836B:4179/foobar.html"}} = - http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html"). + http_uri:parse("http://2010:836B:4179::836B:4179/foobar.html"), + + {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>), + {ok, {http,<<>>,<<"[2010:836B:4179::836B:4179]">>,80,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>, + [{ipv6_host_with_brackets, true}]), + {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>, + [{ipv6_host_with_brackets, false}]), + {ok, {http,<<>>,<<"2010:836B:4179::836B:4179">>,80,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://[2010:836B:4179::836B:4179]/foobar.html">>, + [{foo, false}]), + {error, + {malformed_url, _, <<"http://2010:836B:4179::836B:4179/foobar.html">>}} = + http_uri:parse(<<"http://2010:836B:4179::836B:4179/foobar.html">>). host(Config) when is_list(Config) -> {ok, {http,[],"localhost",8888,"/foobar.html",[]}} = - http_uri:parse("http://localhost:8888/foobar.html"). + http_uri:parse("http://localhost:8888/foobar.html"), + + {ok, {http,<<>>,<<"localhost">>,8888,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://localhost:8888/foobar.html">>). userinfo(Config) when is_list(Config) -> {ok, {http,"nisse:foobar","localhost",8888,"/foobar.html",[]}} = - http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html"). + http_uri:parse("http://nisse:foobar@localhost:8888/foobar.html"), + + {ok, {http,<<"nisse:foobar">>,<<"localhost">>,8888,<<"/foobar.html">>,<<>>}} = + http_uri:parse(<<"http://nisse:foobar@localhost:8888/foobar.html">>). scheme(Config) when is_list(Config) -> {error, no_scheme} = http_uri:parse("localhost/foobar.html"), {error, {malformed_url, _, _}} = - http_uri:parse("localhost:8888/foobar.html"). + http_uri:parse("localhost:8888/foobar.html"), + + {error, no_scheme} = http_uri:parse(<<"localhost/foobar.html">>), + {error, {malformed_url, _, _}} = + http_uri:parse(<<"localhost:8888/foobar.html">>). queries(Config) when is_list(Config) -> {ok, {http,[],"localhost",8888,"/foobar.html","?foo=bar&foobar=42"}} = - http_uri:parse("http://localhost:8888/foobar.html?foo=bar&foobar=42"). + http_uri:parse("http://localhost:8888/foobar.html?foo=bar&foobar=42"), + + {ok, {http,<<>>,<<"localhost">>,8888,<<"/foobar.html">>,<<"?foo=bar&foobar=42">>}} = + http_uri:parse(<<"http://localhost:8888/foobar.html?foo=bar&foobar=42">>). fragments(Config) when is_list(Config) -> {ok, {http,[],"localhost",80,"/",""}} = @@ -142,6 +175,41 @@ fragments(Config) when is_list(Config) -> http_uri:parse("http://localhost?query#", [{fragment,true}]), {ok, {http,[],"localhost",80,"/path","?query","#"}} = http_uri:parse("http://localhost/path?query#", [{fragment,true}]), + + + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>}} = + http_uri:parse(<<"http://localhost#fragment">>), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>}} = + http_uri:parse(<<"http://localhost/path#fragment">>), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>}} = + http_uri:parse(<<"http://localhost?query#fragment">>), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>}} = + http_uri:parse(<<"http://localhost/path?query#fragment">>), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"#fragment">>}} = + http_uri:parse(<<"http://localhost#fragment">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"#fragment">>}} = + http_uri:parse(<<"http://localhost/path#fragment">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"#fragment">>}} = + http_uri:parse(<<"http://localhost?query#fragment">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"#fragment">>}} = + http_uri:parse(<<"http://localhost/path?query#fragment">>, + [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"">>}} = + http_uri:parse(<<"http://localhost">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"">>}} = + http_uri:parse(<<"http://localhost/path">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"">>}} = + http_uri:parse(<<"http://localhost?query">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"">>}} = + http_uri:parse(<<"http://localhost/path?query">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"">>,<<"#">>}} = + http_uri:parse(<<"http://localhost#">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"">>,<<"#">>}} = + http_uri:parse(<<"http://localhost/path#">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/">>,<<"?query">>,<<"#">>}} = + http_uri:parse(<<"http://localhost?query#">>, [{fragment,true}]), + {ok, {http,<<>>,<<"localhost">>,80,<<"/path">>,<<"?query">>,<<"#">>}} = + http_uri:parse(<<"http://localhost/path?query#">>, [{fragment,true}]), ok. escaped(Config) when is_list(Config) -> @@ -152,7 +220,16 @@ escaped(Config) when is_list(Config) -> {ok, {http,[],"www.somedomain.com",80,"/%25abc",[]}} = http_uri:parse("http://www.somedomain.com/%25abc"), {ok, {http,[],"www.somedomain.com",80,"/%25abc", "?foo=bar"}} = - http_uri:parse("http://www.somedomain.com/%25abc?foo=bar"). + http_uri:parse("http://www.somedomain.com/%25abc?foo=bar"), + + {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%2Eabc">>,<<>>}} = + http_uri:parse(<<"http://www.somedomain.com/%2Eabc">>), + {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%252Eabc">>,<<>>}} = + http_uri:parse(<<"http://www.somedomain.com/%252Eabc">>), + {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%25abc">>,<<>>}} = + http_uri:parse(<<"http://www.somedomain.com/%25abc">>), + {ok, {http,<<>>,<<"www.somedomain.com">>,80,<<"/%25abc">>, <<"?foo=bar">>}} = + http_uri:parse(<<"http://www.somedomain.com/%25abc?foo=bar">>). hexed_query(doc) -> [{doc, "Solves OTP-6191"}]; @@ -196,6 +273,17 @@ scheme_validation(Config) when is_list(Config) -> http_uri:parse("https://localhost#fragment", [{scheme_validation_fun, none}]). +encode_decode(Config) when is_list(Config) -> + ?assertEqual("foo%20bar", http_uri:encode("foo bar")), + ?assertEqual(<<"foo%20bar">>, http_uri:encode(<<"foo bar">>)), + + ?assertEqual("foo bar", http_uri:decode("foo+bar")), + ?assertEqual(<<"foo bar">>, http_uri:decode(<<"foo+bar">>)), + ?assertEqual("foo bar", http_uri:decode("foo%20bar")), + ?assertEqual(<<"foo bar">>, http_uri:decode(<<"foo%20bar">>)), + ?assertEqual("foo\r\n", http_uri:decode("foo%0D%0A")), + ?assertEqual(<<"foo\r\n">>, http_uri:decode(<<"foo%0D%0A">>)). + %%-------------------------------------------------------------------- %% Internal Functions ------------------------------------------------ |