aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2018-02-07 16:08:32 +0100
committerPéter Dimitrov <[email protected]>2018-02-08 16:26:54 +0100
commitc69bbc7ce1af2dc295fc17fcb31485e2d4caafa7 (patch)
treea9a4a8e3ee569f87dbf9e04053cbd1c2af42d451
parent3cb3d5c1927e6235b0e51ba7ca31e2a3458fba01 (diff)
downloadotp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.tar.gz
otp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.tar.bz2
otp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.zip
stdlib: Improve URI normalization in uri_string
- normalize/1 accepts uri_map() as input type and can return error() if URI parsing fails. - Added normalize/2 that can return a normalized uri_map(). Change-Id: Icdd2e60c15019d3eec2e7bc994cae03066a79194
-rw-r--r--lib/stdlib/doc/src/uri_string.xml36
-rw-r--r--lib/stdlib/src/uri_string.erl56
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl55
3 files changed, 126 insertions, 21 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 21f470e763..6b52ffdd4d 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2017</year><year>2017</year>
+ <year>2017</year><year>2018</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -24,7 +24,7 @@
<title>uri_string</title>
<prepared>Péter Dimitrov</prepared>
<docno>1</docno>
- <date>2017-10-24</date>
+ <date>2018-02-07</date>
<rev>A</rev>
</header>
<module>uri_string</module>
@@ -70,7 +70,8 @@
<seealso marker="#transcode/2"><c>transcode/2</c></seealso>
</item>
<item>Transforming URIs into a normalized form<br></br>
- <seealso marker="#normalize/1"><c>normalize/1</c></seealso>
+ <seealso marker="#normalize/1"><c>normalize/1</c></seealso><br></br>
+ <seealso marker="#normalize/2"><c>normalize/2</c></seealso>
</item>
<item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
<seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
@@ -233,7 +234,7 @@
<name name="normalize" arity="1"/>
<fsummary>Syntax-based normalization.</fsummary>
<desc>
- <p>Transforms <c><anno>URIString</anno></c> into a normalized form
+ <p>Transforms an <c><anno>URI</anno></c> into a normalized form
using Syntax-Based Normalization as defined by
<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p>
<p>This function implements case normalization, percent-encoding
@@ -247,6 +248,33 @@
<![CDATA[<<"mid/6">>]]>
3> uri_string:normalize("http://localhost:80").
"https://localhost/"
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}).
+"http://localhost-%C3%B6rebro/a/g"
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="normalize" arity="2"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Same as <c>normalize/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls if the normalized URI
+ shall be returned as an uri_map().
+ There is one supported option: <c>return_map</c>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g", [return_map]).</input>
+#{path => "/a/g"}
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>, [return_map]).]]>
+<![CDATA[#{path => <<"mid/6">>}]]>
+3> uri_string:normalize("http://localhost:80", [return_map]).
+#{scheme => "http",path => "/",host => "localhost"}
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}, [return_map]).
+#{scheme => "http",path => "/a/g",host => "localhost-örebro"}
</pre>
</desc>
</func>
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index a84679c595..ed3a2a10ac 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -227,7 +227,7 @@
%% External API
%%-------------------------------------------------------------------------
-export([compose_query/1, compose_query/2,
- dissect_query/1, normalize/1, parse/1,
+ dissect_query/1, normalize/1, normalize/2, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -292,18 +292,36 @@
%%-------------------------------------------------------------------------
%% Normalize URIs
%%-------------------------------------------------------------------------
--spec normalize(URIString) -> NormalizedURI when
- URIString :: uri_string(),
- NormalizedURI :: uri_string().
-normalize(URIString) ->
- %% Percent-encoding normalization and case normalization for
- %% percent-encoded triplets are achieved by running parse and
- %% recompose on the input URI string.
- recompose(
- normalize_path_segment(
- normalize_scheme_based(
- normalize_case(
- parse(URIString))))).
+-spec normalize(URI) -> NormalizedURI when
+ URI :: uri_string() | uri_map(),
+ NormalizedURI :: uri_string()
+ | error().
+normalize(URIMap) ->
+ normalize(URIMap, []).
+
+
+-spec normalize(URI, Options) -> NormalizedURI when
+ URI :: uri_string() | uri_map(),
+ Options :: [return_map],
+ NormalizedURI :: uri_string() | uri_map().
+normalize(URIMap, []) when is_map(URIMap) ->
+ recompose(normalize_map(URIMap));
+normalize(URIMap, [return_map]) when is_map(URIMap) ->
+ normalize_map(URIMap);
+normalize(URIString, []) ->
+ case parse(URIString) of
+ Value when is_map(Value) ->
+ recompose(normalize_map(Value));
+ Error ->
+ Error
+ end;
+normalize(URIString, [return_map]) ->
+ case parse(URIString) of
+ Value when is_map(Value) ->
+ normalize_map(Value);
+ Error ->
+ Error
+ end.
%%-------------------------------------------------------------------------
@@ -393,7 +411,7 @@ transcode(URIString, Options) when is_list(URIString) ->
%% (application/x-www-form-urlencoded encoding algorithm)
%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
+ QueryList :: [{unicode:chardata(), unicode:chardata()}],
QueryString :: uri_string()
| error().
compose_query(List) ->
@@ -401,7 +419,7 @@ compose_query(List) ->
-spec compose_query(QueryList, Options) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
+ QueryList :: [{unicode:chardata(), unicode:chardata()}],
Options :: [{encoding, atom()}],
QueryString :: uri_string()
| error().
@@ -432,7 +450,7 @@ compose_query([], _Options, IsList, Acc) ->
%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
- QueryList :: [{uri_string(), uri_string()}]
+ QueryList :: [{unicode:chardata(), unicode:chardata()}]
| error().
dissect_query(<<>>) ->
[];
@@ -1903,6 +1921,12 @@ base10_decode_unicode(<<H,_/binary>>, _, _) ->
%% Helper functions for normalize
%%-------------------------------------------------------------------------
+normalize_map(URIMap) ->
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(URIMap))).
+
+
%% 6.2.2.1. Case Normalization
normalize_case(#{scheme := Scheme, host := Host} = Map) ->
Map#{scheme => to_lower(Scheme),
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index fef356355c..92f8bb3292 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -22,7 +22,7 @@
-include_lib("common_test/include/ct.hrl").
-export([all/0, suite/0,groups/0,
- normalize/1,
+ normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1,
parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
parse_binary_host_ipv6/1,
parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
@@ -68,6 +68,9 @@ suite() ->
all() ->
[
normalize,
+ normalize_map,
+ normalize_return_map,
+ normalize_negative,
parse_binary_scheme,
parse_binary_userinfo,
parse_binary_pct_encoded_userinfo,
@@ -912,6 +915,56 @@ normalize(_Config) ->
<<"tftp://localhost">> =
uri_string:normalize(<<"tftp://localhost:69">>).
+normalize_map(_Config) ->
+ "/a/g" = uri_string:normalize(#{path => "/a/b/c/./../../g"}),
+ <<"mid/6">> = uri_string:normalize(#{path => <<"mid/content=5/../6">>}),
+ "http://localhost-%C3%B6rebro/a/g" =
+ uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",
+ host => "localhost-örebro"}),
+ <<"http://localhost-%C3%B6rebro/a/g">> =
+ uri_string:normalize(#{scheme => <<"http">>,port => 80,
+ path => <<"/a/b/c/./../../g">>,
+ host => <<"localhost-örebro"/utf8>>}),
+ <<"https://localhost/">> =
+ uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>,
+ host => <<"localhost">>}),
+ <<"https://localhost:445/">> =
+ uri_string:normalize(#{scheme => <<"https">>,port => 445,path => <<>>,
+ host => <<"localhost">>}),
+ <<"ftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"ftp">>,port => 21,path => <<>>,
+ host => <<"localhost">>}),
+ <<"ssh://localhost">> =
+ uri_string:normalize(#{scheme => <<"ssh">>,port => 22,path => <<>>,
+ host => <<"localhost">>}),
+ <<"sftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"sftp">>,port => 22,path => <<>>,
+ host => <<"localhost">>}),
+ <<"tftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"tftp">>,port => 69,path => <<>>,
+ host => <<"localhost">>}).
+
+normalize_return_map(_Config) ->
+ #{scheme := "http",path := "/a/g",host := "localhost-örebro"} =
+ uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g",
+ [return_map]),
+ #{scheme := <<"http">>,path := <<"/a/g">>, host := <<"localhost-örebro"/utf8>>} =
+ uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>,
+ [return_map]),
+ #{scheme := <<"https">>,path := <<"/">>, host := <<"localhost">>} =
+ uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>,
+ host => <<"localhost">>}, [return_map]).
+
+normalize_negative(_Config) ->
+ {error,invalid_uri,":"} =
+ uri_string:normalize("http://local>host"),
+ {error,invalid_uri,":"} =
+ uri_string:normalize(<<"http://local>host">>),
+ {error,invalid_uri,":"} =
+ uri_string:normalize("http://[192.168.0.1]", [return_map]),
+ {error,invalid_uri,":"} =
+ uri_string:normalize(<<"http://[192.168.0.1]">>, [return_map]).
+
interop_query_utf8(_Config) ->
Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
Uri = uri_string:recompose(#{path => "/", query => Q}),