diff options
author | Péter Dimitrov <[email protected]> | 2018-02-07 16:08:32 +0100 |
---|---|---|
committer | Péter Dimitrov <[email protected]> | 2018-02-08 16:26:54 +0100 |
commit | c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7 (patch) | |
tree | a9a4a8e3ee569f87dbf9e04053cbd1c2af42d451 | |
parent | 3cb3d5c1927e6235b0e51ba7ca31e2a3458fba01 (diff) | |
download | otp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.tar.gz otp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.tar.bz2 otp-c69bbc7ce1af2dc295fc17fcb31485e2d4caafa7.zip |
stdlib: Improve URI normalization in uri_string
- normalize/1 accepts uri_map() as input type and can return
error() if URI parsing fails.
- Added normalize/2 that can return a normalized uri_map().
Change-Id: Icdd2e60c15019d3eec2e7bc994cae03066a79194
-rw-r--r-- | lib/stdlib/doc/src/uri_string.xml | 36 | ||||
-rw-r--r-- | lib/stdlib/src/uri_string.erl | 56 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 55 |
3 files changed, 126 insertions, 21 deletions
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 21f470e763..6b52ffdd4d 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>2017</year><year>2017</year> + <year>2017</year><year>2018</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -24,7 +24,7 @@ <title>uri_string</title> <prepared>Péter Dimitrov</prepared> <docno>1</docno> - <date>2017-10-24</date> + <date>2018-02-07</date> <rev>A</rev> </header> <module>uri_string</module> @@ -70,7 +70,8 @@ <seealso marker="#transcode/2"><c>transcode/2</c></seealso> </item> <item>Transforming URIs into a normalized form<br></br> - <seealso marker="#normalize/1"><c>normalize/1</c></seealso> + <seealso marker="#normalize/1"><c>normalize/1</c></seealso><br></br> + <seealso marker="#normalize/2"><c>normalize/2</c></seealso> </item> <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br> <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br> @@ -233,7 +234,7 @@ <name name="normalize" arity="1"/> <fsummary>Syntax-based normalization.</fsummary> <desc> - <p>Transforms <c><anno>URIString</anno></c> into a normalized form + <p>Transforms a <c><anno>URI</anno></c> into a normalized form using Syntax-Based Normalization as defined by <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p> <p>This function implements case normalization, percent-encoding @@ -247,6 +248,33 @@ <![CDATA[<<"mid/6">>]]> 3> uri_string:normalize("http://localhost:80"). "http://localhost/" +4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input> +4> host => "localhost-örebro"}). 
+"http://localhost-%C3%B6rebro/a/g" + </pre> + </desc> + </func> + + <func> + <name name="normalize" arity="2"/> + <fsummary>Syntax-based normalization.</fsummary> + <desc> + <p>Same as <c>normalize/1</c> but with an additional + <c><anno>Options</anno></c> parameter that controls whether the normalized URI + shall be returned as a uri_map(). + There is one supported option: <c>return_map</c>. + </p> + <p><em>Example:</em></p> + <pre> +1> <input>uri_string:normalize("/a/b/c/./../../g", [return_map]).</input> +#{path => "/a/g"} +2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>, [return_map]).]]> +<![CDATA[#{path => <<"mid/6">>}]]> +3> uri_string:normalize("http://localhost:80", [return_map]). +#{scheme => "http",path => "/",host => "localhost"} +4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input> +4> host => "localhost-örebro"}, [return_map]). +#{scheme => "http",path => "/a/g",host => "localhost-örebro"} </pre> </desc> </func> diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index a84679c595..ed3a2a10ac 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -227,7 +227,7 @@ %% External API %%------------------------------------------------------------------------- -export([compose_query/1, compose_query/2, - dissect_query/1, normalize/1, parse/1, + dissect_query/1, normalize/1, normalize/2, parse/1, recompose/1, transcode/2]). -export_type([error/0, uri_map/0, uri_string/0]). @@ -292,18 +292,36 @@ %%------------------------------------------------------------------------- %% Normalize URIs %%------------------------------------------------------------------------- --spec normalize(URIString) -> NormalizedURI when - URIString :: uri_string(), - NormalizedURI :: uri_string(). -normalize(URIString) -> - %% Percent-encoding normalization and case normalization for - %% percent-encoded triplets are achieved by running parse and - %% recompose on the input URI string. 
- recompose( - normalize_path_segment( - normalize_scheme_based( - normalize_case( - parse(URIString))))). +-spec normalize(URI) -> NormalizedURI when + URI :: uri_string() | uri_map(), + NormalizedURI :: uri_string() + | error(). +normalize(URIMap) -> + normalize(URIMap, []). + + +-spec normalize(URI, Options) -> NormalizedURI when + URI :: uri_string() | uri_map(), + Options :: [return_map], + NormalizedURI :: uri_string() | uri_map(). +normalize(URIMap, []) when is_map(URIMap) -> + recompose(normalize_map(URIMap)); +normalize(URIMap, [return_map]) when is_map(URIMap) -> + normalize_map(URIMap); +normalize(URIString, []) -> + case parse(URIString) of + Value when is_map(Value) -> + recompose(normalize_map(Value)); + Error -> + Error + end; +normalize(URIString, [return_map]) -> + case parse(URIString) of + Value when is_map(Value) -> + normalize_map(Value); + Error -> + Error + end. %%------------------------------------------------------------------------- @@ -393,7 +411,7 @@ transcode(URIString, Options) when is_list(URIString) -> %% (application/x-www-form-urlencoded encoding algorithm) %%------------------------------------------------------------------------- -spec compose_query(QueryList) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], + QueryList :: [{unicode:chardata(), unicode:chardata()}], QueryString :: uri_string() | error(). compose_query(List) -> @@ -401,7 +419,7 @@ compose_query(List) -> -spec compose_query(QueryList, Options) -> QueryString when - QueryList :: [{uri_string(), uri_string()}], + QueryList :: [{unicode:chardata(), unicode:chardata()}], Options :: [{encoding, atom()}], QueryString :: uri_string() | error(). 
@@ -432,7 +450,7 @@ compose_query([], _Options, IsList, Acc) -> %%------------------------------------------------------------------------- -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), - QueryList :: [{uri_string(), uri_string()}] + QueryList :: [{unicode:chardata(), unicode:chardata()}] | error(). dissect_query(<<>>) -> []; @@ -1903,6 +1921,12 @@ base10_decode_unicode(<<H,_/binary>>, _, _) -> %% Helper functions for normalize %%------------------------------------------------------------------------- +normalize_map(URIMap) -> + normalize_path_segment( + normalize_scheme_based( + normalize_case(URIMap))). + + %% 6.2.2.1. Case Normalization normalize_case(#{scheme := Scheme, host := Host} = Map) -> Map#{scheme => to_lower(Scheme), diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index fef356355c..92f8bb3292 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -22,7 +22,7 @@ -include_lib("common_test/include/ct.hrl"). -export([all/0, suite/0,groups/0, - normalize/1, + normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1, parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1, parse_binary_host_ipv6/1, parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1, @@ -68,6 +68,9 @@ suite() -> all() -> [ normalize, + normalize_map, + normalize_return_map, + normalize_negative, parse_binary_scheme, parse_binary_userinfo, parse_binary_pct_encoded_userinfo, @@ -912,6 +915,56 @@ normalize(_Config) -> <<"tftp://localhost">> = uri_string:normalize(<<"tftp://localhost:69">>). 
+normalize_map(_Config) -> + "/a/g" = uri_string:normalize(#{path => "/a/b/c/./../../g"}), + <<"mid/6">> = uri_string:normalize(#{path => <<"mid/content=5/../6">>}), + "http://localhost-%C3%B6rebro/a/g" = + uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g", + host => "localhost-örebro"}), + <<"http://localhost-%C3%B6rebro/a/g">> = + uri_string:normalize(#{scheme => <<"http">>,port => 80, + path => <<"/a/b/c/./../../g">>, + host => <<"localhost-örebro"/utf8>>}), + <<"https://localhost/">> = + uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>, + host => <<"localhost">>}), + <<"https://localhost:445/">> = + uri_string:normalize(#{scheme => <<"https">>,port => 445,path => <<>>, + host => <<"localhost">>}), + <<"ftp://localhost">> = + uri_string:normalize(#{scheme => <<"ftp">>,port => 21,path => <<>>, + host => <<"localhost">>}), + <<"ssh://localhost">> = + uri_string:normalize(#{scheme => <<"ssh">>,port => 22,path => <<>>, + host => <<"localhost">>}), + <<"sftp://localhost">> = + uri_string:normalize(#{scheme => <<"sftp">>,port => 22,path => <<>>, + host => <<"localhost">>}), + <<"tftp://localhost">> = + uri_string:normalize(#{scheme => <<"tftp">>,port => 69,path => <<>>, + host => <<"localhost">>}). + +normalize_return_map(_Config) -> + #{scheme := "http",path := "/a/g",host := "localhost-örebro"} = + uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g", + [return_map]), + #{scheme := <<"http">>,path := <<"/a/g">>, host := <<"localhost-örebro"/utf8>>} = + uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>, + [return_map]), + #{scheme := <<"https">>,path := <<"/">>, host := <<"localhost">>} = + uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>, + host => <<"localhost">>}, [return_map]). 
+ +normalize_negative(_Config) -> + {error,invalid_uri,":"} = + uri_string:normalize("http://local>host"), + {error,invalid_uri,":"} = + uri_string:normalize(<<"http://local>host">>), + {error,invalid_uri,":"} = + uri_string:normalize("http://[192.168.0.1]", [return_map]), + {error,invalid_uri,":"} = + uri_string:normalize(<<"http://[192.168.0.1]">>, [return_map]). + interop_query_utf8(_Config) -> Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]), Uri = uri_string:recompose(#{path => "/", query => Q}), |