aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-10-27 16:54:27 +0200
committerPéter Dimitrov <[email protected]>2017-10-27 17:16:27 +0200
commitce78af7e5a76dc4a27673ab5c80a315762b992b1 (patch)
tree367eac7591b67e6856d4cba3eea7b18949ce62a5
parent3d12c8f164f79dd67967ba5c7df7d3c555dc0f29 (diff)
downloadotp-ce78af7e5a76dc4a27673ab5c80a315762b992b1.tar.gz
otp-ce78af7e5a76dc4a27673ab5c80a315762b992b1.tar.bz2
otp-ce78af7e5a76dc4a27673ab5c80a315762b992b1.zip
stdlib: Implement normalize/1
Implements the following Syntax-Based Normalizations: - Case Normalization - Percent-Encoding Normalization - Path Segment Normalization - Scheme-Based Normalization - HTTP(S) - Basic support for FTP, SSH, SFTP, TFTP
-rw-r--r--lib/stdlib/src/uri_string.erl167
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl22
2 files changed, 188 insertions, 1 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 16650d5005..cf8c388f54 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -227,7 +227,7 @@
%% External API
%%-------------------------------------------------------------------------
-export([compose_query/1, compose_query/2,
- dissect_query/1, parse/1,
+ dissect_query/1, normalize/1, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -288,6 +288,21 @@
scheme => unicode:chardata(),
userinfo => unicode:chardata()} | #{}.
+
+%%-------------------------------------------------------------------------
+%% Normalize URIs
+%%-------------------------------------------------------------------------
+-spec normalize(URIString) -> NormalizedURI when
+ URIString :: uri_string(),
+ NormalizedURI :: uri_string().
+normalize(URIString) ->
+ %% Case normalization and percent-encoding normalization are achieved
+ %% by running parse and recompose on the input URI string.
+ M = parse(URIString),
+ M1 = normalize_scheme_based(M),
+ M2 = normalize_path_segment(M1),
+ recompose(M2).
+
%%-------------------------------------------------------------------------
%% Parse URIs
%%-------------------------------------------------------------------------
@@ -1883,3 +1898,153 @@ form_urldecode(<<H/utf8,T/binary>>, Acc) ->
end;
form_urldecode(<<H,_/binary>>, _Acc) ->
throw({error, invalid_character, [H]}).
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for normalize
+%%-------------------------------------------------------------------------
+
+%% RFC 3986
+%% 6.2.2.3. Path Segment Normalization
+%% 5.2.4. Remove Dot Segments
+normalize_path_segment(Map) ->
+ Path = maps:get(path, Map, undefined),
+ Map#{path => remove_dot_segments(Path)}.
+
+
+remove_dot_segments(Path) when is_binary(Path) ->
+ remove_dot_segments(Path, <<>>);
+remove_dot_segments(Path) when is_list(Path) ->
+ B = convert_binary(Path, utf8, utf8),
+ B1 = remove_dot_segments(B, <<>>),
+ convert_list(B1, utf8).
+%%
+remove_dot_segments(<<>>, Output) ->
+ Output;
+remove_dot_segments(<<"../",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"./",T/binary>>, Output) ->
+ remove_dot_segments(T, Output);
+remove_dot_segments(<<"/./",T/binary>>, Output) ->
+ remove_dot_segments(<<$/,T/binary>>, Output);
+remove_dot_segments(<<"/.">>, Output) ->
+ remove_dot_segments(<<$/>>, Output);
+remove_dot_segments(<<"/../",T/binary>>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/,T/binary>>, Out1);
+remove_dot_segments(<<"/..">>, Output) ->
+ Out1 = remove_last_segment(Output),
+ remove_dot_segments(<<$/>>, Out1);
+remove_dot_segments(<<$.>>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(<<"..">>, Output) ->
+ remove_dot_segments(<<>>, Output);
+remove_dot_segments(Input, Output) ->
+ {First, Rest} = first_path_segment(Input),
+ remove_dot_segments(Rest, <<Output/binary,First/binary>>).
+
+
+first_path_segment(Input) ->
+ F = first_path_segment(Input, <<>>),
+ split_binary(Input, byte_size(F)).
+%%
+first_path_segment(<<$/,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,$/>>);
+first_path_segment(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+first_path_segment_end(<<>>, Acc) ->
+ Acc;
+first_path_segment_end(<<$/,_/binary>>, Acc) ->
+ Acc;
+first_path_segment_end(<<C,T/binary>>, Acc) ->
+ first_path_segment_end(<<T/binary>>, <<Acc/binary,C>>).
+
+
+remove_last_segment(<<>>) ->
+ <<>>;
+remove_last_segment(B) ->
+ {Init, Last} = split_binary(B, byte_size(B) - 1),
+ case Last of
+ <<$/>> ->
+ Init;
+ _Char ->
+ remove_last_segment(Init)
+ end.
+
+
+%% RFC 3986, 6.2.3. Scheme-Based Normalization
+normalize_scheme_based(Map) ->
+ Scheme = maps:get(scheme, Map, undefined),
+ Port = maps:get(port, Map, undefined),
+ Path= maps:get(path, Map, undefined),
+ case Scheme of
+ "http" ->
+ normalize_http(Map, Port, Path);
+ <<"http">> ->
+ normalize_http(Map, Port, Path);
+ "https" ->
+ normalize_https(Map, Port, Path);
+ <<"https">> ->
+ normalize_https(Map, Port, Path);
+ "ftp" ->
+ normalize_ftp(Map, Port);
+ <<"ftp">> ->
+ normalize_ftp(Map, Port);
+ "ssh" ->
+ normalize_ssh_sftp(Map, Port);
+ <<"ssh">> ->
+ normalize_ssh_sftp(Map, Port);
+ "sftp" ->
+ normalize_ssh_sftp(Map, Port);
+ <<"sftp">> ->
+ normalize_ssh_sftp(Map, Port);
+ "tftp" ->
+ normalize_tftp(Map, Port);
+ <<"tftp">> ->
+ normalize_tftp(Map, Port);
+ _Else -> Map
+ end.
+
+
+normalize_http(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 80),
+ normalize_http_path(M1, Path).
+
+
+normalize_https(Map, Port, Path) ->
+ M1 = normalize_port(Map, Port, 443),
+ normalize_http_path(M1, Path).
+
+
+normalize_ftp(Map, Port) ->
+ normalize_port(Map, Port, 21).
+
+
+normalize_ssh_sftp(Map, Port) ->
+ normalize_port(Map, Port, 22).
+
+
+normalize_tftp(Map, Port) ->
+ normalize_port(Map, Port, 69).
+
+
+normalize_port(Map, Port, Default) ->
+ case Port of
+ Default ->
+ maps:remove(port, Map);
+ _Else ->
+ Map
+ end.
+
+
+normalize_http_path(Map, Path) ->
+ case Path of
+ "" ->
+ Map#{path => "/"};
+ <<>> ->
+ Map#{path => <<"/">>};
+ _Else ->
+ Map
+ end.
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index 9ee321c509..1567b9333a 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -22,6 +22,7 @@
-include_lib("common_test/include/ct.hrl").
-export([all/0, suite/0,groups/0,
+ normalize/1,
parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
parse_binary_host_ipv6/1,
parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
@@ -65,6 +66,7 @@ suite() ->
all() ->
[
+ normalize,
parse_binary_scheme,
parse_binary_userinfo,
parse_binary_pct_encoded_userinfo,
@@ -867,3 +869,23 @@ dissect_query_negative(_Config) ->
{error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&amp;%C3%B6=2">>),
{error,invalid_input,<<"ö">>} =
uri_string:dissect_query([<<"foo+bar=1&amp;">>,<<"%C3%B6=2ö">>]).
+
+normalize(_Config) ->
+ "/a/g" = uri_string:normalize("/a/b/c/./../../g"),
+ <<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),
+ "http://localhost-%C3%B6rebro/a/g" =
+ uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g"),
+ <<"http://localhost-%C3%B6rebro/a/g">> =
+ uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>),
+ <<"https://localhost/">> =
+ uri_string:normalize(<<"https://localhost:443">>),
+ <<"https://localhost:445/">> =
+ uri_string:normalize(<<"https://localhost:445">>),
+ <<"ftp://localhost">> =
+ uri_string:normalize(<<"ftp://localhost:21">>),
+ <<"ssh://localhost">> =
+ uri_string:normalize(<<"ssh://localhost:22">>),
+ <<"sftp://localhost">> =
+ uri_string:normalize(<<"sftp://localhost:22">>),
+ <<"tftp://localhost">> =
+ uri_string:normalize(<<"tftp://localhost:69">>).