From 3d12c8f164f79dd67967ba5c7df7d3c555dc0f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Fri, 27 Oct 2017 14:14:22 +0200 Subject: stdlib: Allow undefined port in uri_map() uri_map() updated to allow 'undefined' ports in order to align the implementation with RFC 3986: port = *DIGIT An 'undefined' port is mapped to a ":" during recompose operation. --- lib/stdlib/src/uri_string.erl | 39 ++++++++++++++++++---- .../test/property_test/uri_string_recompose.erl | 5 +-- lib/stdlib/test/uri_string_SUITE.erl | 11 +++--- 3 files changed, 43 insertions(+), 12 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index ca212284d2..16650d5005 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -283,7 +283,7 @@ #{fragment => unicode:chardata(), host => unicode:chardata(), path => unicode:chardata(), - port => non_neg_integer(), + port => non_neg_integer() | undefined, query => unicode:chardata(), scheme => unicode:chardata(), userinfo => unicode:chardata()} | #{}. @@ -807,7 +807,7 @@ is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). parse_host(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_host(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -836,7 +836,7 @@ parse_host(?STRING_EMPTY, URI) -> parse_reg_name(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_reg_name(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -869,7 +869,7 @@ parse_ipv4_bin(?STRING_REST($:, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_ipv4_bin(?STRING_REST($/, Rest), Acc, URI) -> _ = validate_ipv4_address(lists:reverse(Acc)), @@ -932,7 +932,7 @@ is_ipv6(Char) -> is_hex_digit(Char). parse_ipv6_bin_end(?STRING_REST($:, Rest), URI) -> {T, URI1} = parse_port(Rest, URI), H = calculate_parsed_host_port(Rest, T), - Port = binary_to_integer(H), + Port = get_port(H), {Rest, URI1#{port => Port}}; parse_ipv6_bin_end(?STRING_REST($/, Rest), URI) -> {T, URI1} = parse_segment(Rest, URI), % path-abempty @@ -1148,7 +1148,7 @@ calculate_parsed_userinfo(Input, Unparsed) -> -spec calculate_parsed_host_port(binary(), binary()) -> binary(). calculate_parsed_host_port(Input, <<>>) -> - strip_last_char(Input, [$?,$#,$/]); + strip_last_char(Input, [$:,$?,$#,$/]); calculate_parsed_host_port(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). @@ -1159,6 +1159,18 @@ calculate_parsed_query_fragment(Input, Unparsed) -> get_parsed_binary(Input, Unparsed). +get_port(<<>>) -> + undefined; +get_port(B) -> + try binary_to_integer(B) of + Port -> + Port + catch + error:badarg -> + throw({error, invalid_uri, B}) + end. + + %% Strip last char if it is in list strip_last_char(<<>>, _) -> <<>>; strip_last_char(Input, [C0]) -> @@ -1187,6 +1199,19 @@ strip_last_char(Input, [C0,C1,C2]) -> init_binary(Input); _Else -> Input + end; +strip_last_char(Input, [C0,C1,C2,C3]) -> + case binary:last(Input) of + C0 -> + init_binary(Input); + C1 -> + init_binary(Input); + C2 -> + init_binary(Input); + C3 -> + init_binary(Input); + _Else -> + Input end. @@ -1530,6 +1555,8 @@ update_host(#{}, URI) -> %% URI cannot be empty for ports. E.g. ":8080" is not a valid URI +update_port(#{port := undefined}, URI) -> + concat(URI, <<":">>); update_port(#{port := Port}, URI) -> concat(URI,add_colon(encode_port(Port))); update_port(#{}, URI) -> diff --git a/lib/stdlib/test/property_test/uri_string_recompose.erl b/lib/stdlib/test/property_test/uri_string_recompose.erl index 97f9d727a0..e51a671172 100644 --- a/lib/stdlib/test/property_test/uri_string_recompose.erl +++ b/lib/stdlib/test/property_test/uri_string_recompose.erl @@ -267,8 +267,9 @@ host_uri() -> %% Port, Query, Fragment %%------------------------------------------------------------------------- port() -> - range(1,65535). - + frequency([{10, undefined}, + {10, range(1,65535)} + ]). query_map() -> unicode(). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 95a49f5eb3..9ee321c509 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -692,7 +692,9 @@ parse_special2(_Config) -> #{host := [],path := [],userinfo := []} = uri_string:parse("//@"), #{host := [],path := [],scheme := "foo",userinfo := []} = uri_string:parse("foo://@"), #{host := [],path := "/",userinfo := []} = uri_string:parse("//@/"), - #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"). + #{host := [],path := "/",scheme := "foo",userinfo := []} = uri_string:parse("foo://@/"), + #{host := "localhost",path := "/",port := undefined} = uri_string:parse("//localhost:/"), + #{host := [],path := [],port := undefined} = uri_string:parse("//:"). parse_negative(_Config) -> {error,invalid_uri,"å"} = uri_string:parse("å"), @@ -702,7 +704,8 @@ parse_negative(_Config) -> {error,invalid_uri,"ö"} = uri_string:parse("//host/path#foö"), {error,invalid_uri,"127.256.0.1"} = uri_string:parse("//127.256.0.1"), {error,invalid_uri,":::127.0.0.1"} = uri_string:parse("//[:::127.0.0.1]"), - {error,non_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"). + {error,invalid_utf8,<<0,0,0,246>>} = uri_string:parse("//%00%00%00%F6"), + {error,invalid_uri,"A"} = uri_string:parse("//localhost:A8"). %%------------------------------------------------------------------------- @@ -836,8 +839,8 @@ compose_query(_Config) -> uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]). compose_query_negative(_Config) -> - {error,badarg,4} = uri_string:compose_query([{"",4}]), - {error,badarg,5} = uri_string:compose_query([{5,""}]), + {error,invalid_input,4} = uri_string:compose_query([{"",4}]), + {error,invalid_input,5} = uri_string:compose_query([{5,""}]), {error,invalid_utf8,<<"ö">>} = uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}]). dissect_query(_Config) -> -- cgit v1.2.3