From 29a9dd0e17a97a3e6e46f0d08c6ba8f31db33f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Thu, 31 Aug 2017 15:39:45 +0200 Subject: stdlib: Implement uri_string:parse --- lib/stdlib/doc/src/uri_string.xml | 6 - lib/stdlib/src/uri_string.erl | 838 ++++++++++++++++++++- lib/stdlib/test/Makefile | 1 + lib/stdlib/test/property_test/README | 12 + .../test/property_test/uri_string_decode.erl | 55 ++ lib/stdlib/test/uri_string_SUITE.erl | 326 ++++++++ lib/stdlib/test/uri_string_property_test_SUITE.erl | 42 ++ 7 files changed, 1249 insertions(+), 31 deletions(-) mode change 100644 => 100755 lib/stdlib/src/uri_string.erl create mode 100644 lib/stdlib/test/property_test/README create mode 100644 lib/stdlib/test/property_test/uri_string_decode.erl create mode 100644 lib/stdlib/test/uri_string_SUITE.erl create mode 100644 lib/stdlib/test/uri_string_property_test_SUITE.erl (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index e6b2bd5e80..8283b8ca0e 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -90,12 +90,6 @@ - - - -

Maybe improper list of bytes (0..255).

-
-
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl old mode 100644 new mode 100755 index 2c10c34f39..619da24cbc --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -221,25 +221,24 @@ %% %% absolute-URI = scheme ":" hier-part [ "?" query ] %% - -module(uri_string). -export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1, parse/1, recompose/1, resolve_uri_reference/2, transcode/2]). --export_type([uri_map/0, uri_string/0, bytelist/0]). +-export_type([uri_map/0, uri_string/0]). + +-define(CHAR(Char), <>). +-define(STRING_EMPTY, <<>>). +-define(STRING(MatchStr), <>). +-define(STRING_REST(MatchStr, Rest), <>). %%%========================================================================= %%% API %%%========================================================================= - --type bytelist() :: maybe_improper_list( - 0..255 | - binary() | bytelist(), - binary() | []). - +%%------------------------------------------------------------------------- %% URI compliant with RFC 3986 %% ASCII %x21 - %x7A ("!" - "z") except %% %x34 " double quote @@ -248,32 +247,37 @@ %% %x92 \ backslash %% %x94 ^ caret / circumflex %% %x96 ` grave / accent --type uri_string() :: bytelist() | binary(). +%%------------------------------------------------------------------------- +-type uri_string() :: iodata(). %% RFC 3986, Chapter 3. Syntax Components -type uri_map() :: - #{fragment := unicode:chardata(), - host := unicode:chardata(), - path := unicode:chardata(), - port := non_neg_integer(), - query := unicode:chardata(), - scheme := atom(), - userinfo := unicode:chardata()}. + #{fragment => unicode:chardata(), + host => unicode:chardata(), + path => unicode:chardata(), + port => non_neg_integer(), + query => unicode:chardata(), + scheme => unicode:chardata(), + userinfo => unicode:chardata()} | #{}. %% Parse URIs -spec parse(URIString) -> URIMap when URIString :: uri_string(), URIMap :: uri_map(). -parse(_) -> - ok. +parse(URIString) -> + if is_binary(URIString) -> + parse_uri_reference(URIString, #{}); + true -> + parse_uri_reference(URIString, [], #{}) + end. %% Recompose URIs -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), URIString :: uri_string(). recompose(_) -> - ok. + "". %% Resolve references -spec resolve_uri_reference(RelativeURI, AbsoluteBaseURI) -> AbsoluteDestURI when @@ -281,7 +285,7 @@ recompose(_) -> AbsoluteBaseURI :: uri_string(), AbsoluteDestURI :: uri_string(). resolve_uri_reference(_,_) -> - ok. + "". %% Create references -spec create_uri_reference(AbsoluteSourceURI, AbsoluteBaseURI) -> RelativeDestURI when @@ -289,21 +293,21 @@ resolve_uri_reference(_,_) -> AbsoluteBaseURI :: uri_string(), RelativeDestURI :: uri_string(). create_uri_reference(_,_) -> - ok. + "". %% Normalize URIs -spec normalize(URIString) -> NormalizedURI when URIString :: uri_string(), NormalizedURI :: uri_string(). normalize(_) -> - ok. + "". %% Transcode URIs -spec transcode(URIString, Options) -> URIString when URIString :: uri_string(), Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}]. transcode(_, _) -> - ok. + "". %% Working with query strings @@ -315,11 +319,795 @@ transcode(_, _) -> QueryList :: [{unicode:chardata(), unicode:chardata()}], QueryString :: uri_string(). compose_query(_) -> - ok. + "". %% Dissect a query string into a list of unescaped key/value pairs. -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), QueryList :: [{unicode:chardata(), unicode:chardata()}]. dissect_query(_) -> - ok. + "". + + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.1. URI Reference] +%% +%% URI-reference is used to denote the most common usage of a resource +%% identifier. +%% +%% URI-reference = URI / relative-ref +%%------------------------------------------------------------------------- +-spec parse_uri_reference(iolist(), list(), uri_map()) -> uri_map(). +parse_uri_reference([], _, _) -> #{}; +parse_uri_reference(URIString, Acc, URI) -> + try parse_scheme_start(URIString, Acc, URI) of + Res -> Res + catch + throw:uri_parse_error -> + parse_relative_part(URIString, Acc, URI) + end. + +-spec parse_uri_reference(binary(), uri_map()) -> uri_map(). +parse_uri_reference(<<>>, _) -> #{}; +parse_uri_reference(URIString, URI) -> + try parse_scheme_start(URIString, URI) of + Res -> Res + catch + throw:uri_parse_error -> + parse_relative_part(URIString, URI) + end. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 4.2. Relative Reference] +%% +%% A relative reference takes advantage of the hierarchical syntax +%% (Section 1.2.3) to express a URI reference relative to the name space +%% of another hierarchical URI. +%% +%% relative-ref = relative-part [ "?" query ] [ "#" fragment ] +%% +%% relative-part = "//" authority path-abempty +%% / path-absolute +%% / path-noscheme +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_relative_part(binary(), uri_map()) -> uri_map(). +parse_relative_part(?STRING_REST("//", Rest), URI) -> + %% Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1), + URI1#{userinfo => Userinfo} + catch + throw:uri_parse_error -> + {T, URI1} = parse_host(Rest, URI), + {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), + URI1#{host => Host} + end; +parse_relative_part(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + URI1#{path => ?STRING_REST($/, Path)}; +parse_relative_part(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + URI1#{query => ?STRING_REST($?, Query)}; +parse_relative_part(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + URI1#{fragment => Fragment}; +parse_relative_part(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> + {T, URI1} = parse_segment_nz_nc(Rest, URI), % path-noscheme + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + URI1#{path => ?STRING_REST(Char, Path)}; + false -> throw(uri_parse_error) + end. + +-spec parse_relative_part(iolist(), list(), uri_map()) -> uri_map(). +parse_relative_part([H|Rest], Acc, URI) when is_binary(H) -> + parse_relative_part(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_relative_part([H|Rest], Acc, URI) when is_list(H) -> + parse_relative_part(H ++ Rest, Acc, URI); +parse_relative_part("//" ++ Rest, Acc, URI) -> + % Parse userinfo + try parse_userinfo(Rest, Acc, URI) of + Res -> Res + catch + throw:uri_parse_error -> + parse_host(Rest, Acc, URI) + end; +parse_relative_part([$/|Rest], _Acc, URI) -> + parse_segment(Rest, [$/], URI); % path-absolute +parse_relative_part([$?|Rest], _Acc, URI) -> + parse_query(Rest, [$?], URI); % path-empty ?query +parse_relative_part([$#|Rest], _Acc, URI) -> + parse_fragment(Rest, [], URI); % path-empty +parse_relative_part([Char|Rest], _, URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, [Char], URI); % path-noscheme + false -> throw(uri_parse_error) + end. + + +%% Returns size of 'Rest' for proper calculation of splitting position. +%% Solves the following special case: +%% +%% #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). +%% +%% While keeping the following true: +%% +%% #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>). +%% #{host := <<>>, path := <<"/hostname">>} = uri_string:parse(<<"///hostname">>). +%% +-spec byte_size_exl_single_slash(uri_string()) -> number(). +byte_size_exl_single_slash(<<$/>>) -> 0; +byte_size_exl_single_slash(Rest) -> byte_size(Rest). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.3. Path] +%% +%% The path component contains data, usually organized in hierarchical +%% form, that, along with data in the non-hierarchical query component +%% (Section 3.4), serves to identify a resource within the scope of the +%% URI's scheme and naming authority (if any). The path is terminated +%% by the first question mark ("?") or number sign ("#") character, or +%% by the end of the URI. +%% +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% +%% path-abempty = *( "/" segment ) +%% path-absolute = "/" [ segment-nz *( "/" segment ) ] +%% path-noscheme = segment-nz-nc *( "/" segment ) +%% path-rootless = segment-nz *( "/" segment ) +%% path-empty = 0 +%% segment = *pchar +%% segment-nz = 1*pchar +%% segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) +%% ; non-zero-length segment without any colon ":" +%% +%% pchar = unreserved / pct-encoded / sub-delims / ":" / "@" +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% path-abempty +%%------------------------------------------------------------------------- +-spec parse_segment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_segment(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_segment(?STRING_REST(Char, Rest), URI) -> + case is_pchar(Char) of + true -> parse_segment(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_segment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_segment(iolist(), list(), uri_map()) -> uri_map(). +parse_segment(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_segment(unicode:characters_to_list(Str), Acc, URI); +parse_segment([H|Rest], Acc, URI) when is_binary(H) -> + parse_segment(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_segment([H|Rest], Acc, URI) when is_list(H) -> + parse_segment(H ++ Rest, Acc, URI); +parse_segment([$/|Rest], Acc, URI) -> + parse_segment(Rest, [$/|Acc], URI); % segment +parse_segment([$?|Rest], Acc, URI) -> + parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query +parse_segment([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)}); +parse_segment([Char|Rest], Acc, URI) -> + case is_pchar(Char) of + true -> parse_segment(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_segment([], Acc, URI) -> + URI#{path => lists:reverse(Acc)}. + +%%------------------------------------------------------------------------- +%% path-noscheme +%%------------------------------------------------------------------------- +-spec parse_segment_nz_nc(binary(), uri_map()) -> {binary(), uri_map()}. +parse_segment_nz_nc(?STRING_REST($/, Rest), URI) -> + parse_segment(Rest, URI); % segment +parse_segment_nz_nc(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_segment_nz_nc(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_segment_nz_nc(?STRING_REST(Char, Rest), URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_segment_nz_nc(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_segment_nz_nc(iolist(), list(), uri_map()) -> uri_map(). +parse_segment_nz_nc(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_segment_nz_nc(unicode:characters_to_list(Str), Acc, URI); +parse_segment_nz_nc([H|Rest], Acc, URI) when is_binary(H) -> + parse_segment_nz_nc(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_segment_nz_nc([H|Rest], Acc, URI) when is_list(H) -> + parse_segment_nz_nc(H ++ Rest, Acc, URI); +parse_segment_nz_nc([$/|Rest], Acc, URI) -> + parse_segment(Rest, [$/|Acc], URI); % segment +parse_segment_nz_nc([$?|Rest], Acc, URI) -> + parse_query(Rest, [$?], URI#{path => lists:reverse(Acc)}); % ?query +parse_segment_nz_nc([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{path => lists:reverse(Acc)}); +parse_segment_nz_nc([Char|Rest], Acc, URI) -> + case is_segment_nz_nc(Char) of + true -> parse_segment_nz_nc(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_segment_nz_nc([], Acc, URI) -> + URI#{path => lists:reverse(Acc)}. + +%% Check if char is pchar. +-spec is_pchar(char()) -> boolean(). +is_pchar($%) -> true; % pct-encoded +is_pchar($:) -> true; +is_pchar($@) -> true; +is_pchar(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + +%% Check if char is segment_nz_nc. +-spec is_segment_nz_nc(char()) -> boolean(). +is_segment_nz_nc($%) -> true; % pct-encoded +is_segment_nz_nc($@) -> true; +is_segment_nz_nc(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.1. Scheme] +%% +%% Each URI begins with a scheme name that refers to a specification for +%% assigning identifiers within that scheme. +%% +%% scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) +%%------------------------------------------------------------------------- +-spec parse_scheme_start(binary(), uri_map()) -> uri_map(). +parse_scheme_start(?STRING_REST(Char, Rest), URI) -> + case is_alpha(Char) of + true -> {T, URI1} = parse_scheme(Rest, URI), + {Scheme, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1), + URI1#{scheme => ?STRING_REST(Char, Scheme)}; + false -> throw(uri_parse_error) + end. + +-spec parse_scheme_start(iolist(), list(), uri_map()) -> uri_map(). +parse_scheme_start([H|Rest], Acc, URI) when is_binary(H) -> + parse_scheme_start(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_scheme_start([H|Rest], Acc, URI) when is_list(H) -> + parse_scheme_start(H ++ Rest, Acc, URI); +parse_scheme_start([Char|Rest], Acc, URI) -> + case is_alpha(Char) of + true -> parse_scheme(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end. + + +-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}. +parse_scheme(?STRING_REST($:, Rest), URI) -> + {_, URI1} = parse_hier(Rest, URI), + {Rest, URI1}; +parse_scheme(?STRING_REST(Char, Rest), URI) -> + case is_scheme(Char) of + true -> parse_scheme(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_scheme(?STRING_EMPTY, _URI) -> + throw(uri_parse_error). + +-spec parse_scheme(iolist(), list(), uri_map()) -> uri_map(). +parse_scheme(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_scheme(unicode:characters_to_list(Str), Acc, URI); +parse_scheme([H|Rest], Acc, URI) when is_binary(H) -> + parse_scheme(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_scheme([H|Rest], Acc, URI) when is_list(H) -> + parse_scheme(H ++ Rest, Acc, URI); +parse_scheme([$:|Rest], Acc, URI) -> + parse_hier(Rest, [], URI#{scheme => lists:reverse(Acc)}); +parse_scheme([Char|Rest], Acc, URI) -> + case is_scheme(Char) of + true -> parse_scheme(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_scheme([], _Acc, _URI) -> + throw(uri_parse_error). + +%% Check if char is allowed in scheme +-spec is_scheme(char()) -> boolean(). +is_scheme($+) -> true; +is_scheme($-) -> true; +is_scheme($.) -> true; +is_scheme(Char) -> is_alpha(Char) orelse is_digit(Char). + + +%%------------------------------------------------------------------------- +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%%------------------------------------------------------------------------- +-spec parse_hier(binary(), uri_map()) -> {binary(), uri_map()}. +parse_hier(?STRING_REST("//", Rest), URI) -> + % Parse userinfo - "//" is NOT part of authority + try parse_userinfo(Rest, URI) of + {T, URI1} -> + {Userinfo, _} = split_binary(Rest, byte_size(Rest) - byte_size(T) - 1), + {Rest, URI1#{userinfo => Userinfo}} + catch + throw:uri_parse_error -> + {T, URI1} = parse_host(Rest, URI), + {Host, _} = split_binary(Rest, byte_size_exl_single_slash(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{host => Host}} + end; +parse_hier(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-absolute + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_hier(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_hier(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_hier(?STRING_REST(Char, Rest), URI) -> % path-rootless + case is_pchar(Char) of + true -> % segment_nz + {T, URI1} = parse_segment(Rest, URI), + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST(Char, Path)}}; + false -> throw(uri_parse_error) + end; +parse_hier(?STRING_EMPTY, URI) -> + {<<>>, URI}. + +-spec parse_hier(iolist(), list(), uri_map()) -> uri_map(). +parse_hier(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_hier(unicode:characters_to_list(Str), Acc, URI); +parse_hier([H|Rest], Acc, URI) when is_binary(H) -> + parse_hier(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_hier([H|Rest], Acc, URI) when is_list(H) -> + parse_hier(H ++ Rest, Acc, URI); +parse_hier("//" ++ Rest, Acc, URI) -> + % Parse userinfo + try parse_userinfo(Rest, Acc, URI) of + Res -> Res + catch + throw:uri_parse_error -> + parse_host(Rest, [], URI) + end; +parse_hier([$/|Rest], _Acc, URI) -> + parse_segment(Rest, [$/], URI); % path-absolute +parse_hier([$?|Rest], _Acc, URI) -> + parse_query(Rest, [$?], URI); % path-empty ?query +parse_hier([$#|Rest], _Acc, URI) -> + parse_fragment(Rest, [], URI); % path-empty +parse_hier([Char|Rest], _, URI) -> % path-rootless + case is_pchar(Char) of + true -> parse_segment(Rest, [Char], URI); + false -> throw(uri_parse_error) + end; +parse_hier([], _, URI) -> + URI. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2. Authority] +%% +%% Many URI schemes include a hierarchical element for a naming +%% authority so that governance of the name space defined by the +%% remainder of the URI is delegated to that authority (which may, in +%% turn, delegate it further). +%% +%% The authority component is preceded by a double slash ("//") and is +%% terminated by the next slash ("/"), question mark ("?"), or number +%% sign ("#") character, or by the end of the URI. +%% +%% authority = [ userinfo "@" ] host [ ":" port ] +%% +%% +%% [RFC 3986, Chapter 3.2.1. User Information] +%% +%% The userinfo subcomponent may consist of a user name and, optionally, +%% scheme-specific information about how to gain authorization to access +%% the resource. The user information, if present, is followed by a +%% commercial at-sign ("@") that delimits it from the host. +%% +%% userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) +%%------------------------------------------------------------------------- +-spec parse_userinfo(binary(), uri_map()) -> {binary(), uri_map()}. +parse_userinfo(?CHAR($@), _URI) -> + %% URI cannot end in userinfo state + throw(uri_parse_error); +parse_userinfo(?STRING_REST($@, Rest), URI) -> + {T, URI1} = parse_host(Rest, URI), + {Host, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{host => Host}}; +parse_userinfo(?STRING_REST(Char, Rest), URI) -> + case is_userinfo(Char) of + true -> parse_userinfo(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_userinfo(?STRING_EMPTY, _URI) -> + %% URI cannot end in userinfo state + throw(uri_parse_error). + +-spec parse_userinfo(iolist(), list(), uri_map()) -> uri_map(). +parse_userinfo(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_userinfo(unicode:characters_to_list(Str), Acc, URI); +parse_userinfo([H|Rest], Acc, URI) when is_binary(H) -> + parse_userinfo(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_userinfo([H|Rest], Acc, URI) when is_list(H) -> + parse_userinfo(H ++ Rest, Acc, URI); +parse_userinfo([$@], _Acc, _URI) -> + %% URI cannot end in userinfo state + throw(uri_parse_error); +parse_userinfo([$@|Rest], Acc, URI) -> + parse_host(Rest, [], URI#{userinfo => lists:reverse(Acc)}); +parse_userinfo([Char|Rest], Acc, URI) -> + case is_userinfo(Char) of + true -> parse_userinfo(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) % URI#{userinfo => lists:reverse(Acc)} + end; +parse_userinfo([], _Acc, _URI) -> + %% URI cannot end in userinfo state + throw(uri_parse_error). + +%% Check if char is allowed in userinfo +-spec is_userinfo(char()) -> boolean(). +is_userinfo($%) -> true; % pct-encoded +is_userinfo($:) -> true; +is_userinfo(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Host] +%% +%% The host subcomponent of authority is identified by an IP literal +%% encapsulated within square brackets, an IPv4 address in dotted- +%% decimal form, or a registered name. +%% +%% host = IP-literal / IPv4address / reg-name +%% +%% IP-literal = "[" ( IPv6address / IPvFuture ) "]" +%% +%% IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) +%% +%% IPv6address = 6( h16 ":" ) ls32 +%% / "::" 5( h16 ":" ) ls32 +%% / [ h16 ] "::" 4( h16 ":" ) ls32 +%% / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 +%% / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 +%% / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 +%% / [ *4( h16 ":" ) h16 ] "::" ls32 +%% / [ *5( h16 ":" ) h16 ] "::" h16 +%% / [ *6( h16 ":" ) h16 ] "::" +%% +%% ls32 = ( h16 ":" h16 ) / IPv4address +%% ; least-significant 32 bits of address +%% +%% h16 = 1*4HEXDIG +%% ; 16 bits of address represented in hexadecimal +%% +%% IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet +%% +%% dec-octet = DIGIT ; 0-9 +%% / %x31-39 DIGIT ; 10-99 +%% / "1" 2DIGIT ; 100-199 +%% / "2" %x30-34 DIGIT ; 200-249 +%% / "25" %x30-35 ; 250-255 +%% +%% reg-name = *( unreserved / pct-encoded / sub-delims ) +%%------------------------------------------------------------------------- +%% TODO: implement parsing of IPv4/IPv6 addresses +-spec parse_host(binary(), uri_map()) -> {binary(), uri_map()}. +parse_host(?STRING_REST($:, Rest), URI) -> + {T, URI1} = parse_port(Rest, URI), + {H, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + Port = binary_to_integer(H), + {Rest, URI1#{port => Port}}; +parse_host(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_host(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_host(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_host(?STRING_REST(Char, Rest), URI) -> + case is_reg_name(Char) of + true -> parse_host(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_host(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_host(iolist(), list(), uri_map()) -> uri_map(). +parse_host(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_host(unicode:characters_to_list(Str), Acc, URI); +parse_host([H|Rest], Acc, URI) when is_binary(H) -> + parse_host(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_host([H|Rest], Acc, URI) when is_list(H) -> + parse_host(H ++ Rest, Acc, URI); +parse_host([$:|Rest], Acc, URI) -> + parse_port(Rest, [], URI#{host => lists:reverse(Acc)}); +parse_host([$/|Rest], Acc, URI) -> + parse_segment(Rest, [$/], URI#{host => lists:reverse(Acc)}); % path-abempty +parse_host([$?|Rest], Acc, URI) -> + parse_query(Rest, [$?], URI#{host => lists:reverse(Acc)}); % path-empty ?query +parse_host([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{host => lists:reverse(Acc)}); % path-empty +parse_host([Char|Rest], Acc, URI) -> + case is_reg_name(Char) of + true -> parse_host(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_host([], Acc, URI) -> + URI#{host => lists:reverse(Acc)}. + +%% Check if char is allowed in reg-name +-spec is_reg_name(char()) -> boolean(). +is_reg_name($%) -> true; +is_reg_name(Char) -> is_unreserved(Char) orelse is_sub_delim(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.2.2. Port] +%% +%% The port subcomponent of authority is designated by an optional port +%% number in decimal following the host and delimited from it by a +%% single colon (":") character. +%% +%% port = *DIGIT +%%------------------------------------------------------------------------- +-spec parse_port(binary(), uri_map()) -> {binary(), uri_map()}. +parse_port(?STRING_REST($/, Rest), URI) -> + {T, URI1} = parse_segment(Rest, URI), % path-abempty + {Path, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{path => ?STRING_REST($/, Path)}}; +parse_port(?STRING_REST($?, Rest), URI) -> + {T, URI1} = parse_query(Rest, URI), % path-empty ?query + {Query, _} = split_binary(Rest, byte_size(Rest) - byte_size_exl_head(T)), + {Rest, URI1#{query => ?STRING_REST($?, Query)}}; +parse_port(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), % path-empty + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_port(?STRING_REST(Char, Rest), URI) -> + case is_digit(Char) of + true -> parse_port(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_port(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_port(iolist(), list(), uri_map()) -> uri_map(). +parse_port(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_port(unicode:characters_to_list(Str), Acc, URI); +parse_port([H|Rest], Acc, URI) when is_binary(H) -> + parse_port(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_port([H|Rest], Acc, URI) when is_list(H) -> + parse_port(H ++ Rest, Acc, URI); +parse_port([$/|Rest], Acc, URI) -> + {Port, _} = string:to_integer(lists:reverse(Acc)), + parse_segment(Rest, [$/], URI#{port => Port}); % path-abempty +parse_port([$?|Rest], Acc, URI) -> + {Port, _} = string:to_integer(lists:reverse(Acc)), + parse_query(Rest, [$?], URI#{port => Port}); % path-empty ?query +parse_port([$#|Rest], Acc, URI) -> + {Port, _} = string:to_integer(lists:reverse(Acc)), + parse_fragment(Rest, [], URI#{port => Port}); % path-empty +parse_port([Char|Rest], Acc, URI) -> + case is_digit(Char) of + true -> parse_port(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_port([], Acc, URI) -> + {Port, _} = string:to_integer(lists:reverse(Acc)), + URI#{port => Port}. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.4. Query] +%% +%% The query component contains non-hierarchical data that, along with +%% data in the path component (Section 3.3), serves to identify a +%% resource within the scope of the URI's scheme and naming authority +%% (if any). The query component is indicated by the first question +%% mark ("?") character and terminated by a number sign ("#") character +%% or by the end of the URI. +%% +%% query = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_query(binary(), uri_map()) -> {binary(), uri_map()}. +parse_query(?STRING_REST($#, Rest), URI) -> + {T, URI1} = parse_fragment(Rest, URI), + {Fragment, _} = split_binary(Rest, byte_size(Rest) - byte_size(T)), + {Rest, URI1#{fragment => Fragment}}; +parse_query(?STRING_REST(Char, Rest), URI) -> + case is_query(Char) of + true -> parse_query(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_query(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_query(iolist(), list(), uri_map()) -> uri_map(). +parse_query(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_query(unicode:characters_to_list(Str), Acc, URI); +parse_query([H|Rest], Acc, URI) when is_binary(H) -> + parse_query(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_query([H|Rest], Acc, URI) when is_list(H) -> + parse_query(H ++ Rest, Acc, URI); +parse_query([$#|Rest], Acc, URI) -> + parse_fragment(Rest, [], URI#{query => lists:reverse(Acc)}); +parse_query([Char|Rest], Acc, URI) -> + case is_query(Char) of + true -> parse_query(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_query([], Acc, URI) -> + URI#{query => lists:reverse(Acc)}. + +%% Check if char is allowed in query +-spec is_query(char()) -> boolean(). +is_query($/) -> true; +is_query(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 3.5. Fragment] +%% +%% The fragment identifier component of a URI allows indirect +%% identification of a secondary resource by reference to a primary +%% resource and additional identifying information. +%% +%% fragment = *( pchar / "/" / "?" ) +%%------------------------------------------------------------------------- +-spec parse_fragment(binary(), uri_map()) -> {binary(), uri_map()}. +parse_fragment(?STRING_REST(Char, Rest), URI) -> + case is_fragment(Char) of + true -> parse_fragment(Rest, URI); + false -> throw(uri_parse_error) + end; +parse_fragment(?STRING_EMPTY, URI) -> + {?STRING_EMPTY, URI}. + +-spec parse_fragment(iolist(), list(), uri_map()) -> uri_map(). +parse_fragment(?STRING(Str), Acc, URI) when is_list(Acc) -> + parse_fragment(unicode:characters_to_list(Str), Acc, URI); +parse_fragment([H|Rest], Acc, URI) when is_binary(H) -> + parse_fragment(unicode:characters_to_list(H, utf8) ++ Rest, + Acc, URI); +parse_fragment([H|Rest], Acc, URI) when is_list(H) -> + parse_fragment(H ++ Rest, Acc, URI); +parse_fragment([Char|Rest], Acc, URI) -> + case is_fragment(Char) of + true -> parse_fragment(Rest, [Char|Acc], URI); + false -> throw(uri_parse_error) + end; +parse_fragment([], Acc, URI) -> + URI#{fragment => lists:reverse(Acc)}. + +%% Check if char is allowed in fragment +-spec is_fragment(char()) -> boolean(). +is_fragment($/) -> true; +is_fragment($?) -> true; +is_fragment(Char) -> is_pchar(Char). + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.2. Reserved Characters] +%% +%% reserved = gen-delims / sub-delims +%% +%% gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +%% +%% sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +%% / "*" / "+" / "," / ";" / "=" +%% +%%------------------------------------------------------------------------- +%% %% Return true if input char is reserved. +%% -spec is_reserved(char()) -> boolean(). +%% is_reserved(Char) -> +%% is_gen_delim(Char) orelse is_sub_delim(Char). + +%% %% Check if char is reserved. +%% -spec is_gen_delim(char()) -> boolean(). +%% is_gen_delim($:) -> true; +%% is_gen_delim($/) -> true; +%% is_gen_delim($?) -> true; +%% is_gen_delim($#) -> true; +%% is_gen_delim($[) -> true; +%% is_gen_delim($]) -> true; +%% is_gen_delim($@) -> true; +%% is_gen_delim(_) -> false. + +%% Check if char is sub-delim. +-spec is_sub_delim(char()) -> boolean(). +is_sub_delim($!) -> true; +is_sub_delim($$) -> true; +is_sub_delim($&) -> true; +is_sub_delim($') -> true; +is_sub_delim($() -> true; +is_sub_delim($)) -> true; + +is_sub_delim($*) -> true; +is_sub_delim($+) -> true; +is_sub_delim($,) -> true; +is_sub_delim($;) -> true; +is_sub_delim($=) -> true; +is_sub_delim(_) -> false. + + +%%------------------------------------------------------------------------- +%% [RFC 3986, Chapter 2.3. Unreserved Characters] +%% +%% unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +%% +%%------------------------------------------------------------------------- +-spec is_unreserved(char()) -> boolean(). +is_unreserved($-) -> true; +is_unreserved($.) -> true; +is_unreserved($_) -> true; +is_unreserved($~) -> true; +is_unreserved(Char) -> is_alpha(Char) orelse is_digit(Char). + +-spec is_alpha(char()) -> boolean(). +is_alpha(C) + when $A =< C, C =< $Z; + $a =< C, C =< $z -> true; +is_alpha(_) -> false. + +-spec is_digit(char()) -> boolean(). +is_digit(C) + when $0 =< C, C =< $9 -> true; +is_digit(_) -> false. + +%% Returns the size of a binary exluding the first element. +%% Used in calls to split_binary(). +-spec byte_size_exl_head(binary()) -> number(). +byte_size_exl_head(<<>>) -> 0; +byte_size_exl_head(Binary) -> byte_size(Binary) + 1. diff --git a/lib/stdlib/test/Makefile b/lib/stdlib/test/Makefile index 523cb95065..8490770f3d 100644 --- a/lib/stdlib/test/Makefile +++ b/lib/stdlib/test/Makefile @@ -87,6 +87,7 @@ MODULES= \ timer_simple_SUITE \ unicode_SUITE \ unicode_util_SUITE \ + uri_string_SUITE \ win32reg_SUITE \ y2k_SUITE \ select_SUITE \ diff --git a/lib/stdlib/test/property_test/README b/lib/stdlib/test/property_test/README new file mode 100644 index 0000000000..57602bf719 --- /dev/null +++ b/lib/stdlib/test/property_test/README @@ -0,0 +1,12 @@ + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% %%% +%%% WARNING %%% +%%% %%% +%%% This is experimental code which may be changed or removed %%% +%%% anytime without any warning. %%% +%%% %%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +The test in this directory are written assuming that the user has a QuickCheck license. They are to be run manually. Some may be possible to be run with other tools, e.g. PropEr. + diff --git a/lib/stdlib/test/property_test/uri_string_decode.erl b/lib/stdlib/test/property_test/uri_string_decode.erl new file mode 100644 index 0000000000..137a649cf1 --- /dev/null +++ b/lib/stdlib/test/property_test/uri_string_decode.erl @@ -0,0 +1,55 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_decode). + +-compile(export_all). + +-proptest(eqc). +-proptest([triq,proper]). + +-ifndef(EQC). +-ifndef(PROPER). +-ifndef(TRIQ). +-define(EQC,true). +-endif. +-endif. +-endif. + +-ifdef(EQC). +-include_lib("eqc/include/eqc.hrl"). +-define(MOD_eqc,eqc). + +-else. +-ifdef(PROPER). +-include_lib("proper/include/proper.hrl"). +-define(MOD_eqc,proper). + +-else. +-ifdef(TRIQ). +-define(MOD_eqc,triq). +-include_lib("triq/include/triq.hrl"). + +-endif. +-endif. +-endif. + + +prop_uri_string_decode() -> + ok. diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl new file mode 100644 index 0000000000..189941de03 --- /dev/null +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -0,0 +1,326 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([all/0, suite/0,groups/0, + parse_binary_fragment/1, parse_binary_host/1, parse_binary_path/1, parse_binary_port/1, + parse_binary_query/1, parse_binary_scheme/1, parse_binary_userinfo/1, + parse_fragment/1, parse_host/1, parse_path/1, parse_port/1, + parse_query/1, parse_scheme/1, parse_userinfo/1, + parse_list/1, parse_binary/1, parse_mixed/1, parse_relative/1 + ]). + +suite() -> + [{timetrap,{minutes,1}}]. + +all() -> + [ + parse_binary_scheme, + parse_binary_userinfo, + parse_binary_host, + parse_binary_port, + parse_binary_path, + parse_binary_query, + parse_binary_fragment, + parse_scheme, + parse_userinfo, + parse_host, + parse_port, + parse_path, + parse_query, + parse_fragment, + parse_list, + parse_binary, + parse_mixed, + parse_relative + ]. + +groups() -> + []. + + +%% TODO: Negative tests +%% uri_string:parse(<<"?name=ferret">>). +%% uri_string:parse("//user@") +%% uri_string:parse("foo://user@") +%% uri_string:parse(":600"). +%% +%% uri_string:parse("//:8042x"). +%% +%% io:format("# DEBUG T: >>~s<<~n", [T]), + +parse_binary_scheme(_Config) -> + #{} = uri_string:parse(<<>>), + #{path := <<"foo">>} = uri_string:parse(<<"foo">>), + #{scheme := <<"foo">>} = uri_string:parse(<<"foo:">>), + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"">>} = uri_string:parse(<<"foo://">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"/">>} = uri_string:parse(<<"foo:///">>), + #{scheme := <<"foo">>, host := <<"">>, path := <<"//">>} = uri_string:parse(<<"foo:////">>), + + #{path := <<"/">>} = uri_string:parse(<<"/">>), + #{host := <<>>} = uri_string:parse(<<"//">>), + #{host := <<>>, path := <<"/">>} = uri_string:parse(<<"///">>). + +parse_binary_userinfo(_Config) -> + #{scheme := <<"user">>, path := <<"password@localhost">>} = + uri_string:parse(<<"user:password@localhost">>), + #{path := <<"user@">>} = uri_string:parse(<<"user@">>), + #{path := <<"/user@">>} = uri_string:parse(<<"/user@">>), + #{path := <<"user@localhost">>} = uri_string:parse(<<"user@localhost">>), + #{userinfo := <<"user">>, host := <<"localhost">>} = uri_string:parse(<<"//user@localhost">>), + #{userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"//user:password@localhost">>), + #{scheme := <<"foo">>, path := <<"/user@">>} = + uri_string:parse(<<"foo:/user@">>), + #{scheme := <<"foo">>, userinfo := <<"user">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user@localhost">>), + #{scheme := <<"foo">>, userinfo := <<"user:password">>, host := <<"localhost">>} = + uri_string:parse(<<"foo://user:password@localhost">>). + +parse_binary_host(_Config) -> + #{host := <<"hostname">>} = uri_string:parse(<<"//hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>} = uri_string:parse(<<"foo://hostname">>), + #{host := <<"hostname">>,scheme := <<"foo">>, userinfo := <<"user">>} = + uri_string:parse(<<"foo://user@hostname">>). + +parse_binary_port(_Config) -> + #{path:= <<"/:8042">>} = + uri_string:parse(<<"/:8042">>), + #{host:= <<>>, port := 8042} = + uri_string:parse(<<"//:8042">>), + #{host := <<"example.com">>, port:= 8042} = + uri_string:parse(<<"//example.com:8042">>), + #{scheme := <<"foo">>, path := <<"/:8042">>} = + uri_string:parse(<<"foo:/:8042">>), + #{scheme := <<"foo">>, host := <<>>, port := 8042} = + uri_string:parse(<<"foo://:8042">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042">>). + +parse_binary_path(_Config) -> + #{path := <<"over/there">>} = uri_string:parse(<<"over/there">>), + #{path := <<"/over/there">>} = uri_string:parse(<<"/over/there">>), + #{scheme := <<"foo">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo:/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>} = + uri_string:parse(<<"foo://example.com/over/there">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/over/there">>, port := 8042} = + uri_string:parse(<<"foo://example.com:8042/over/there">>). + +parse_binary_query(_Config) -> + #{scheme := <<"foo">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"foo:?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"foo:over/there?name=ferret">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"foo:/over/there?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"foo://example.com?name=ferret">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"foo://example.com/?name=ferret">>), + + #{query := <<"?name=ferret">>} = + uri_string:parse(<<"?name=ferret">>), + #{path := <<"over/there">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"over/there?name=ferret">>), + #{path := <<"/">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"/?name=ferret">>), + #{path := <<"/over/there">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"/over/there?name=ferret">>), + #{host := <<"example.com">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"//example.com?name=ferret">>), + #{host := <<"example.com">>, path := <<"/">>, query := <<"?name=ferret">>} = + uri_string:parse(<<"//example.com/?name=ferret">>). + + +parse_binary_fragment(_Config) -> + #{scheme := <<"foo">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:#nose">>), + #{scheme := <<"foo">>, path:= <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:over/there#nose">>), + #{scheme := <<"foo">>, path:= <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo:/over/there#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com/#nose">>), + #{scheme := <<"foo">>, host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com#nose">>), + + #{fragment := <<"nose">>} = + uri_string:parse(<<"#nose">>), + #{path := <<"over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"over/there#nose">>), + #{path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"/#nose">>), + #{path := <<"/over/there">>, fragment := <<"nose">>} = + uri_string:parse(<<"/over/there#nose">>), + #{host := <<"example.com">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com#nose">>), + #{host := <<"example.com">>, path := <<"/">>, fragment := <<"nose">>} = + uri_string:parse(<<"//example.com/#nose">>). + +parse_scheme(_Config) -> + #{} = uri_string:parse(""), + #{path := "foo"} = uri_string:parse("foo"), + #{scheme := "foo"} = uri_string:parse("foo:"), + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := ""} = uri_string:parse("foo://"), + #{scheme := "foo", host := "", path := "/"} = uri_string:parse("foo:///"), + #{scheme := "foo", host := "", path := "//"} = uri_string:parse("foo:////"), + + #{path := "/"} = uri_string:parse("/"), + #{host := ""} = uri_string:parse("//"), + #{host := "", path := "/"} = uri_string:parse("///"). + +parse_userinfo(_Config) -> + #{scheme := "user", path := "password@localhost"} = uri_string:parse("user:password@localhost"), + #{path := "user@"} = uri_string:parse("user@"), + #{path := "/user@"} = uri_string:parse("/user@"), + #{path := "user@localhost"} = uri_string:parse("user@localhost"), + #{userinfo := "user", host := "localhost"} = uri_string:parse("//user@localhost"), + #{userinfo := "user:password", host := "localhost"} = + uri_string:parse("//user:password@localhost"), + #{scheme := "foo", path := "/user@"} = + uri_string:parse("foo:/user@"), + #{scheme := "foo", userinfo := "user", host := "localhost"} = + uri_string:parse("foo://user@localhost"), + #{scheme := "foo", userinfo := "user:password", host := "localhost"} = + uri_string:parse("foo://user:password@localhost"). + +parse_host(_Config) -> + #{host := "hostname"} = uri_string:parse("//hostname"), + #{host := "hostname",scheme := "foo"} = uri_string:parse("foo://hostname"), + #{host := "hostname",scheme := "foo", userinfo := "user"} = + uri_string:parse("foo://user@hostname"). + +parse_port(_Config) -> + #{path:= "/:8042"} = + uri_string:parse("/:8042"), + #{host:= "", port := 8042} = + uri_string:parse("//:8042"), + #{host := "example.com", port:= 8042} = + uri_string:parse("//example.com:8042"), + #{scheme := "foo", path := "/:8042"} = + uri_string:parse("foo:/:8042"), + #{scheme := "foo", host := "", port := 8042} = + uri_string:parse("foo://:8042"), + #{scheme := "foo", host := "example.com", port := 8042} = + uri_string:parse("foo://example.com:8042"). + +parse_path(_Config) -> + #{path := "over/there"} = uri_string:parse("over/there"), + #{path := "/over/there"} = uri_string:parse("/over/there"), + #{scheme := "foo", path := "/over/there"} = + uri_string:parse("foo:/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there"} = + uri_string:parse("foo://example.com/over/there"), + #{scheme := "foo", host := "example.com", path := "/over/there", port := 8042} = + uri_string:parse("foo://example.com:8042/over/there"). + +parse_query(_Config) -> + #{scheme := "foo", query := "?name=ferret"} = + uri_string:parse("foo:?name=ferret"), + #{scheme := "foo", path:= "over/there", query := "?name=ferret"} = + uri_string:parse("foo:over/there?name=ferret"), + #{scheme := "foo", path:= "/over/there", query := "?name=ferret"} = + uri_string:parse("foo:/over/there?name=ferret"), + #{scheme := "foo", host := "example.com", query := "?name=ferret"} = + uri_string:parse("foo://example.com?name=ferret"), + #{scheme := "foo", host := "example.com", path := "/", query := "?name=ferret"} = + uri_string:parse("foo://example.com/?name=ferret"), + + #{query := "?name=ferret"} = + uri_string:parse("?name=ferret"), + #{path := "over/there", query := "?name=ferret"} = + uri_string:parse("over/there?name=ferret"), + #{path := "/", query := "?name=ferret"} = + uri_string:parse("/?name=ferret"), + #{path := "/over/there", query := "?name=ferret"} = + uri_string:parse("/over/there?name=ferret"), + #{host := "example.com", query := "?name=ferret"} = + uri_string:parse("//example.com?name=ferret"), + #{host := "example.com", path := "/", query := "?name=ferret"} = + uri_string:parse("//example.com/?name=ferret"). + + +parse_fragment(_Config) -> + #{scheme := "foo", fragment := "nose"} = + uri_string:parse("foo:#nose"), + #{scheme := "foo", path:= "over/there", fragment := "nose"} = + uri_string:parse("foo:over/there#nose"), + #{scheme := "foo", path:= "/over/there", fragment := "nose"} = + uri_string:parse("foo:/over/there#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + #{scheme := "foo", host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("foo://example.com/#nose"), + #{scheme := "foo", host := "example.com", fragment := "nose"} = + uri_string:parse("foo://example.com#nose"), + + #{fragment := "nose"} = + uri_string:parse("#nose"), + #{path := "over/there", fragment := "nose"} = + uri_string:parse("over/there#nose"), + #{path := "/", fragment := "nose"} = + uri_string:parse("/#nose"), + #{path := "/over/there", fragment := "nose"} = + uri_string:parse("/over/there#nose"), + #{host := "example.com", fragment := "nose"} = + uri_string:parse("//example.com#nose"), + #{host := "example.com", path := "/", fragment := "nose"} = + uri_string:parse("//example.com/#nose"). + + +parse_list(_Config) -> + #{scheme := "foo", path := "bar:nisse"} = uri_string:parse("foo:bar:nisse"), + #{scheme := "foo", host := "example.com", port := 8042, + path := "/over/there", query := "?name=ferret", fragment := "nose"} = + uri_string:parse("foo://example.com:8042/over/there?name=ferret#nose"), + #{scheme := "foo", userinfo := "admin:admin", host := "example.com", port := 8042, + path := "/over/there", query := "?name=ferret", fragment := "nose"} = + uri_string:parse("foo://admin:admin@example.com:8042/over/there?name=ferret#nose"). + +parse_binary(_Config) -> + #{scheme := <<"foo">>, path := <<"bar:nisse">>} = uri_string:parse(<<"foo:bar:nisse">>), + #{scheme := <<"foo">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"?name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://example.com:8042/over/there?name=ferret#nose">>), + #{scheme := <<"foo">>, userinfo := <<"admin:admin">>, host := <<"example.com">>, port := 8042, + path := <<"/over/there">>, query := <<"?name=ferret">>, fragment := <<"nose">>} = + uri_string:parse(<<"foo://admin:admin@example.com:8042/over/there?name=ferret#nose">>). + + +parse_mixed(_Config) -> + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("fo",<<"o:bar">>)), + #{scheme := "foo", path := "bar"} = + uri_string:parse(lists:append("foo:b",<<"ar">>)), + #{scheme := "foo", path := "bar:bar"} = + uri_string:parse([[102],[111,111],<<":bar">>,58,98,97,114]). + +parse_relative(_Config) -> + #{path := "/path"} = + uri_string:parse(lists:append("/pa",<<"th">>)), + #{path := "foo"} = + uri_string:parse(lists:append("fo",<<"o">>)). diff --git a/lib/stdlib/test/uri_string_property_test_SUITE.erl b/lib/stdlib/test/uri_string_property_test_SUITE.erl new file mode 100644 index 0000000000..de5edf54aa --- /dev/null +++ b/lib/stdlib/test/uri_string_property_test_SUITE.erl @@ -0,0 +1,42 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2017. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% +%% %CopyrightEnd% +%% +-module(uri_string_property_test_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-compile(export_all). + +all() -> [decode]. + +init_per_suite(Config) -> + ct_property_test:init_per_suite(Config). + +end_per_suite(Config) -> + Config. + +%%%================================================================ +%%% Test suites +%%% + +decode(Config) -> + ct_property_test:quickcheck( + uri_string_decode:prop_uri_string_decode(), + Config + ). -- cgit v1.2.3