From 7a4d4e183ae5567d6242184b8268918904c872c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Mon, 30 Oct 2017 16:57:49 +0100 Subject: stdlib: Refactor helper functions in uri_string --- lib/stdlib/doc/src/uri_string.xml | 2 +- lib/stdlib/src/uri_string.erl | 142 ++++++++++++++++++-------------------- 2 files changed, 67 insertions(+), 77 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml index 55d8690b98..8fa2a92370 100644 --- a/lib/stdlib/doc/src/uri_string.xml +++ b/lib/stdlib/doc/src/uri_string.xml @@ -119,7 +119,7 @@ invalid_utf8 missing_value -

The third component is a list or binary providing additional information about the +

The third component is a term providing additional information about the cause of the error.

diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index cf8c388f54..2c73e38324 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -273,7 +273,7 @@ %% %x96 ` grave / accent %%------------------------------------------------------------------------- -type uri_string() :: iodata(). --type error() :: {error, atom(), list() | binary()}. +-type error() :: {error, atom(), term()}. %%------------------------------------------------------------------------- @@ -298,10 +298,11 @@ normalize(URIString) -> %% Case normalization and percent-encoding normalization are achieved %% by running parse and recompose on the input URI string. - M = parse(URIString), - M1 = normalize_scheme_based(M), - M2 = normalize_path_segment(M1), - recompose(M2). + recompose( + normalize_path_segment( + normalize_scheme_based( + parse(URIString)))). + %%------------------------------------------------------------------------- %% Parse URIs @@ -311,8 +312,7 @@ normalize(URIString) -> URIMap :: uri_map() | error(). parse(URIString) when is_binary(URIString) -> - try parse_uri_reference(URIString, #{}) of - Result -> Result + try parse_uri_reference(URIString, #{}) catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end; @@ -321,8 +321,6 @@ parse(URIString) when is_list(URIString) -> Binary = unicode:characters_to_binary(URIString), Map = parse_uri_reference(Binary, #{}), convert_mapfields_to_list(Map) - of - Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end. @@ -348,8 +346,6 @@ recompose(Map) -> T4 = update_path(Map, T3), T5 = update_query(Map, T4), update_fragment(Map, T5) - of - Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end @@ -371,8 +367,6 @@ transcode(URIString, Options) when is_binary(URIString) -> List = convert_list(URIString, InEnc), Output = transcode(List, [], InEnc, OutEnc), convert_binary(Output, utf8, OutEnc) - of - Result -> Result catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end; @@ -380,8 +374,7 @@ transcode(URIString, Options) when is_list(URIString) -> InEnc = proplists:get_value(in_encoding, Options, utf8), OutEnc = proplists:get_value(out_encoding, Options, utf8), Flattened = flatten_list(URIString, InEnc), - try transcode(Flattened, [], InEnc, OutEnc) of - Result -> Result + try transcode(Flattened, [], InEnc, OutEnc) catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end. @@ -413,8 +406,7 @@ compose_query(List) -> compose_query([],_Options) -> []; compose_query(List, Options) -> - try compose_query(List, Options, false, <<>>) of - Result -> Result + try compose_query(List, Options, false, <<>>) catch throw:{error, Atom, RestData} -> {error, Atom, RestData} end. @@ -423,8 +415,7 @@ compose_query([{Key,Value}|Rest], Options, IsList, Acc) -> Separator = get_separator(Options, Rest), K = form_urlencode(Key), V = form_urlencode(Value), - Flag = is_list(Key) orelse is_list(Value), - IsListNew = IsList orelse Flag, + IsListNew = IsList orelse is_list(Key) orelse is_list(Value), compose_query(Rest, Options, IsListNew, < -%% E.g. "//user@" - invalid URI -%% - userinfo port -%% E.g. "//user@:8080" => #{host => [],port => 8080,userinfo => "user"} -%% There is always at least an empty host when both userinfo and port -%% are present. -%% - #{path => "///"} otherwise the following would be true: -%% "/////" = uri_string:recompose(#{host => "", path => "///"}) -%% "/////" = uri_string:recompose(#{path => "/////"}) -%% AND -%% path-absolute = "/" [ segment-nz *( "/" segment ) ] -%%------------------------------------------------------------------------- -is_valid_map(Map) -> - case - ((not maps:is_key(userinfo, Map) andalso - not maps:is_key(host, Map) andalso - maps:is_key(port, Map)) - orelse - (maps:is_key(userinfo, Map) andalso - not maps:is_key(host, Map) andalso - not maps:is_key(port, Map)) - orelse - (maps:is_key(userinfo, Map) andalso - not maps:is_key(host, Map) andalso - maps:is_key(port, Map))) orelse - not maps:is_key(path, Map) orelse - not is_host_and_path_valid(Map) orelse - invalid_field_present(Map) - of +%% +%% The implementation is based on a decision tree that fulfills the +%% following rules: +%% - 'path' shall always be present in the input map +%% URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] +%% hier-part = "//" authority path-abempty +%% / path-absolute +%% / path-rootless +%% / path-empty +%% - 'host' shall be present in the input map when 'path' starts with +%% two slashes ("//") +%% path = path-abempty ; begins with "/" or is empty +%% / path-absolute ; begins with "/" but not "//" +%% / path-noscheme ; begins with a non-colon segment +%% / path-rootless ; begins with a segment +%% / path-empty ; zero characters +%% path-abempty = *( "/" segment ) +%% segment = *pchar +%% - 'host' shall be present if userinfo or port is present in input map +%% authority = [ userinfo "@" ] host [ ":" port ] +%% - All fields shall be valid (scheme, userinfo, host, port, path, query +%% or fragment). +%%------------------------------------------------------------------------- +is_valid_map(#{path := Path} = Map) -> + case starts_with_two_slash(Path) of true -> - false; + is_valid_map_host(Map); false -> - true - end. + case maps:is_key(userinfo, Map) of + true -> + is_valid_map_host(Map); + false -> + case maps:is_key(port, Map) of + true -> + is_valid_map_host(Map); + false -> + all_fields_valid(Map) + end + end + end; +is_valid_map(#{}) -> + false. -invalid_field_present(Map) -> - Fun = fun(K, _, AccIn) -> AccIn orelse - ((K =/= scheme) andalso (K =/= userinfo) - andalso (K =/= host) andalso (K =/= port) - andalso (K =/= path) andalso (K =/= query) - andalso (K =/= fragment)) - end, - maps:fold(Fun, false, Map). +is_valid_map_host(Map) -> + maps:is_key(host, Map) andalso all_fields_valid(Map). -is_host_and_path_valid(Map) -> - Host = maps:get(host, Map, undefined), - Path = maps:get(path, Map, undefined), - not (Host =:= undefined andalso starts_with_two_slash(Path)). +all_fields_valid(Map) -> + Fun = fun(scheme, _, Acc) -> Acc; + (userinfo, _, Acc) -> Acc; + (host, _, Acc) -> Acc; + (port, _, Acc) -> Acc; + (path, _, Acc) -> Acc; + (query, _, Acc) -> Acc; + (fragment, _, Acc) -> Acc; + (_, _, _) -> false + end, + maps:fold(Fun, true, Map). starts_with_two_slash([$/,$/|_]) -> -- cgit v1.2.3