From b0c682a8118c5775da784e9a0f569ee995319f80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= This module contains functions for parsing and handling URIs (RFC 3986) and
- form-urlencoded query strings (RFC 1866). This module contains functions for parsing and handling URIs
+ ( A URI is an identifier consisting of a sequence of characters matching the syntax
- rule named URI in RFC 3986.
The generic URI syntax consists of a hierarchical sequence of components referred to as the scheme, authority, path, query, and fragment:
@@ -55,16 +58,24 @@
The interpretation of a URI depends only on the characters used and not on how those characters are represented in a network protocol.
-The functions implemented by this module covers the following use cases:
+The functions implemented by this module cover the following use cases:
There are four different encodings present during the handling of URIs:
Unless otherwise specified the return value type and encoding are the same as the input
type and encoding. That is, binary input returns binary output, list input returns a list
- output but mixed input returns list output. Input and output encodings are the same except
- for
All of the functions but
Error tuple indicating the type of error. Possible values of the second component:
+List of unicode codepoints, UTF-8 encoded binary, or a mix of the two,
- representing an RFC 3986 compliant URI (percent-encoded form).
+ representing an
Composes a form-urlencoded
See also the opposite operation
Example:
-1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]). - +1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], +1> [{separator, semicolon}]). +"foo+bar=1;city=%C3%B6rebro" +2> >,<<"1">>}, +2> {<<"city">>,<<"örebro"/utf8>>}]).]]> +>]]>
Same as
See also the opposite operation
Example:
1> uri_string:compose_query([{"foo bar","1"},{"city","örebro"}], -2> [{separator, semicolon}]). -"foo+bar=1;city=%C3%B6rebro" +1> [{separator, amp}]). +@@ -143,13 +181,19 @@
Dissects an urlencoded
See also the opposite operation
Example:
1> uri_string:dissect_query("foo+bar=1;city=%C3%B6rebro"). [{"foo bar","1"},{"city","örebro"}] +2> >).]]> +>,<<"1">>},{<<"city">>,<<"örebro"/utf8>>}] ]]>@@ -159,14 +203,19 @@
Returns a
If parsing fails, an error tuple is returned.
+ of theSee also the opposite operation
Example:
1> uri_string:parse("foo://user@example.com:8042/over/there?name=ferret#nose"). #{fragment => "nose",host => "example.com", path => "/over/there",port => 8042,query => "name=ferret", scheme => foo,userinfo => "user"} +2> >).]]> + <<"example.com">>,path => <<"/over/there">>, + port => 8042,query => <<"name=ferret">>,scheme => <<"foo">>, + userinfo => <<"user">>}]]>
Returns an RFC 3986 compliant
If the
Returns an
See also the opposite operation
Example:
1> URIMap = #{fragment => "nose", host => "example.com", path => "/over/there", -port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. +1> port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}. #{fragment => "top",host => "example.com", path => "/over/there",port => 8042,query => "?name=ferret", scheme => foo,userinfo => "user"} @@ -194,14 +246,15 @@ port => 8042, query => "name=ferret", scheme => "foo", userinfo => "user"}.Transcode URI. - diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 09bf4aef1d..ca212284d2 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -229,7 +229,7 @@ -export([compose_query/1, compose_query/2, dissect_query/1, parse/1, recompose/1, transcode/2]). --export_type([uri_map/0, uri_string/0]). +-export_type([error/0, uri_map/0, uri_string/0]). %%------------------------------------------------------------------------- @@ -273,6 +273,8 @@ %% %x96 ` grave / accent %%------------------------------------------------------------------------- -type uri_string() :: iodata(). +-type error() :: {error, atom(), list() | binary()}. + %%------------------------------------------------------------------------- %% RFC 3986, Chapter 3. Syntax Components @@ -292,7 +294,7 @@ -spec parse(URIString) -> URIMap when URIString :: uri_string(), URIMap :: uri_map() - | {error, atom(), list() | binary()}. + | error(). parse(URIString) when is_binary(URIString) -> try parse_uri_reference(URIString, #{}) of Result -> Result @@ -317,7 +319,7 @@ parse(URIString) when is_list(URIString) -> -spec recompose(URIMap) -> URIString when URIMap :: uri_map(), URIString :: uri_string() - | {error, atom(), list() | binary()}. + | error(). recompose(Map) -> case is_valid_map(Map) of false -> @@ -346,7 +348,7 @@ recompose(Map) -> URIString :: uri_string(), Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}], Result :: uri_string() - | {error, atom(), list() | binary()}. + | error(). transcode(URIString, Options) when is_binary(URIString) -> try InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -357,7 +359,7 @@ transcode(URIString, Options) when is_binary(URIString) -> of Result -> Result catch - throw:{error, _, RestData} -> {error, invalid_input, RestData} + throw:{error, Atom, RestData} -> {error, Atom, RestData} end; transcode(URIString, Options) when is_list(URIString) -> InEnc = proplists:get_value(in_encoding, Options, utf8), @@ -366,7 +368,7 @@ transcode(URIString, Options) when is_list(URIString) -> try transcode(Flattened, [], InEnc, OutEnc) of Result -> Result catch - throw:{error, _, RestData} -> {error, invalid_input, RestData} + throw:{error, Atom, RestData} -> {error, Atom, RestData} end. @@ -382,8 +384,8 @@ transcode(URIString, Options) when is_list(URIString) -> %%------------------------------------------------------------------------- -spec compose_query(QueryList) -> QueryString when QueryList :: [{uri_string(), uri_string()}], - QueryString :: string() - | {error, atom(), list() | binary()}. + QueryString :: uri_string() + | error(). compose_query(List) -> compose_query(List, []). @@ -391,8 +393,8 @@ compose_query(List) -> -spec compose_query(QueryList, Options) -> QueryString when QueryList :: [{uri_string(), uri_string()}], Options :: [{separator, atom()}], - QueryString :: string() - | {error, atom(), list() | binary()}. + QueryString :: uri_string() + | error(). compose_query([],_Options) -> []; compose_query(List, Options) -> @@ -421,8 +423,8 @@ compose_query([], _Options, IsList, Acc) -> %%------------------------------------------------------------------------- -spec dissect_query(QueryString) -> QueryList when QueryString :: uri_string(), - QueryList :: [{string(), string()}] - | {error, atom(), list() | binary()}. + QueryList :: [{uri_string(), uri_string()}] + | error(). dissect_query(<<>>) -> []; dissect_query([]) -> @@ -1249,9 +1251,9 @@ decode_fragment(Cs) -> check_utf8(Cs) -> case unicode:characters_to_list(Cs) of {incomplete,_,_} -> - throw({error,non_utf8,Cs}); + throw({error,invalid_utf8,Cs}); {error,_,_} -> - throw({error,non_utf8,Cs}); + throw({error,invalid_utf8,Cs}); _ -> Cs end. @@ -1304,12 +1306,12 @@ decode(<<$%,C0,C1,Cs/binary>>, Fun, Acc) -> true -> B = ?HEX2DEC(C0)*16+?HEX2DEC(C1), decode(Cs, Fun, <Transcodes an RFC 3986 compliant
, + URIString Transcodes an
-RFC 3986 + compliant, where URIString is a list of tagged tuples, specifying the inbound - ( Options in_encoding ) and outbound (out_encoding ) encodings.If an argument is invalid, an error tuple is returned.
+ (in_encoding ) and outbound (out_encoding ) encodings. + If an argument is invalid, an error tuple is returned.Example:
1> >,]]> -2> [{in_encoding, utf32},{out_encoding, utf8}]). +1> [{in_encoding, utf32},{out_encoding, utf8}]). >]]>>); - false -> throw({error,percent_decode,<<$%,C0,C1>>}) + false -> throw({error,invalid_percent_encoding,<<$%,C0,C1>>}) end; decode(< >, Fun, Acc) -> case Fun(C) of true -> decode(Cs, Fun, < >); - false -> throw({error,percent_decode,< >}) + false -> throw({error,invalid_percent_encoding,< >}) end; decode(<<>>, _Fun, Acc) -> Acc. @@ -1339,7 +1341,7 @@ encode(< >, Fun, Acc) -> C = encode_codepoint_binary(Char, Fun), encode(Rest, Fun, < >); encode(< >, _Fun, _Acc) -> - throw({error,percent_encode,< >}); + throw({error,invalid_input,< >}); encode(<<>>, _Fun, Acc) -> Acc. @@ -1647,12 +1649,12 @@ transcode([], Acc, List, _InEncoding, _OutEncoding) -> %% Transcode percent-encoded segment -transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) -> +transcode_pct([$%,C0,C1|Rest] = L, Acc, B, InEncoding, OutEncoding) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), transcode_pct(Rest, Acc, <>, InEncoding, OutEncoding); - false -> throw({error, lists:reverse(Acc),[C0,C1]}) + false -> throw({error, invalid_percent_encoding,L}) end; transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> OutBinary = convert_binary(B, InEncoding, OutEncoding), @@ -1706,7 +1708,7 @@ flatten_list([H|T], InEnc, Acc) -> flatten_list([], _InEnc, Acc) -> lists:reverse(Acc); flatten_list(Arg, _, _) -> - throw({error, badarg, Arg}). + throw({error, invalid_input, Arg}). percent_encode_segment(Segment) -> @@ -1752,7 +1754,7 @@ form_urlencode(< >, Acc) -> form_urlencode(< >, _Acc) -> throw({error,invalid_utf8,< >}); form_urlencode(H, _Acc) -> - throw({error,badarg, H}). + throw({error,invalid_input, H}). %% Return true if input char can appear in URL according to diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 2fc4e1a092..95a49f5eb3 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -819,7 +819,7 @@ transcode_mixed(_Config) -> uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). transcode_negative(_Config) -> - {error,invalid_input,"BX"} = + {error,invalid_percent_encoding,"%BXbar"} = uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), {error,invalid_input,<<"ö">>} = uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). -- cgit v1.2.3