diff options
Diffstat (limited to 'lib/stdlib')
| -rw-r--r-- | lib/stdlib/src/uri_string.erl | 226 | ||||
| -rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 38 | 
2 files changed, 240 insertions, 24 deletions
| diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 7d180f73b8..1b8f8b828f 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -226,8 +226,9 @@  %%-------------------------------------------------------------------------  %% External API  %%------------------------------------------------------------------------- --export([compose_query/1, create_uri_reference/2, dissect_query/1, normalize/1, -         parse/1, recompose/1, resolve_uri_reference/2, transcode/2]). +-export([compose_query/1, compose_query/2, create_uri_reference/2, +         dissect_query/1, normalize/1, parse/1, +         recompose/1, resolve_uri_reference/2, transcode/2]).  -export_type([uri_map/0, uri_string/0]). @@ -377,28 +378,66 @@ transcode(URIString, Options) when is_list(URIString) ->  %%------------------------------------------------------------------------- -%% Working with query strings -%% HTML 2.0 - application/x-www-form-urlencoded -%% RFC 1866 [8.2.1] +%% Functions for working with the query part of a URI as a list +%% of key/value pairs. +%% HTML 2.0 (RFC 1866) defines a media type application/x-www-form-urlencoded +%% in section [8.2.1] "The form-urlencoded Media Type".  %%-------------------------------------------------------------------------  %%-------------------------------------------------------------------------  %% Compose urlencoded query string from a list of unescaped key/value pairs.  %%-------------------------------------------------------------------------  -spec compose_query(QueryList) -> QueryString when -      QueryList :: [{unicode:chardata(), unicode:chardata()}], -      QueryString :: uri_string(). -compose_query(_) -> -    "". +      QueryList :: [{uri_string(), uri_string()}], +      QueryString :: string(). +compose_query(List) -> +    compose_query(List, []). + + +-spec compose_query(QueryList, Options) -> QueryString when +      QueryList :: [{uri_string(), uri_string()}], +      Options :: [{separator, atom()}], +      QueryString :: string(). +compose_query([],_Options) -> +    []; +compose_query(List, Options) -> +    try compose_query(List, Options, []) of +        Result -> Result +    catch +      throw:{error, Atom, RestData} -> {error, Atom, RestData} +    end. +%% +compose_query([{Key,Value}|Rest], Options, Acc) -> +    Separator = get_separator(Options, Acc), +    K = form_urlencode(Key), +    V = form_urlencode(Value), +    compose_query(Rest, Options, Acc ++ Separator ++ K ++ "=" ++ V); +compose_query([], _Options, Acc) -> +    Acc. +  %%-------------------------------------------------------------------------  %% Dissect a query string into a list of unescaped key/value pairs.  %%-------------------------------------------------------------------------  -spec dissect_query(QueryString) -> QueryList when        QueryString :: uri_string(), -      QueryList :: [{unicode:chardata(), unicode:chardata()}]. -dissect_query(_) -> -     "". +      QueryList :: [{string(), string()}]. +dissect_query([]) -> +    []; +dissect_query(QueryString) when is_binary(QueryString) -> +    L = convert_list(QueryString, utf8), +    try dissect_query_key(L, [], [], []) of +        Result -> Result +    catch +        throw:{error, Atom, RestData} -> {error, Atom, RestData} +    end; +dissect_query(QueryString) -> +    L = flatten_list(QueryString, utf8), +    try dissect_query_key(L, [], [], []) of +        Result -> Result +    catch +        throw:{error, Atom, RestData} -> {error, Atom, RestData} +    end.  %%%======================================================================== @@ -1705,10 +1744,10 @@ transcode_pct([], Acc, B, InEncoding, OutEncoding) ->  %% Convert to binary  convert_binary(Binary, InEncoding, OutEncoding) ->      case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of -        {error, List, RestData} -> -            throw({error, List, RestData}); -        {incomplete, List, RestData} -> -            throw({error, List, RestData}); +        {error, _List, RestData} -> +            throw({error, unicode, RestData}); +        {incomplete, _List, RestData} -> +            throw({error, unicode, RestData});          Result ->              Result      end. @@ -1717,10 +1756,10 @@ convert_binary(Binary, InEncoding, OutEncoding) ->  %% Convert to list  convert_list(Binary, InEncoding) ->      case unicode:characters_to_list(Binary, InEncoding) of -        {error, List, RestData} -> -            throw({error, List, RestData}); -        {incomplete, List, RestData} -> -            throw({error, List, RestData}); +        {error, _List, RestData} -> +            throw({error, unicode, RestData}); +        {incomplete, _List, RestData} -> +            throw({error, unicode, RestData});          Result ->              Result      end. @@ -1740,8 +1779,153 @@ flatten_list([H|T], InEnc, Acc) when is_list(H) ->  flatten_list([H|T], InEnc, Acc) ->      flatten_list(T, InEnc, [H|Acc]);  flatten_list([], _InEnc, Acc) -> -    lists:reverse(Acc). +    lists:reverse(Acc); +flatten_list(Arg, _, _) -> +    throw({error, badarg, Arg}). +  percent_encode_segment(Segment) ->      percent_encode_binary(Segment, <<>>). + + +%%------------------------------------------------------------------------- +%% Helper functions for compose_query +%%------------------------------------------------------------------------- + +%% Returns separator to be used between key-value pairs +get_separator(_, Acc) when length(Acc) =:= 0 -> +    []; +get_separator([], _Acc) -> +    "&"; +get_separator([{separator, amp}], _Acc) -> +    "&"; +get_separator([{separator, semicolon}], _Acc) -> +    ";". + + +%% Form-urlencode input based on RFC 1866 [8.2.1] +form_urlencode(Cs) when is_binary(Cs) -> +    L = convert_list(Cs, utf8), +    form_urlencode(L, []); +form_urlencode(Cs) -> +    L = flatten_list(Cs, utf8), +    form_urlencode(L, []). +%% +form_urlencode([], Acc) -> +    lists:reverse(Acc); +form_urlencode([$ |T], Acc) -> +    form_urlencode(T, [$+|Acc]); +form_urlencode([H|T], Acc) -> +    case is_url_char(H) of +        true -> +            form_urlencode(T, [H|Acc]); +        false -> +            E = urlencode_char(H), +            form_urlencode(T, lists:reverse(E) ++ Acc) +    end. + + +urlencode_char(C) -> +    B = percent_encode_binary(C), +    unicode:characters_to_list(B). + + +%% Return true if input char can appear in URL according to +%% RFC 1738 "Uniform Resource Locators". +is_url_char(C) +  when 0 =< C, C =< 31; +       128 =< C, C =< 255 -> false; +is_url_char(127) -> false; +is_url_char(C) -> +    not (is_reserved(C) orelse is_unsafe(C)). + + +%% Reserved characters (RFC 1738) +is_reserved($;) -> true; +is_reserved($/) -> true; +is_reserved($?) -> true; +is_reserved($:) -> true; +is_reserved($@) -> true; +is_reserved($=) -> true; +is_reserved($&) -> true; +is_reserved(_) -> false. + + +%% Unsafe characters (RFC 1738) +is_unsafe(${) -> true; +is_unsafe($}) -> true; +is_unsafe($|) -> true; +is_unsafe($\\) -> true; +is_unsafe($^) -> true; +is_unsafe($~) -> true; +is_unsafe($[) -> true; +is_unsafe($]) -> true; +is_unsafe($`) -> true; +is_unsafe(_) -> false. + + +%%------------------------------------------------------------------------- +%% Helper functions for dissect_query +%%------------------------------------------------------------------------- +dissect_query_key([$=|T], Acc, Key, Value) -> +    dissect_query_value(T, Acc, Key, Value); +dissect_query_key([H|T], Acc, Key, Value) -> +    dissect_query_key(T, Acc, [H|Key], Value); +dissect_query_key(L, _, _, _) -> +    throw({error, missing_value, L}). + + +dissect_query_value([$&|_] = L, Acc, Key, Value) -> +    K = form_urldecode(lists:reverse(Key)), +    V = form_urldecode(lists:reverse(Value)), +    dissect_query_separator_amp(L, [{K,V}|Acc], [], []); +dissect_query_value([$;|_] = L, Acc, Key, Value) -> +    K = form_urldecode(lists:reverse(Key)), +    V = form_urldecode(lists:reverse(Value)), +    dissect_query_separator_semicolon(L, [{K,V}|Acc], [], []); +dissect_query_value([H|T], Acc, Key, Value) -> +    dissect_query_value(T, Acc, Key, [H|Value]); +dissect_query_value([], Acc, Key, Value) -> +    K = form_urldecode(lists:reverse(Key)), +    V = form_urldecode(lists:reverse(Value)), +    lists:reverse([{K,V}|Acc]). + + +dissect_query_separator_amp("&" ++ T, Acc, Key, Value) -> +    dissect_query_key(T, Acc, Key, Value); +dissect_query_separator_amp(L, _, _, _) -> +    throw({error, invalid_separator, L}). + + +dissect_query_separator_semicolon([$;|T], Acc, Key, Value) -> +    dissect_query_key(T, Acc, Key, Value). + + +%% Form-urldecode input based on RFC 1866 [8.2.1] +form_urldecode(Cs) -> +    B = convert_binary(Cs, utf8, utf8), +    Result = form_urldecode(B, <<>>), +    convert_list(Result, utf8). +%% +form_urldecode(<<>>, Acc) -> +    convert_list(Acc, utf8); +form_urldecode(<<$+,T/binary>>, Acc) -> +    form_urlencode(T, [$ |Acc]); +form_urldecode(<<$%,C0,C1,T/binary>>, Acc) -> +    case is_hex_digit(C0) andalso is_hex_digit(C1) of +        true -> +            V = ?HEX2DEC(C0)*16+?HEX2DEC(C1), +            form_urldecode(T, <<Acc/binary, V>>); +        false -> +            L = convert_list(<<$%,C0,C1,T/binary>>, utf8), +            throw({error, urldecode, L}) +    end; +form_urldecode(<<H,T/binary>>, Acc) -> +    case is_url_char(H) of +        true -> +            form_urldecode(T, <<Acc/binary,H>>); +        false -> +            L = convert_list(<<H,T/binary>>, utf8), +            throw({error, urldecode, L}) +    end. diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 901d38a4da..beb534e023 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -37,7 +37,9 @@           recompose_query/1, recompose_parse_query/1,           recompose_path/1, recompose_parse_path/1,           recompose_autogen/1, parse_recompose_autogen/1, -         transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 +         transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1, +         compose_query/1, compose_query_negative/1, +         dissect_query/1, dissect_query_negative/1          ]). @@ -104,7 +106,11 @@ all() ->       transcode_basic,       transcode_options,       transcode_mixed, -     transcode_negative +     transcode_negative, +     compose_query, +     compose_query_negative, +     dissect_query, +     dissect_query_negative      ].  groups() -> @@ -803,5 +809,31 @@ transcode_mixed(_Config) ->  transcode_negative(_Config) ->      {invalid_input,"foo","BX"} =          uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), -    {invalid_input,<<>>,<<"ö">>} = +    {invalid_input,unicode,<<"ö">>} =          uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). + +compose_query(_Config) -> +    [] = uri_string:compose_query([]), +    "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]), +    "foo=1&bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,amp}]), +    "foo=1;bar=2" = uri_string:compose_query([{"foo","1"}, {"bar", "2"}],[{separator,semicolon}]), +    "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{"foo bar","1"}, {"ö", "2"}]). + +compose_query_negative(_Config) -> +    {error,badarg,4} = uri_string:compose_query([{"",4}]). + +dissect_query(_Config) -> +    [] = uri_string:dissect_query(""), +    [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"), +    [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1;bar=2"), +    [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1;bar=2">>,"22"]), +    [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"). + +dissect_query_negative(_Config) -> +    {error,invalid_separator,"≈bar=2"} = +        uri_string:dissect_query("foo=1≈bar=2"), +    {error,urldecode,"&bar"} = +        uri_string:dissect_query("foo1&bar=2"), +    {error,urldecode,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&bar=2"), +    {error,unicode,<<153,182>>} = +        uri_string:dissect_query("foo=%99%B6&bar=2"). | 
