From fd276f4a2a109d19d25cffee54a2c21ee4568085 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Dimitrov?= Date: Mon, 16 Oct 2017 16:12:18 +0200 Subject: stdlib: Improve support of mixed lists (transcode) - transcode/2 flattens input lists in order to be able to handle lists with percent-encoded parts that are split into multiple list and binary segments. - Add additional tests for transcoding mixed lists. --- lib/stdlib/src/uri_string.erl | 35 ++++++++++++++++++++++------------- lib/stdlib/test/uri_string_SUITE.erl | 6 +++++- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index f9e1e273bc..7d180f73b8 100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -368,12 +368,14 @@ transcode(URIString, Options) when is_binary(URIString) -> transcode(URIString, Options) when is_list(URIString) -> InEnc = proplists:get_value(in_encoding, Options, utf8), OutEnc = proplists:get_value(out_encoding, Options, utf8), - try transcode(URIString, [], InEnc, OutEnc) of + Flattened = flatten_list(URIString, InEnc), + try transcode(Flattened, [], InEnc, OutEnc) of Result -> Result catch throw:{error, List, RestData} -> {invalid_input, List, RestData} end. + %%------------------------------------------------------------------------- %% Working with query strings %% HTML 2.0 - application/x-www-form-urlencoded @@ -1672,11 +1674,6 @@ transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> transcode(L, Acc, [], InEnc, OutEnc). 
%% -transcode([H|T], Acc, List, InEnc, OutEnc) when is_binary(H) -> - L = convert_list(H, InEnc), - transcode(L ++ T, Acc, List, InEnc, OutEnc); -transcode([H|T], Acc, List, InEnc, OutEnc) when is_list(H) -> - transcode(H ++ T, Acc, List, InEnc, OutEnc); transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> @@ -1686,11 +1683,6 @@ transcode([], Acc, List, _InEncoding, _OutEncoding) -> %% Transcode percent-encoded segment -transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_binary(H) -> - L = convert_list(H, InEnc), - transcode_pct(L ++ T, Acc, B, InEnc, OutEnc); -transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_list(H) -> - transcode_pct(H ++ T, Acc, B, InEnc, OutEnc); transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) -> case is_hex_digit(C0) andalso is_hex_digit(C1) of true -> @@ -1710,7 +1702,7 @@ transcode_pct([], Acc, B, InEncoding, OutEncoding) -> lists:reverse(Acc) ++ Out. -% Convert binary +%% Convert to binary convert_binary(Binary, InEncoding, OutEncoding) -> case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of {error, List, RestData} -> @@ -1722,7 +1714,7 @@ convert_binary(Binary, InEncoding, OutEncoding) -> end. -% Convert binary +%% Convert to list convert_list(Binary, InEncoding) -> case unicode:characters_to_list(Binary, InEncoding) of {error, List, RestData} -> @@ -1734,5 +1726,22 @@ convert_list(Binary, InEncoding) -> end. +%% Flatten input list +flatten_list([], _) -> + []; +flatten_list(L, InEnc) -> + flatten_list(L, InEnc, []). +%% +flatten_list([H|T], InEnc, Acc) when is_binary(H) -> + L = convert_list(H, InEnc), + flatten_list(T, InEnc, lists:reverse(L) ++ Acc); +flatten_list([H|T], InEnc, Acc) when is_list(H) -> + flatten_list(H ++ T, InEnc, Acc); +flatten_list([H|T], InEnc, Acc) -> + flatten_list(T, InEnc, [H|Acc]); +flatten_list([], _InEnc, Acc) -> + lists:reverse(Acc). 
+ + percent_encode_segment(Segment) -> percent_encode_binary(Segment, <<>>). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index 8a10948f32..901d38a4da 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -794,7 +794,11 @@ transcode_options(_Config) -> transcode_mixed(_Config) -> "foo%00%00%00%F6bar" = - uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]). + uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%"/utf8>>,<<"B6ba"/utf8>>,"r"], [{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode(["foo%00", <<"%00%0"/utf32>>,<<"0%F"/utf32>>,"6bar"], [{in_encoding, utf32},{out_encoding, utf8}]). transcode_negative(_Config) -> {invalid_input,"foo","BX"} = -- cgit v1.2.3