diff options
author | Péter Dimitrov <[email protected]> | 2017-10-11 16:36:14 +0200 |
---|---|---|
committer | Péter Dimitrov <[email protected]> | 2017-10-23 15:53:29 +0200 |
commit | 4a2358bbf4a4049a765aab435a31daeeffbbd677 (patch) | |
tree | 952cd88b02a192fa6a2dc8d4d0476d222fe5f13c | |
parent | 1335e59a60d5e195baf519d2c52b0ca0aa96831f (diff) | |
download | otp-4a2358bbf4a4049a765aab435a31daeeffbbd677.tar.gz otp-4a2358bbf4a4049a765aab435a31daeeffbbd677.tar.bz2 otp-4a2358bbf4a4049a765aab435a31daeeffbbd677.zip |
stdlib: Implement transcode/2.
-rw-r--r--[-rwxr-xr-x] | lib/stdlib/src/uri_string.erl | 112 | ||||
-rw-r--r-- | lib/stdlib/test/uri_string_SUITE.erl | 39 |
2 files changed, 147 insertions, 4 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl index 893ba4c6bf..439ffa80da 100755..100644 --- a/lib/stdlib/src/uri_string.erl +++ b/lib/stdlib/src/uri_string.erl @@ -353,8 +353,26 @@ normalize(_) -> -spec transcode(URIString, Options) -> URIString when URIString :: uri_string(), Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}]. -transcode(_, _) -> - "". +transcode(URIString, Options) when is_binary(URIString) -> + try + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + List = convert_list(URIString, InEnc), + Output = transcode(List, [], InEnc, OutEnc), + convert_binary(Output, utf8, OutEnc) + of + Result -> Result + catch + throw:{error, L, RestData} -> {invalid_input, L, RestData} + end; +transcode(URIString, Options) when is_list(URIString) -> + InEnc = proplists:get_value(in_encoding, Options, utf8), + OutEnc = proplists:get_value(out_encoding, Options, utf8), + try transcode(URIString, [], InEnc, OutEnc) of + Result -> Result + catch + throw:{error, List, RestData} -> {invalid_input, List, RestData} + end. %%------------------------------------------------------------------------- %% Working with query strings @@ -1624,3 +1642,93 @@ maybe_to_list(Comp) -> Comp. encode_port(Port) -> integer_to_binary(Port). + +%%------------------------------------------------------------------------- +%% Helper functions for transcode +%%------------------------------------------------------------------------- + +%%------------------------------------------------------------------------- +%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>). +%% 1. Convert (transcode/2) input to list form (list of unicode codepoints) +%% "x%00%00%00%F6" +%% 2. Accumulate characters until percent-encoded segment (transcode/4). +%% Acc = "x" +%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4) +%% <<0,0,0,246>> +%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8): +%% <<195,182>> +%% 5. Percent-encode out-encoded binary: +%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>> +%% 6. Convert binary to list form, reverse it and append the accumulator +%% "6B%3C%" + "x" +%% 7. Reverse Acc and return it +%%------------------------------------------------------------------------- +transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode_pct(L, Acc, <<>>, InEnc, OutEnc); +transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) -> + transcode(L, Acc, [], InEnc, OutEnc). +%% +transcode([H|T], Acc, List, InEnc, OutEnc) when is_binary(H) -> + L = convert_list(H, InEnc), + transcode(L ++ T, Acc, List, InEnc, OutEnc); +transcode([H|T], Acc, List, InEnc, OutEnc) when is_list(H) -> + transcode(H ++ T, Acc, List, InEnc, OutEnc); +transcode([$%,_C0,_C1|_Rest] = L, Acc, List, InEncoding, OutEncoding) -> + transcode_pct(L, List ++ Acc, <<>>, InEncoding, OutEncoding); +transcode([C|Rest], Acc, List, InEncoding, OutEncoding) -> + transcode(Rest, Acc, [C|List], InEncoding, OutEncoding); +transcode([], Acc, List, _InEncoding, _OutEncoding) -> + lists:reverse(List ++ Acc). + + +%% Transcode percent-encoded segment +transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_binary(H) -> + L = convert_list(H, InEnc), + transcode_pct(L ++ T, Acc, B, InEnc, OutEnc); +transcode_pct([H|T], Acc, B, InEnc, OutEnc) when is_list(H) -> + transcode_pct(H ++ T, Acc, B, InEnc, OutEnc); +transcode_pct([$%,C0,C1|Rest], Acc, B, InEncoding, OutEncoding) -> + case is_hex_digit(C0) andalso is_hex_digit(C1) of + true -> + Int = ?HEX2DEC(C0)*16+?HEX2DEC(C1), + transcode_pct(Rest, Acc, <<B/binary, Int>>, InEncoding, OutEncoding); + false -> throw({error, lists:reverse(Acc),[C0,C1]}) + end; +transcode_pct([_C|_Rest] = L, Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = lists:reverse(convert_list(PctEncUtf8, utf8)), + transcode(L, Out ++ Acc, [], InEncoding, OutEncoding); +transcode_pct([], Acc, B, InEncoding, OutEncoding) -> + OutBinary = convert_binary(B, InEncoding, OutEncoding), + PctEncUtf8 = percent_encode_segment(OutBinary), + Out = convert_list(PctEncUtf8, utf8), + lists:reverse(Acc) ++ Out. + + +% Convert binary +convert_binary(Binary, InEncoding, OutEncoding) -> + case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of + {error, List, RestData} -> + throw({error, List, RestData}); + {incomplete, List, RestData} -> + throw({error, List, RestData}); + Result -> + Result + end. + + +% Convert binary +convert_list(Binary, InEncoding) -> + case unicode:characters_to_list(Binary, InEncoding) of + {error, List, RestData} -> + throw({error, List, RestData}); + {incomplete, List, RestData} -> + throw({error, List, RestData}); + Result -> + Result + end. + + +percent_encode_segment(Segment) -> + percent_encode_binary(Segment, <<>>). diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl index cd2e003d02..83f702dd13 100644 --- a/lib/stdlib/test/uri_string_SUITE.erl +++ b/lib/stdlib/test/uri_string_SUITE.erl @@ -36,7 +36,8 @@ recompose_fragment/1, recompose_parse_fragment/1, recompose_query/1, recompose_parse_query/1, recompose_path/1, recompose_parse_path/1, - recompose_autogen/1, parse_recompose_autogen/1 + recompose_autogen/1, parse_recompose_autogen/1, + transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1 ]). @@ -99,7 +100,11 @@ all() -> recompose_path, recompose_parse_path, recompose_autogen, - parse_recompose_autogen + parse_recompose_autogen, + transcode_basic, + transcode_options, + transcode_mixed, + transcode_negative ]. groups() -> @@ -763,3 +768,33 @@ recompose_autogen(_Config) -> parse_recompose_autogen(_Config) -> Tests = generate_test_vectors(uri_combinations()), lists:map(fun run_test_parse_recompose/1, Tests). + +transcode_basic(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32},{out_encoding, utf8}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%00%00%00%F6bar", [{in_encoding, utf32},{out_encoding, utf8}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%00%00%00%F6bar" = + uri_string:transcode("foo%C3%B6bar", [{in_encoding, utf8},{out_encoding, utf32}]), + "foo%C3%B6bar" = + uri_string:transcode("foo%F6bar", [{in_encoding, latin1},{out_encoding, utf8}]). + +transcode_options(_Config) -> + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []), + <<"foo%C3%B6bar"/utf8>> = + uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, [{in_encoding, utf32}]), + <<"foo%00%00%00%F6bar"/utf32>> = + uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, [{out_encoding, utf32}]). + +transcode_mixed(_Config) -> + "foo%00%00%00%F6bar" = + uri_string:transcode(["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"], [{out_encoding, utf32}]). + +transcode_negative(_Config) -> + {invalid_input,"foo","BX"} = + uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, [{in_encoding, utf8},{out_encoding, utf32}]), + {invalid_input,<<>>,<<"ö">>} = + uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]). |