aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorPéter Dimitrov <[email protected]>2017-10-11 16:36:14 +0200
committerPéter Dimitrov <[email protected]>2017-10-23 15:53:29 +0200
commit4a2358bbf4a4049a765aab435a31daeeffbbd677 (patch)
tree952cd88b02a192fa6a2dc8d4d0476d222fe5f13c /lib/stdlib
parent1335e59a60d5e195baf519d2c52b0ca0aa96831f (diff)
downloadotp-4a2358bbf4a4049a765aab435a31daeeffbbd677.tar.gz
otp-4a2358bbf4a4049a765aab435a31daeeffbbd677.tar.bz2
otp-4a2358bbf4a4049a765aab435a31daeeffbbd677.zip
stdlib: Implement transcode/2.
Diffstat (limited to 'lib/stdlib')
-rw-r--r--[-rwxr-xr-x]lib/stdlib/src/uri_string.erl112
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl39
2 files changed, 147 insertions, 4 deletions
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 893ba4c6bf..439ffa80da 100755..100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -353,8 +353,26 @@ normalize(_) ->
-spec transcode(URIString, Options) -> URIString when
URIString :: uri_string(),
Options :: [{in_encoding, unicode:encoding()}|{out_encoding, unicode:encoding()}].
-transcode(_, _) ->
- "".
+%% Transcode the percent-encoded parts of URIString from in_encoding to
+%% out_encoding (both default to utf8). Binary input yields a binary in
+%% out_encoding; list input yields a list. Conversion failures thrown by
+%% the helpers as {error, Converted, Rest} are returned to the caller as
+%% {invalid_input, Converted, Rest}.
+transcode(URIString, Options) when is_binary(URIString) ->
+    {InEnc, OutEnc} = transcode_encodings(Options),
+    try
+        CodePoints = convert_list(URIString, InEnc),
+        Transcoded = transcode(CodePoints, [], InEnc, OutEnc),
+        %% Transcoded is a list of code points, so the utf8 argument only
+        %% matters for binaries embedded in it.
+        convert_binary(Transcoded, utf8, OutEnc)
+    catch
+        throw:{error, Converted, Rest} -> {invalid_input, Converted, Rest}
+    end;
+transcode(URIString, Options) when is_list(URIString) ->
+    {InEnc, OutEnc} = transcode_encodings(Options),
+    try
+        transcode(URIString, [], InEnc, OutEnc)
+    catch
+        throw:{error, Converted, Rest} -> {invalid_input, Converted, Rest}
+    end.
+
+%% Read the {InEncoding, OutEncoding} pair from the option list.
+transcode_encodings(Options) ->
+    {proplists:get_value(in_encoding, Options, utf8),
+     proplists:get_value(out_encoding, Options, utf8)}.
%%-------------------------------------------------------------------------
%% Working with query strings
@@ -1624,3 +1642,93 @@ maybe_to_list(Comp) -> Comp.
encode_port(Port) ->
integer_to_binary(Port).
+
+%%-------------------------------------------------------------------------
+%% Helper functions for transcode
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% uri_string:transcode(<<"x%00%00%00%F6"/utf32>>, [{in_encoding, utf32}]).
+%% 1. Convert (transcode/2) input to list form (list of unicode codepoints)
+%% "x%00%00%00%F6"
+%% 2. Accumulate characters until percent-encoded segment (transcode/4).
+%% Acc = "x"
+%% 3. Convert percent-encoded triplets to binary form (transcode_pct/4)
+%% <<0,0,0,246>>
+%% 4. Transcode in-encoded binary to out-encoding (utf32 -> utf8):
+%% <<195,182>>
+%% 5. Percent-encode out-encoded binary:
+%% <<"%C3%B6"/utf8>> = <<37,67,51,37,66,54>>
+%% 6. Convert binary to list form, reverse it and prepend it to the accumulator
+%% "6B%3C%" ++ "x"
+%% 7. Reverse Acc and return it
+%%-------------------------------------------------------------------------
+%% transcode(Input, Acc, InEnc, OutEnc)
+%% Dispatch on the head of Input: a percent-encoded triplet starts a
+%% percent-encoded segment, anything else starts a run of plain characters.
+%% Acc holds the output produced so far in reverse order.
+transcode([$%,_C0,_C1|_Rest] = L, Acc, InEnc, OutEnc) ->
+    transcode_pct(L, Acc, <<>>, InEnc, OutEnc);
+transcode([_C|_Rest] = L, Acc, InEnc, OutEnc) ->
+    transcode(L, Acc, [], InEnc, OutEnc);
+%% An empty URI reference is valid input; without this clause
+%% transcode("", Opts) and transcode(<<>>, Opts) fail with function_clause.
+transcode([], Acc, _InEnc, _OutEnc) ->
+    lists:reverse(Acc).
+%%
+%% transcode(Input, Acc, Pending, InEnc, OutEnc)
+%% Collect plain characters into Pending (reversed) until a percent-encoded
+%% triplet or the end of input is reached. Nested binaries and lists in
+%% Input are expanded in place before being scanned.
+transcode([Chunk|Tail], Acc, Pending, InEnc, OutEnc) when is_binary(Chunk) ->
+    Chars = convert_list(Chunk, InEnc),
+    transcode(Chars ++ Tail, Acc, Pending, InEnc, OutEnc);
+transcode([Chunk|Tail], Acc, Pending, InEnc, OutEnc) when is_list(Chunk) ->
+    transcode(Chunk ++ Tail, Acc, Pending, InEnc, OutEnc);
+transcode([$%,_Hi,_Lo|_] = Input, Acc, Pending, InEnc, OutEnc) ->
+    %% Flush the pending plain characters and switch to the
+    %% percent-encoded segment handler.
+    transcode_pct(Input, Pending ++ Acc, <<>>, InEnc, OutEnc);
+transcode([Char|Tail], Acc, Pending, InEnc, OutEnc) ->
+    transcode(Tail, Acc, [Char|Pending], InEnc, OutEnc);
+transcode([], Acc, Pending, _InEnc, _OutEnc) ->
+    %% Both accumulators are reversed; Acc holds the earlier output.
+    lists:reverse(Acc, lists:reverse(Pending)).
+
+
+%% transcode_pct(Input, Acc, Bytes, InEnc, OutEnc)
+%% Decode a run of consecutive percent-encoded triplets into Bytes. When
+%% the run ends, Bytes is converted from InEnc to OutEnc and percent-encoded
+%% again. Throws {error, OutputSoFar, [C0,C1]} when a triplet does not
+%% consist of two hex digits.
+transcode_pct([Chunk|Tail], Acc, Bytes, InEnc, OutEnc) when is_binary(Chunk) ->
+    Chars = convert_list(Chunk, InEnc),
+    transcode_pct(Chars ++ Tail, Acc, Bytes, InEnc, OutEnc);
+transcode_pct([Chunk|Tail], Acc, Bytes, InEnc, OutEnc) when is_list(Chunk) ->
+    transcode_pct(Chunk ++ Tail, Acc, Bytes, InEnc, OutEnc);
+transcode_pct([$%,C0,C1|Tail], Acc, Bytes, InEnc, OutEnc) ->
+    case is_hex_digit(C0) andalso is_hex_digit(C1) of
+        false ->
+            throw({error, lists:reverse(Acc), [C0,C1]});
+        true ->
+            Byte = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+            transcode_pct(Tail, Acc, <<Bytes/binary, Byte>>, InEnc, OutEnc)
+    end;
+transcode_pct([_|_] = Input, Acc, Bytes, InEnc, OutEnc) ->
+    %% The segment is over but input remains: push the re-encoded segment
+    %% (reversed) onto Acc and resume plain-character scanning.
+    Encoded = transcode_pct_segment(Bytes, InEnc, OutEnc),
+    transcode(Input, lists:reverse(Encoded, Acc), [], InEnc, OutEnc);
+transcode_pct([], Acc, Bytes, InEnc, OutEnc) ->
+    %% End of input: the re-encoded segment is the tail of the result.
+    lists:reverse(Acc, transcode_pct_segment(Bytes, InEnc, OutEnc)).
+
+%% Convert the collected bytes from InEnc to OutEnc and return them
+%% percent-encoded, as a list of characters.
+transcode_pct_segment(Bytes, InEnc, OutEnc) ->
+    OutBinary = convert_binary(Bytes, InEnc, OutEnc),
+    convert_list(percent_encode_segment(OutBinary), utf8).
+
+
+%% Re-encode Binary (a binary in InEncoding, or a list of code points) as
+%% a binary in OutEncoding using unicode:characters_to_binary/3. Both the
+%% error and the incomplete results are thrown as {error, Converted, Rest}.
+convert_binary(Binary, InEncoding, OutEncoding) ->
+    case unicode:characters_to_binary(Binary, InEncoding, OutEncoding) of
+        {Tag, Converted, Rest} when Tag =:= error; Tag =:= incomplete ->
+            throw({error, Converted, Rest});
+        Encoded ->
+            Encoded
+    end.
+
+
+%% Convert Binary (encoded in InEncoding) to a list of Unicode code points
+%% using unicode:characters_to_list/2. Both the error and the incomplete
+%% results are thrown as {error, Converted, Rest}.
+convert_list(Binary, InEncoding) ->
+    case unicode:characters_to_list(Binary, InEncoding) of
+        {Tag, Converted, Rest} when Tag =:= error; Tag =:= incomplete ->
+            throw({error, Converted, Rest});
+        CodePoints ->
+            CodePoints
+    end.
+
+
+%% Percent-encode an already transcoded segment by calling
+%% percent_encode_binary/2 with an empty accumulator.
+percent_encode_segment(Bin) ->
+    Acc0 = <<>>,
+    percent_encode_binary(Bin, Acc0).
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index cd2e003d02..83f702dd13 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -36,7 +36,8 @@
recompose_fragment/1, recompose_parse_fragment/1,
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
- recompose_autogen/1, parse_recompose_autogen/1
+ recompose_autogen/1, parse_recompose_autogen/1,
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
]).
@@ -99,7 +100,11 @@ all() ->
recompose_path,
recompose_parse_path,
recompose_autogen,
- parse_recompose_autogen
+ parse_recompose_autogen,
+ transcode_basic,
+ transcode_options,
+ transcode_mixed,
+ transcode_negative
].
groups() ->
@@ -763,3 +768,33 @@ recompose_autogen(_Config) ->
parse_recompose_autogen(_Config) ->
Tests = generate_test_vectors(uri_combinations()),
lists:map(fun run_test_parse_recompose/1, Tests).
+
+%% Transcoding with both encodings given explicitly, for binary and list
+%% input: utf32 -> utf8, utf8 -> utf32 and latin1 -> utf8.
+transcode_basic(_Config) ->
+    Utf32ToUtf8 = [{in_encoding, utf32},{out_encoding, utf8}],
+    Utf8ToUtf32 = [{in_encoding, utf8},{out_encoding, utf32}],
+    Latin1ToUtf8 = [{in_encoding, latin1},{out_encoding, utf8}],
+    <<"foo%C3%B6bar"/utf8>> =
+        uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>, Utf32ToUtf8),
+    "foo%C3%B6bar" =
+        uri_string:transcode("foo%00%00%00%F6bar", Utf32ToUtf8),
+    <<"foo%00%00%00%F6bar"/utf32>> =
+        uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, Utf8ToUtf32),
+    "foo%00%00%00%F6bar" =
+        uri_string:transcode("foo%C3%B6bar", Utf8ToUtf32),
+    "foo%C3%B6bar" =
+        uri_string:transcode("foo%F6bar", Latin1ToUtf8).
+
+%% Omitted options default to utf8: no options, only in_encoding, and
+%% only out_encoding.
+transcode_options(_Config) ->
+    Utf8Uri = <<"foo%C3%B6bar"/utf8>>,
+    Utf32Uri = <<"foo%00%00%00%F6bar"/utf32>>,
+    Utf8Uri = uri_string:transcode(<<"foo%C3%B6bar"/utf8>>, []),
+    Utf8Uri = uri_string:transcode(<<"foo%00%00%00%F6bar"/utf32>>,
+                                   [{in_encoding, utf32}]),
+    Utf32Uri = uri_string:transcode(<<"foo%C3%B6bar"/utf8>>,
+                                    [{out_encoding, utf32}]).
+
+%% Input given as a list that mixes strings and utf8 binaries.
+transcode_mixed(_Config) ->
+    Mixed = ["foo",<<"%C3%B6"/utf8>>,<<"ba"/utf8>>,"r"],
+    "foo%00%00%00%F6bar" =
+        uri_string:transcode(Mixed, [{out_encoding, utf32}]).
+
+%% Invalid input is reported as {invalid_input, Converted, Rest}:
+%% first a triplet with a non-hex digit, then a percent-encoded byte
+%% that is not valid in the declared input encoding.
+transcode_negative(_Config) ->
+    Utf8ToUtf32 = [{in_encoding, utf8},{out_encoding, utf32}],
+    Utf8ToUtf8 = [{in_encoding, utf8},{out_encoding, utf8}],
+    {invalid_input,"foo","BX"} =
+        uri_string:transcode(<<"foo%C3%BXbar"/utf8>>, Utf8ToUtf32),
+    {invalid_input,<<>>,<<"ö">>} =
+        uri_string:transcode("foo%F6bar", Utf8ToUtf8).