aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Gudmundsson <[email protected]>2018-12-03 10:39:52 +0100
committerDan Gudmundsson <[email protected]>2018-12-03 10:39:52 +0100
commitbb5192f36d1effdd03edf23df9e31ba12eef17d7 (patch)
tree066a03574c8098a1ec72152f131da31f43e261cd
parent098c8f504c61646a73c5f0e3a4132c15fc6e8219 (diff)
parentd00c1adf7e12f791781995c9469a1acf94ddfb93 (diff)
downloadotp-bb5192f36d1effdd03edf23df9e31ba12eef17d7.tar.gz
otp-bb5192f36d1effdd03edf23df9e31ba12eef17d7.tar.bz2
otp-bb5192f36d1effdd03edf23df9e31ba12eef17d7.zip
Merge branch 'dgud/stdlib/unicode-binary-bug/ERL-777/OTP-15428' into maint
* dgud/stdlib/unicode-binary-bug/ERL-777/OTP-15428: unicode_util did not handle binary input data correctly
-rw-r--r--lib/stdlib/test/unicode_util_SUITE.erl27
-rwxr-xr-xlib/stdlib/uc_spec/gen_unicode_mod.escript2
2 files changed, 21 insertions, 8 deletions
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl
index 962b307b07..044b4e5834 100644
--- a/lib/stdlib/test/unicode_util_SUITE.erl
+++ b/lib/stdlib/test/unicode_util_SUITE.erl
@@ -126,17 +126,30 @@ verify_gc(Line0, N, Acc) ->
%io:format("Line: ~s~n",[Line]),
[Data|_Comments] = string:tokens(Line, "#"),
- %io:format("Data: ~w~n",[string:tokens(Data, " \t")]),
+ %% io:format("Data: ~w~n",[string:tokens(Data, " \t")]),
{Str,Res} = gc_test_data(string:tokens(Data, " \t"), [], [[]]),
- try
- Res = fetch(Str, fun unicode_util:gc/1),
- Acc
- catch _Cl:{badmatch, Other} ->
+ %% io:format("InputStr: ~w ~w~n",[Str,unicode:characters_to_binary(Str)]),
+ case verify_gc(Str, Res, N, Line) andalso
+ verify_gc(unicode:characters_to_binary(Str), Res, N, Line0) of
+ true -> Acc;
+ false -> Acc+1
+ end.
+
+verify_gc({error,_,[CP|_]}=Err, _Res, N, Line) ->
+ IsSurrogate = 16#D800 =< CP andalso CP =< 16#DFFF,
+ %% Surrogat is not valid in utf8 encoding only utf16
+ IsSurrogate orelse
+ io:format("~w: ~ts~n Error in unicode:characters_to_binary ~w~n", [N, Line, Err]),
+ IsSurrogate;
+verify_gc(Str, Res, N, Line) ->
+ try fetch(Str, fun unicode_util:gc/1) of
+ Res -> true;
+ Other ->
io:format("Failed: ~p~nInput: ~ts~n\t=> ~w |~ts|~n",[N, Line, Str, Str]),
io:format("Expected: ~p~n", [Res]),
io:format("Got: ~w~n", [Other]),
- Acc+1;
- Cl:R:Stacktrace ->
+ false
+ catch Cl:R:Stacktrace ->
io:format("~p: ~ts => |~tp|~n",[N, Line, Str]),
io:format("Expected: ~p~n", [Res]),
erlang:raise(Cl,R,Stacktrace)
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index fe5a860d45..535f01a1c5 100755
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -646,7 +646,7 @@ gen_gc(Fd, GBP) ->
io:put_chars(Fd, "is_emodifier(_) -> false.\n\n"),
io:put_chars(Fd, "gc_zwj(R0, Acc) ->\n case cp(R0) of\n"),
- GenZWJGlue = fun(Range) -> io:format(Fd, "~8c~s gc_extend(R1, R0, [CP|Acc]);\n",
+ GenZWJGlue = fun(Range) -> io:format(Fd, "~8c~s gc_extend(cp(R1), R0, [CP|Acc]);\n",
[$\s,gen_case_clause(Range)]) end,
[GenZWJGlue(CP) || CP <- merge_ranges(maps:get(glue_after_zwj,GBP))],
GenZWJEBG = fun(Range) -> io:format(Fd, "~8c~s gc_e_cont(R1, [CP|Acc]);\n",