From e1370f924df65e72843b5f81400230e1c2591485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Fri, 19 May 2017 16:06:08 +0200 Subject: Return error tuple on unicode normalization functions Prior to this patch, the normalization functions in the unicode module would raise a function clause error for non-utf8 binaries. This patch changes it so it returns {error, SoFar, Invalid} as characters_to_binary and characters_to_list does in the unicode module. Note string:next_codepoint/1 and string:next_grapheme had to be changed accordingly and also return an error tuple. --- lib/stdlib/uc_spec/gen_unicode_mod.escript | 49 ++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'lib/stdlib/uc_spec') diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript index c8b815e435..fefd7d3b70 100755 --- a/lib/stdlib/uc_spec/gen_unicode_mod.escript +++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript @@ -170,7 +170,7 @@ gen_header(Fd) -> io:put_chars(Fd, "-export([spec_version/0, lookup/1, get_case/1]).\n"), io:put_chars(Fd, "-inline([class/1]).\n"), io:put_chars(Fd, "-compile(nowarn_unused_vars).\n"), - io:put_chars(Fd, "-dialyzer({no_improper_lists, cp/1}).\n"), + io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc_prepend/2, gc_e_cont/2]}).\n"), io:put_chars(Fd, "-type gc() :: char()|[char()].\n\n\n"), ok. @@ -237,39 +237,43 @@ gen_static(Fd) -> gen_norm(Fd) -> io:put_chars(Fd, - "-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()).\n" + "-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfd(Str0) ->\n" " case gc(Str0) of\n" " [GC|R] when GC < 127 -> [GC|R];\n" " [GC|Str] -> [decompose(GC)|Str];\n" - " [] -> []\n end.\n\n" + " [] -> [];\n" + " {error,_}=Error -> Error\n end.\n\n" ), io:put_chars(Fd, - "-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()).\n" + "-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkd(Str0) ->\n" " case gc(Str0) of\n" " [GC|R] when GC < 127 -> [GC|R];\n" " [GC|Str] -> [decompose_compat(GC)|Str];\n" - " [] -> []\n end.\n\n" + " [] -> [];\n" + " {error,_}=Error -> Error\n end.\n\n" ), io:put_chars(Fd, - "-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()).\n" + "-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfc(Str0) ->\n" " case gc(Str0) of\n" " [GC|R] when GC < 255 -> [GC|R];\n" " [GC|Str] -> [compose(decompose(GC))|Str];\n" - " [] -> []\n end.\n\n" + " [] -> [];\n" + " {error,_}=Error -> Error\n end.\n\n" ), io:put_chars(Fd, - "-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()).\n" + "-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n" "nfkc(Str0) ->\n" " case gc(Str0) of\n" " [GC|R] when GC < 127 -> [GC|R];\n" " [GC|Str] -> [compose_compat_0(decompose_compat(GC))|Str];\n" - " [] -> []\n end.\n\n" + " [] -> [];\n" + " {error,_}=Error -> Error\n end.\n\n" ), io:put_chars(Fd, @@ -448,18 +452,20 @@ gen_ws(Fd, Props) -> gen_cp(Fd) -> io:put_chars(Fd, "-spec cp(String::unicode:chardata()) ->" - " maybe_improper_list().\n"), + " maybe_improper_list() | {error, unicode:chardata()}.\n"), io:put_chars(Fd, "cp([C|_]=L) when is_integer(C) -> L;\n"), io:put_chars(Fd, "cp([List]) -> cp(List);\n"), io:put_chars(Fd, "cp([List|R]) ->\n"), io:put_chars(Fd, " case cp(List) of\n"), io:put_chars(Fd, " [] -> cp(R);\n"), io:put_chars(Fd, " [CP] -> [CP|R];\n"), - io:put_chars(Fd, " [C|R0] -> [C|[R0|R]]\n"), + io:put_chars(Fd, " [C|R0] -> [C|[R0|R]];\n"), + io:put_chars(Fd, " {error,Error} -> {error,[Error|R]}\n"), io:put_chars(Fd, " end;\n"), io:put_chars(Fd, "cp([]) -> [];\n"), io:put_chars(Fd, "cp(<>) -> [C|R];\n"), - io:put_chars(Fd, "cp(<<>>) -> [].\n\n"), + io:put_chars(Fd, "cp(<<>>) -> [];\n"), + io:put_chars(Fd, "cp(<>) -> {error,R}.\n\n"), ok. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -468,7 +474,7 @@ gen_gc(Fd, GBP) -> %% see http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules io:put_chars(Fd, "-spec gc(String::unicode:chardata()) ->" - " maybe_improper_list().\n"), + " maybe_improper_list() | {error, unicode:chardata()}.\n"), io:put_chars(Fd, "gc(Str) ->\n" " gc_1(cp(Str)).\n\n" @@ -521,7 +527,8 @@ gen_gc(Fd, GBP) -> [GenEBG(CP) || CP <- merge_ranges(maps:get(e_base_gaz,GBP))], io:put_chars(Fd, "gc_1([CP|R]) -> gc_extend(R, CP);\n"), - io:put_chars(Fd, "gc_1([]) -> [].\n\n"), + io:put_chars(Fd, "gc_1([]) -> [];\n"), + io:put_chars(Fd, "gc_1({error,_}=Error) -> Error.\n\n"), io:put_chars(Fd, "%% Handle Prepend\n"), io:put_chars(Fd, @@ -536,7 +543,8 @@ gen_gc(Fd, GBP) -> " [GC|R1] -> [[CP0|GC]|R1]\n" " end\n" " end;\n" - " [] -> [CP0]\n" + " [] -> [CP0];\n" + " {error,R} -> [CP0|R]\n" " end.\n\n"), IsCtrl = fun(Range) -> io:format(Fd, "is_control~s true;\n", [gen_single_clause(Range)]) end, @@ -574,7 +582,10 @@ gen_gc(Fd, GBP) -> " [_]=Acc -> Acc;\n" " [_|_]=Acc -> [lists:reverse(Acc)];\n" " Acc -> [Acc]\n" - " end.\n\n"), + " end;\n" + "gc_extend({error,R}, T, Acc0) ->\n" + " gc_extend([], T, Acc0) ++ [R].\n\n" + ), [ZWJ] = maps:get(zwj, GBP), GenExtend = fun(R) when R =:= ZWJ -> io:format(Fd, "is_extend~s zwj;\n", [gen_single_clause(ZWJ)]); (Range) -> io:format(Fd, "is_extend~s true;\n", [gen_single_clause(Range)]) @@ -604,6 +615,11 @@ gen_gc(Fd, GBP) -> " case Acc of\n" " [A] -> [A];\n" " _ -> [lists:reverse(Acc)]\n" + " end;\n" + " {error,R} ->\n" + " case Acc of\n" + " [A] -> [A|R];\n" + " _ -> [lists:reverse(Acc)|R]\n" " end\n" " end.\n\n"), @@ -660,6 +676,7 @@ gen_gc(Fd, GBP) -> [GenHangulT_1(CP) || CP <- merge_ranges(maps:get(t,GBP))], io:put_chars(Fd, " R1 -> gc_extend(R1, R0, Acc)\n end.\n\n"), + io:put_chars(Fd, "gc_h_lv_lvt({error,_}=Error, Acc) -> gc_extend(Error, [], Acc);\n"), io:put_chars(Fd, "%% Handle Hangul LV\n"), GenHangulLV = fun(Range) -> io:format(Fd, "gc_h_lv_lvt~s gc_h_V(R1,[CP|Acc]);\n", [gen_clause2(Range)]) end, -- cgit v1.2.3