From e1370f924df65e72843b5f81400230e1c2591485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Fri, 19 May 2017 16:06:08 +0200 Subject: Return error tuple on unicode normalization functions Prior to this patch, the normalization functions in the unicode module would raise a function clause error for non-utf8 binaries. This patch changes them so that they return {error, SoFar, Invalid}, as characters_to_binary and characters_to_list do in the unicode module. Note that string:next_codepoint/1 and string:next_grapheme/1 had to be changed accordingly and now also return an error tuple. --- lib/stdlib/test/string_SUITE.erl | 4 ++++ lib/stdlib/test/unicode_SUITE.erl | 24 ++++++++++++++++++++++++ lib/stdlib/test/unicode_util_SUITE.erl | 4 ++++ 3 files changed, 32 insertions(+) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 4320b735ac..90f980c0e5 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -582,6 +582,8 @@ cd_gc(_) -> [$e,778] = string:next_codepoint([$e,778]), [$e|<<204,138>>] = string:next_codepoint(<<$e,778/utf8>>), [778|_] = string:next_codepoint(tl(string:next_codepoint(<<$e,778/utf8>>))), + [0|<<128,1>>] = string:next_codepoint(<<0,128,1>>), + {error,<<128,1>>} = string:next_codepoint(<<128,1>>), [] = string:next_grapheme(""), [] = string:next_grapheme(<<>>), @@ -589,6 +591,8 @@ cd_gc(_) -> "abcd" = string:next_grapheme("abcd"), [[$e,778]] = string:next_grapheme([$e,778]), [[$e,778]] = string:next_grapheme(<<$e,778/utf8>>), + [0|<<128,1>>] = string:next_grapheme(<<0,128,1>>), + {error,<<128,1>>} = string:next_grapheme(<<128,1>>), ok. 
diff --git a/lib/stdlib/test/unicode_SUITE.erl b/lib/stdlib/test/unicode_SUITE.erl index 3d97ab93f1..e01ba3fbb0 100644 --- a/lib/stdlib/test/unicode_SUITE.erl +++ b/lib/stdlib/test/unicode_SUITE.erl @@ -998,6 +998,30 @@ normalize(_) -> true = unicode:characters_to_nfkc_list("ホンダ") =:= unicode:characters_to_nfkc_list("ホンダ"), true = unicode:characters_to_nfkd_list("32") =:= unicode:characters_to_nfkd_list("32"), + + {error, [0], <<128>>} = unicode:characters_to_nfc_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfkc_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfd_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfkd_list(<<0, 128>>), + + {error, <<0>>, <<128>>} = unicode:characters_to_nfc_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfkc_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfd_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfkd_binary(<<0, 128>>), + + LargeBin = binary:copy(<<"abcde">>, 50), + LargeList = binary_to_list(LargeBin), + + {error, LargeList, <<128>>} = unicode:characters_to_nfc_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfkc_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfd_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfkd_list(<<LargeBin/binary, 128>>), + + {error, LargeBin, <<128>>} = unicode:characters_to_nfc_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfkc_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfd_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfkd_binary(<<LargeBin/binary, 128>>), + ok. 
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index e9b3d7f98d..03c24c7027 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -97,6 +97,8 @@ cp(_) -> "hejsan" = fetch(<<"hejsan">>, Get), "hejsan" = fetch(["hej",<<"san">>], Get), "hejsan" = fetch(["hej"|<<"san">>], Get), + {error, <<128>>} = Get(<<128>>), + {error, [<<128>>, 0]} = Get([<<128>>, 0]), ok. gc(Config) -> @@ -106,6 +108,8 @@ gc(Config) -> "hejsan" = fetch(<<"hejsan">>, Get), "hejsan" = fetch(["hej",<<"san">>], Get), "hejsan" = fetch(["hej"|<<"san">>], Get), + {error, <<128>>} = Get(<<128>>), + {error, [<<128>>, 0]} = Get([<<128>>, 0]), 0 = fold(fun verify_gc/3, 0, DataDir ++ "/GraphemeBreakTest.txt"), ok. -- cgit v1.2.3