diff options
author | José Valim <[email protected]> | 2017-05-19 16:06:08 +0200 |
---|---|---|
committer | José Valim <[email protected]> | 2017-05-22 15:08:31 +0200 |
commit | e1370f924df65e72843b5f81400230e1c2591485 (patch) | |
tree | 73babac731e86c0903ef584d14749e1777d5f54b /lib/stdlib/test | |
parent | 166d11bb8cbb386dfab4fef37f6f231ac2689b61 (diff) | |
download | otp-e1370f924df65e72843b5f81400230e1c2591485.tar.gz otp-e1370f924df65e72843b5f81400230e1c2591485.tar.bz2 otp-e1370f924df65e72843b5f81400230e1c2591485.zip |
Return error tuple on unicode normalization functions
Prior to this patch, the normalization functions in the
unicode module would raise a function clause error for
non-utf8 binaries.
This patch changes it so it returns {error, SoFar, Invalid}
as characters_to_binary and characters_to_list does in
the unicode module.
Note string:next_codepoint/1 and string:next_grapheme had
to be changed accordingly and also return an error tuple.
Diffstat (limited to 'lib/stdlib/test')
-rw-r--r-- | lib/stdlib/test/string_SUITE.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/test/unicode_SUITE.erl | 24 | ||||
-rw-r--r-- | lib/stdlib/test/unicode_util_SUITE.erl | 4 |
3 files changed, 32 insertions, 0 deletions
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 4320b735ac..90f980c0e5 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -582,6 +582,8 @@ cd_gc(_) -> [$e,778] = string:next_codepoint([$e,778]), [$e|<<204,138>>] = string:next_codepoint(<<$e,778/utf8>>), [778|_] = string:next_codepoint(tl(string:next_codepoint(<<$e,778/utf8>>))), + [0|<<128,1>>] = string:next_codepoint(<<0,128,1>>), + {error,<<128,1>>} = string:next_codepoint(<<128,1>>), [] = string:next_grapheme(""), [] = string:next_grapheme(<<>>), @@ -589,6 +591,8 @@ cd_gc(_) -> "abcd" = string:next_grapheme("abcd"), [[$e,778]] = string:next_grapheme([$e,778]), [[$e,778]] = string:next_grapheme(<<$e,778/utf8>>), + [0|<<128,1>>] = string:next_grapheme(<<0,128,1>>), + {error,<<128,1>>} = string:next_grapheme(<<128,1>>), ok. diff --git a/lib/stdlib/test/unicode_SUITE.erl b/lib/stdlib/test/unicode_SUITE.erl index 3d97ab93f1..e01ba3fbb0 100644 --- a/lib/stdlib/test/unicode_SUITE.erl +++ b/lib/stdlib/test/unicode_SUITE.erl @@ -998,6 +998,30 @@ normalize(_) -> true = unicode:characters_to_nfkc_list("ホンダ") =:= unicode:characters_to_nfkc_list("ホンダ"), true = unicode:characters_to_nfkd_list("32") =:= unicode:characters_to_nfkd_list("32"), + + {error, [0], <<128>>} = unicode:characters_to_nfc_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfkc_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfd_list(<<0, 128>>), + {error, [0], <<128>>} = unicode:characters_to_nfkd_list(<<0, 128>>), + + {error, <<0>>, <<128>>} = unicode:characters_to_nfc_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfkc_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfd_binary(<<0, 128>>), + {error, <<0>>, <<128>>} = unicode:characters_to_nfkd_binary(<<0, 128>>), + + LargeBin = binary:copy(<<"abcde">>, 50), + LargeList = binary_to_list(LargeBin), + + {error, LargeList, <<128>>} = unicode:characters_to_nfc_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfkc_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfd_list(<<LargeBin/binary, 128>>), + {error, LargeList, <<128>>} = unicode:characters_to_nfkd_list(<<LargeBin/binary, 128>>), + + {error, LargeBin, <<128>>} = unicode:characters_to_nfc_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfkc_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfd_binary(<<LargeBin/binary, 128>>), + {error, LargeBin, <<128>>} = unicode:characters_to_nfkd_binary(<<LargeBin/binary, 128>>), + ok. diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl index e9b3d7f98d..03c24c7027 100644 --- a/lib/stdlib/test/unicode_util_SUITE.erl +++ b/lib/stdlib/test/unicode_util_SUITE.erl @@ -97,6 +97,8 @@ cp(_) -> "hejsan" = fetch(<<"hejsan">>, Get), "hejsan" = fetch(["hej",<<"san">>], Get), "hejsan" = fetch(["hej"|<<"san">>], Get), + {error, <<128>>} = Get(<<128>>), + {error, [<<128>>, 0]} = Get([<<128>>, 0]), ok. gc(Config) -> @@ -106,6 +108,8 @@ gc(Config) -> "hejsan" = fetch(<<"hejsan">>, Get), "hejsan" = fetch(["hej",<<"san">>], Get), "hejsan" = fetch(["hej"|<<"san">>], Get), + {error, <<128>>} = Get(<<128>>), + {error, [<<128>>, 0]} = Get([<<128>>, 0]), 0 = fold(fun verify_gc/3, 0, DataDir ++ "/GraphemeBreakTest.txt"), ok. |