diff options
author | José Valim <[email protected]> | 2017-05-19 16:06:08 +0200 |
---|---|---|
committer | José Valim <[email protected]> | 2017-05-22 15:08:31 +0200 |
commit | e1370f924df65e72843b5f81400230e1c2591485 (patch) | |
tree | 73babac731e86c0903ef584d14749e1777d5f54b /lib/stdlib/src/string.erl | |
parent | 166d11bb8cbb386dfab4fef37f6f231ac2689b61 (diff) | |
download | otp-e1370f924df65e72843b5f81400230e1c2591485.tar.gz otp-e1370f924df65e72843b5f81400230e1c2591485.tar.bz2 otp-e1370f924df65e72843b5f81400230e1c2591485.zip |
Return error tuple on unicode normalization functions
Prior to this patch, the normalization functions in the
unicode module would raise a function clause error for
non-utf8 binaries.
This patch changes them so that they return {error, SoFar, Invalid},
as characters_to_binary and characters_to_list do in
the unicode module.
Note that string:next_codepoint/1 and string:next_grapheme/1 had
to be changed accordingly and now also return an error tuple.
Diffstat (limited to 'lib/stdlib/src/string.erl')
-rw-r--r-- | lib/stdlib/src/string.erl | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index 17135dd64a..6f7009b5d9 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -486,12 +486,14 @@ find(String, SearchPattern, trailing) -> %% Fetch first codepoint and return rest in tail -spec next_grapheme(String::unicode:chardata()) -> - maybe_improper_list(grapheme_cluster(),unicode:chardata()). + maybe_improper_list(grapheme_cluster(),unicode:chardata()) | + {error,unicode:chardata()}. next_grapheme(CD) -> unicode_util:gc(CD). %% Fetch first grapheme cluster and return rest in tail -spec next_codepoint(String::unicode:chardata()) -> - maybe_improper_list(char(),unicode:chardata()). + maybe_improper_list(char(),unicode:chardata()) | + {error,unicode:chardata()}. next_codepoint(CD) -> unicode_util:cp(CD). %% Internals @@ -508,7 +510,7 @@ equal_1(A0,B0) -> case {unicode_util:cp(A0), unicode_util:cp(B0)} of {[CP|A],[CP|B]} -> equal_1(A,B); {[], []} -> true; - _ -> false + {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_nocase(A, A) -> true; @@ -517,7 +519,7 @@ equal_nocase(A0, B0) -> unicode_util:cp(unicode_util:casefold(B0))} of {[CP|A],[CP|B]} -> equal_nocase(A,B); {[], []} -> true; - _ -> false + {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_norm(A, A, _Norm) -> true; @@ -526,7 +528,7 @@ equal_norm(A0, B0, Norm) -> unicode_util:cp(unicode_util:Norm(B0))} of {[CP|A],[CP|B]} -> equal_norm(A,B, Norm); {[], []} -> true; - _ -> false + {L1,L2} when is_list(L1), is_list(L2) -> false end. equal_norm_nocase(A, A, _Norm) -> true; @@ -535,7 +537,7 @@ equal_norm_nocase(A0, B0, Norm) -> unicode_util:cp(unicode_util:casefold(unicode_util:Norm(B0)))} of {[CP|A],[CP|B]} -> equal_norm_nocase(A,B, Norm); {[], []} -> true; - _ -> false + {L1,L2} when is_list(L1), is_list(L2) -> false end. reverse_1(CD, Acc) -> |