about | summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: Björn Gustavsson <[email protected]> 2011-10-20 10:49:13 +0200
committer: Björn Gustavsson <[email protected]> 2011-10-20 10:49:13 +0200
commit: 6ef9aef50dbe839098e4330a97247aa21a15ecde (patch)
tree: 4e556a50e08da2e9ab139ec0c82c00c7de1a4b4f /lib
parent: 907772538853d2f89d60702eb140e164a72503ad (diff)
parent: 34db76765561487e526fe66d3d19ecf3b3fb9dc8 (diff)
download: otp-6ef9aef50dbe839098e4330a97247aa21a15ecde.tar.gz
otp-6ef9aef50dbe839098e4330a97247aa21a15ecde.tar.bz2
otp-6ef9aef50dbe839098e4330a97247aa21a15ecde.zip
Merge branch 'bjorn/unicode-noncharacters/OTP-9624'
* bjorn/unicode-noncharacters/OTP-9624: Allow noncharacter code points in unicode encoding and decoding
Diffstat (limited to 'lib')
-rw-r--r-- lib/compiler/test/bs_utf_SUITE.erl 21
-rw-r--r-- lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangString.java 5
-rw-r--r-- lib/stdlib/doc/src/unicode.xml 3
-rw-r--r-- lib/stdlib/test/unicode_SUITE.erl 4
4 files changed, 4 insertions, 29 deletions
diff --git a/lib/compiler/test/bs_utf_SUITE.erl b/lib/compiler/test/bs_utf_SUITE.erl
index f30a4d3fef..94549ad0d3 100644
--- a/lib/compiler/test/bs_utf_SUITE.erl
+++ b/lib/compiler/test/bs_utf_SUITE.erl
@@ -264,18 +264,10 @@ literals(Config) when is_list(Config) ->
?line {'EXIT',{badarg,_}} = (catch <<(-1)/utf32,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<(-1)/little-utf32,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<16#D800/utf8,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFE/utf8,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFF/utf8,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<16#D800/utf16,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<16#D800/little-utf16,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFE/utf16,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFE/little-utf16,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFF/utf16,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFF/little-utf16,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<16#D800/utf32,I/utf8>>),
?line {'EXIT',{badarg,_}} = (catch <<16#D800/little-utf32,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFE/utf32,I/utf8>>),
- ?line {'EXIT',{badarg,_}} = (catch <<16#FFFF/little-utf32,I/utf8>>),
B = 16#10FFFF+1,
?line {'EXIT',{badarg,_}} = (catch <<B/utf8>>),
@@ -286,20 +278,11 @@ literals(Config) when is_list(Config) ->
%% Matching of bad literals.
?line error = bad_literal_match(<<237,160,128>>), %16#D800 in UTF-8
- ?line error = bad_literal_match(<<239,191,190>>), %16#FFFE in UTF-8
- ?line error = bad_literal_match(<<239,191,191>>), %16#FFFF in UTF-8
?line error = bad_literal_match(<<244,144,128,128>>), %16#110000 in UTF-8
- ?line error = bad_literal_match(<<255,254>>), %16#FFFE in UTF-16
- ?line error = bad_literal_match(<<255,255>>), %16#FFFF in UTF-16
-
?line error = bad_literal_match(<<16#D800:32>>),
- ?line error = bad_literal_match(<<16#FFFE:32>>),
- ?line error = bad_literal_match(<<16#FFFF:32>>),
?line error = bad_literal_match(<<16#110000:32>>),
?line error = bad_literal_match(<<16#D800:32/little>>),
- ?line error = bad_literal_match(<<16#FFFE:32/little>>),
- ?line error = bad_literal_match(<<16#FFFF:32/little>>),
?line error = bad_literal_match(<<16#110000:32/little>>),
ok.
@@ -314,11 +297,7 @@ match_literal(<<"bj\366rn"/big-utf16>>) -> bjorn_utf16be;
match_literal(<<"bj\366rn"/little-utf16>>) -> bjorn_utf16le.
bad_literal_match(<<16#D800/utf8>>) -> ok;
-bad_literal_match(<<16#FFFE/utf8>>) -> ok;
-bad_literal_match(<<16#FFFF/utf8>>) -> ok;
bad_literal_match(<<16#110000/utf8>>) -> ok;
-bad_literal_match(<<16#FFFE/utf16>>) -> ok;
-bad_literal_match(<<16#FFFF/utf16>>) -> ok;
bad_literal_match(<<16#D800/utf32>>) -> ok;
bad_literal_match(<<16#110000/utf32>>) -> ok;
bad_literal_match(<<16#D800/little-utf32>>) -> ok;
diff --git a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangString.java b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangString.java
index 19ee92e0d0..23734bf83b 100644
--- a/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangString.java
+++ b/lib/jinterface/java_src/com/ericsson/otp/erlang/OtpErlangString.java
@@ -166,7 +166,7 @@ public class OtpErlangString extends OtpErlangObject implements Serializable,
/**
* Validate a code point according to Erlang definition; Unicode 3.0.
* That is; valid in the range U+0..U+10FFFF, but not in the range
- * U+D800..U+DFFF (surrogat pairs), nor U+FFFE..U+FFFF (non-characters).
+ * U+D800..U+DFFF (surrogat pairs).
*
* @param cp
* the code point value to validate
@@ -179,8 +179,7 @@ public class OtpErlangString extends OtpErlangObject implements Serializable,
// Erlang definition of valid Unicode code points;
// Unicode 3.0, XML, et.al.
return (cp>>>16) <= 0x10 // in 0..10FFFF; Unicode range
- && (cp & ~0x7FF) != 0xD800 // not in D800..DFFF; surrogate range
- && (cp & ~1) != 0xFFFE; // not in FFFE..FFFF; non-characters
+ && (cp & ~0x7FF) != 0xD800; // not in D800..DFFF; surrogate range
}
/**
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index d02763f75c..1001ebbae4 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -203,8 +203,7 @@
<item>greater than <c>16#10FFFF</c>
(the maximum unicode character),</item>
<item>in the range <c>16#D800</c> to <c>16#DFFF</c>
- (invalid unicode range)</item>
- <item>or equal to 16#FFFE or 16#FFFF (non characters)</item>
+ (invalid range reserved for UTF-16 surrogate pairs)</item>
</list>
is found.
</item>
diff --git a/lib/stdlib/test/unicode_SUITE.erl b/lib/stdlib/test/unicode_SUITE.erl
index 9aa800209d..4055af2741 100644
--- a/lib/stdlib/test/unicode_SUITE.erl
+++ b/lib/stdlib/test/unicode_SUITE.erl
@@ -322,7 +322,7 @@ roundtrips(Config) when is_list(Config) ->
ex_roundtrips(Config) when is_list(Config) ->
?line L1 = ranges(0, 16#D800 - 1,
erlang:system_info(context_reductions) * 11),
- ?line L2 = ranges(16#DFFF + 1, 16#FFFE - 1,
+ ?line L2 = ranges(16#DFFF + 1, 16#10000 - 1,
erlang:system_info(context_reductions) * 11),
%?line L3 = ranges(16#FFFF + 1, 16#10FFFF,
% erlang:system_info(context_reductions) * 11),
@@ -569,7 +569,6 @@ utf16_illegal_sequences_bif(Config) when is_list(Config) ->
ex_utf16_illegal_sequences_bif(Config) when is_list(Config) ->
?line utf16_fail_range_bif_simple(16#10FFFF+1, 16#10FFFF+512), %Too large.
?line utf16_fail_range_bif(16#D800, 16#DFFF), %Reserved for UTF-16.
- ?line utf16_fail_range_bif(16#FFFE, 16#FFFF), %Non-characters.
?line lonely_hi_surrogate_bif(16#D800, 16#DBFF,incomplete),
?line lonely_hi_surrogate_bif(16#DC00, 16#DFFF,error),
@@ -644,7 +643,6 @@ utf8_illegal_sequences_bif(Config) when is_list(Config) ->
ex_utf8_illegal_sequences_bif(Config) when is_list(Config) ->
?line fail_range_bif(16#10FFFF+1, 16#10FFFF+512), %Too large.
?line fail_range_bif(16#D800, 16#DFFF), %Reserved for UTF-16.
- ?line fail_range_bif(16#FFFE, 16#FFFF), %Reserved (BOM).
%% Illegal first character.
?line [fail_bif(<<I,16#8F,16#8F,16#8F>>,unicode) || I <- lists:seq(16#80, 16#BF)],