Allow noncharacter code points in unicode encoding and decoding

The two noncharacter code points 16#FFFE and 16#FFFF were not allowed to be encoded or decoded using the unicode module or bit syntax. That causes an inconsistency, since the noncharacters 16#FDD0 to 16#FDEF could be encoded/decoded. There are two ways to fix that inconsistency. We have chosen to allow 16#FFFE and 16#FFFF to be encoded and decoded, because the noncharacters could be useful internally within an application and it will make encoding and decoding slightly faster. Reported-by: Alisdair Sullivan
author: Björn Gustavsson <[email protected]> 2011-08-30 11:51:11 +0200
committer: Björn Gustavsson <[email protected]> 2011-10-13 14:16:00 +0200
commit: 34db76765561487e526fe66d3d19ecf3b3fb9dc8 (patch)
tree: 9141e3c5729e46d03c8b27b14da3b29b1e54abca /lib/stdlib
parent: 6ca6dd3c670fb8185ebb9a20c2a731a7375c1cac (diff)
download: otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.gz
otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.bz2
otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.zip
2 files changed, 2 insertions, 5 deletions
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index d02763f75c..1001ebbae4 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -203,8 +203,7 @@
 	     <item>greater than <c>16#10FFFF</c>
 	     (the maximum unicode character),</item>
 	     <item>in the range <c>16#D800</c> to <c>16#DFFF</c>
-	     (invalid unicode range)</item>
-	     <item>or equal to 16#FFFE or 16#FFFF (non characters)</item>
+	       (invalid range reserved for UTF-16 surrogate pairs)</item>
 	   </list>
 	   is found.
 	   </item>
diff --git a/lib/stdlib/test/unicode_SUITE.erl b/lib/stdlib/test/unicode_SUITE.erl
index 9aa800209d..4055af2741 100644
--- a/lib/stdlib/test/unicode_SUITE.erl
+++ b/lib/stdlib/test/unicode_SUITE.erl
@@ -322,7 +322,7 @@ roundtrips(Config) when is_list(Config) ->
 ex_roundtrips(Config) when is_list(Config) ->
     ?line L1 = ranges(0, 16#D800 - 1, 
 		      erlang:system_info(context_reductions) * 11),
-    ?line L2 = ranges(16#DFFF + 1, 16#FFFE - 1, 
+    ?line L2 = ranges(16#DFFF + 1, 16#10000 - 1,
 		      erlang:system_info(context_reductions) * 11),
     %?line L3 = ranges(16#FFFF + 1, 16#10FFFF, 
     %		      erlang:system_info(context_reductions) * 11),
@@ -569,7 +569,6 @@ utf16_illegal_sequences_bif(Config) when is_list(Config) ->
 ex_utf16_illegal_sequences_bif(Config) when is_list(Config) ->
     ?line utf16_fail_range_bif_simple(16#10FFFF+1, 16#10FFFF+512), %Too large.
     ?line utf16_fail_range_bif(16#D800, 16#DFFF),		%Reserved for UTF-16.
-    ?line utf16_fail_range_bif(16#FFFE, 16#FFFF),		%Non-characters.
 
     ?line lonely_hi_surrogate_bif(16#D800, 16#DBFF,incomplete),
     ?line lonely_hi_surrogate_bif(16#DC00, 16#DFFF,error),
@@ -644,7 +643,6 @@ utf8_illegal_sequences_bif(Config) when is_list(Config) ->
 ex_utf8_illegal_sequences_bif(Config) when is_list(Config) ->
     ?line fail_range_bif(16#10FFFF+1, 16#10FFFF+512), %Too large.
     ?line fail_range_bif(16#D800, 16#DFFF),		%Reserved for UTF-16.
-    ?line fail_range_bif(16#FFFE, 16#FFFF),		%Reserved (BOM).
 
     %% Illegal first character.
     ?line [fail_bif(<<I,16#8F,16#8F,16#8F>>,unicode) || I <- lists:seq(16#80, 16#BF)],
author	Björn Gustavsson <[email protected]>	2011-08-30 11:51:11 +0200
committer	Björn Gustavsson <[email protected]>	2011-10-13 14:16:00 +0200
commit	34db76765561487e526fe66d3d19ecf3b3fb9dc8 (patch)
tree	9141e3c5729e46d03c8b27b14da3b29b1e54abca /lib/stdlib
parent	6ca6dd3c670fb8185ebb9a20c2a731a7375c1cac (diff)
download	otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.gz otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.bz2 otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.zip