From 34db76765561487e526fe66d3d19ecf3b3fb9dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 30 Aug 2011 11:51:11 +0200 Subject: Allow noncharacter code points in unicode encoding and decoding The two noncharacter code points 16#FFFE and 16#FFFF were not allowed to be encoded or decoded using the unicode module or bit syntax. That causes an inconsistency, since the noncharacters 16#FDD0 to 16#FDEF could be encoded/decoded. There are two ways to fix that inconsistency. We have chosen to allow 16#FFFE and 16#FFFF to be encoded and decoded, because the noncharacters could be useful internally within an application and it will make encoding and decoding slightly faster. Reported-by: Alisdair Sullivan --- erts/emulator/beam/erl_bits.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'erts/emulator/beam/erl_bits.c') diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index 326a5c136b..6f7309f493 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -845,8 +845,7 @@ erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg)) dst[1] = 0x80 | (val & 0x3F); num_bits = 16; } else if (val < 0x10000UL) { - if ((0xD800 <= val && val <= 0xDFFF) || - val == 0xFFFE || val == 0xFFFF) { + if (0xD800 <= val && val <= 0xDFFF) { return 0; } dst[0] = 0xE0 | (val >> 12); @@ -886,8 +885,7 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags)) return 0; } val = unsigned_val(arg); - if (val > 0x10FFFF || (0xD800 <= val && val <= 0xDFFF) || - val == 0xFFFE || val == 0xFFFF) { + if (val > 0x10FFFF || (0xD800 <= val && val <= 0xDFFF)) { return 0; } @@ -1652,8 +1650,7 @@ erts_bs_get_utf8(ErlBinMatchBuffer* mb) return THE_NON_VALUE; } result = (((result << 6) + a) << 6) + b - (Eterm) 0x000E2080UL; - if ((0xD800 <= result && result <= 0xDFFF) || - result == 0xFFFE || result == 0xFFFF) { + if (0xD800 <= result && result <= 0xDFFF) { return THE_NON_VALUE; } mb->offset += 24; @@ -1723,9 +1720,6 @@ 
erts_bs_get_utf16(ErlBinMatchBuffer* mb, Uint flags) w1 = (src[0] << 8) | src[1]; } if (w1 < 0xD800 || w1 > 0xDFFF) { - if (w1 == 0xFFFE || w1 == 0xFFFF) { - return THE_NON_VALUE; - } mb->offset += 16; return make_small(w1); } else if (w1 > 0xDBFF) { -- cgit v1.2.3