aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2011-08-30 11:51:11 +0200
committerBjörn Gustavsson <[email protected]>2011-10-13 14:16:00 +0200
commit34db76765561487e526fe66d3d19ecf3b3fb9dc8 (patch)
tree9141e3c5729e46d03c8b27b14da3b29b1e54abca /erts/emulator/beam
parent6ca6dd3c670fb8185ebb9a20c2a731a7375c1cac (diff)
downloadotp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.gz
otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.bz2
otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.zip
Allow noncharacter code points in unicode encoding and decoding
The two noncharacter code points 16#FFFE and 16#FFFF were not allowed to be encoded or decoded using the unicode module or bit syntax. That causes an inconsistency, since the noncharacters 16#FDD0 to 16#FDEF could be encoded/decoded. There is two ways to fix that inconsistency. We have chosen to allow 16#FFFE and 16#FFFF to be encoded and decoded, because the noncharacters could be useful internally within an application and it will make encoding and decoding slightly faster. Reported-by: Alisdair Sullivan
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r--erts/emulator/beam/beam_emu.c7
-rw-r--r--erts/emulator/beam/erl_bits.c12
-rw-r--r--erts/emulator/beam/erl_unicode.c24
3 files changed, 11 insertions, 32 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 4b5b5cbdaa..62a14dbacf 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -3967,8 +3967,7 @@ void process_main(void)
* too big numbers).
*/
if (is_not_small(val) || val > make_small(0x10FFFFUL) ||
- (make_small(0xD800UL) <= val && val <= make_small(0xDFFFUL)) ||
- val == make_small(0xFFFEUL) || val == make_small(0xFFFFUL)) {
+ (make_small(0xD800UL) <= val && val <= make_small(0xDFFFUL))) {
goto badarg;
}
Next(2);
@@ -3987,8 +3986,8 @@ void process_main(void)
* the valid range).
*/
if (is_not_small(tmp_arg1) || tmp_arg1 > make_small(0x10FFFFUL) ||
- (make_small(0xD800UL) <= tmp_arg1 && tmp_arg1 <= make_small(0xDFFFUL)) ||
- tmp_arg1 == make_small(0xFFFEUL) || tmp_arg1 == make_small(0xFFFFUL)) {
+ (make_small(0xD800UL) <= tmp_arg1 &&
+ tmp_arg1 <= make_small(0xDFFFUL))) {
ErlBinMatchBuffer *mb = ms_matchbuffer(tmp_arg2);
mb->offset -= 32;
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 326a5c136b..6f7309f493 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -845,8 +845,7 @@ erts_bs_put_utf8(ERL_BITS_PROTO_1(Eterm arg))
dst[1] = 0x80 | (val & 0x3F);
num_bits = 16;
} else if (val < 0x10000UL) {
- if ((0xD800 <= val && val <= 0xDFFF) ||
- val == 0xFFFE || val == 0xFFFF) {
+ if (0xD800 <= val && val <= 0xDFFF) {
return 0;
}
dst[0] = 0xE0 | (val >> 12);
@@ -886,8 +885,7 @@ erts_bs_put_utf16(ERL_BITS_PROTO_2(Eterm arg, Uint flags))
return 0;
}
val = unsigned_val(arg);
- if (val > 0x10FFFF || (0xD800 <= val && val <= 0xDFFF) ||
- val == 0xFFFE || val == 0xFFFF) {
+ if (val > 0x10FFFF || (0xD800 <= val && val <= 0xDFFF)) {
return 0;
}
@@ -1652,8 +1650,7 @@ erts_bs_get_utf8(ErlBinMatchBuffer* mb)
return THE_NON_VALUE;
}
result = (((result << 6) + a) << 6) + b - (Eterm) 0x000E2080UL;
- if ((0xD800 <= result && result <= 0xDFFF) ||
- result == 0xFFFE || result == 0xFFFF) {
+ if (0xD800 <= result && result <= 0xDFFF) {
return THE_NON_VALUE;
}
mb->offset += 24;
@@ -1723,9 +1720,6 @@ erts_bs_get_utf16(ErlBinMatchBuffer* mb, Uint flags)
w1 = (src[0] << 8) | src[1];
}
if (w1 < 0xD800 || w1 > 0xDFFF) {
- if (w1 == 0xFFFE || w1 == 0xFFFF) {
- return THE_NON_VALUE;
- }
mb->offset += 16;
return make_small(w1);
} else if (w1 > 0xDBFF) {
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index 158eb361a4..bd5f3cc4c1 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -348,12 +348,6 @@ static int copy_utf8_bin(byte *target, byte *source, Uint size,
return copied;
}
- if (((*source) == 0xEF) && (source[1] == 0xBF) &&
- ((source[2] == 0xBE) || (source[2] == 0xBF))) {
- *err_pos = source;
- return copied;
- }
-
*(target++) = *(source++);
*(target++) = *(source++);
*(target++) = *(source++);
@@ -714,9 +708,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
target[(*pos)++] = (((byte) (x & 0x3F)) |
((byte) 0x80));
} else if (x < 0x10000) {
- if ((x >= 0xD800 && x <= 0xDFFF) ||
- (x == 0xFFFE) ||
- (x == 0xFFFF)) { /* Invalid unicode range */
+ if (x >= 0xD800 && x <= 0xDFFF) {
+ /* Invalid unicode range */
*err = 1;
goto done;
}
@@ -1230,10 +1223,6 @@ int erts_analyze_utf8(byte *source, Uint size,
((source[1] & 0x20) != 0)) {
return ERTS_UTF8_ERROR;
}
- if (((*source) == 0xEF) && (source[1] == 0xBF) &&
- ((source[2] == 0xBE) || (source[2] == 0xBF))) {
- return ERTS_UTF8_ERROR;
- }
source += 3;
size -= 3;
} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
@@ -2166,9 +2155,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
} else if (x < 0x800) {
need += 2;
} else if (x < 0x10000) {
- if ((x >= 0xD800 && x <= 0xDFFF) ||
- (x == 0xFFFE) ||
- (x == 0xFFFF)) { /* Invalid unicode range */
+ if (x >= 0xD800 && x <= 0xDFFF) {
+ /* Invalid unicode range */
DESTROY_ESTACK(stack);
return ((Sint) -1);
}
@@ -2314,9 +2302,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
*p++ = (((byte) (x & 0x3F)) |
((byte) 0x80));
} else if (x < 0x10000) {
- ASSERT(!((x >= 0xD800 && x <= 0xDFFF) ||
- (x == 0xFFFE) ||
- (x == 0xFFFF)));
+ ASSERT(!(x >= 0xD800 && x <= 0xDFFF));
*p++ = (((byte) (x >> 12)) |
((byte) 0xE0));
*p++ = ((((byte) (x >> 6)) & 0x3F) |