diff options
author | Björn Gustavsson <[email protected]> | 2011-08-30 11:51:11 +0200 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2011-10-13 14:16:00 +0200 |
commit | 34db76765561487e526fe66d3d19ecf3b3fb9dc8 (patch) | |
tree | 9141e3c5729e46d03c8b27b14da3b29b1e54abca /erts/emulator/beam/erl_unicode.c | |
parent | 6ca6dd3c670fb8185ebb9a20c2a731a7375c1cac (diff) | |
download | otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.gz otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.tar.bz2 otp-34db76765561487e526fe66d3d19ecf3b3fb9dc8.zip |
Allow noncharacter code points in unicode encoding and decoding
The two noncharacter code points 16#FFFE and 16#FFFF were not
allowed to be encoded or decoded using the unicode module or
bit syntax. That causes an inconsistency, since the noncharacters
16#FDD0 to 16#FDEF could be encoded/decoded.
There is two ways to fix that inconsistency.
We have chosen to allow 16#FFFE and 16#FFFF to be encoded and
decoded, because the noncharacters could be useful internally
within an application and it will make encoding and decoding
slightly faster.
Reported-by: Alisdair Sullivan
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r-- | erts/emulator/beam/erl_unicode.c | 24 |
1 files changed, 5 insertions, 19 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 158eb361a4..bd5f3cc4c1 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -348,12 +348,6 @@ static int copy_utf8_bin(byte *target, byte *source, Uint size, return copied; } - if (((*source) == 0xEF) && (source[1] == 0xBF) && - ((source[2] == 0xBE) || (source[2] == 0xBF))) { - *err_pos = source; - return copied; - } - *(target++) = *(source++); *(target++) = *(source++); *(target++) = *(source++); @@ -714,9 +708,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ target[(*pos)++] = (((byte) (x & 0x3F)) | ((byte) 0x80)); } else if (x < 0x10000) { - if ((x >= 0xD800 && x <= 0xDFFF) || - (x == 0xFFFE) || - (x == 0xFFFF)) { /* Invalid unicode range */ + if (x >= 0xD800 && x <= 0xDFFF) { + /* Invalid unicode range */ *err = 1; goto done; } @@ -1230,10 +1223,6 @@ int erts_analyze_utf8(byte *source, Uint size, ((source[1] & 0x20) != 0)) { return ERTS_UTF8_ERROR; } - if (((*source) == 0xEF) && (source[1] == 0xBF) && - ((source[2] == 0xBE) || (source[2] == 0xBF))) { - return ERTS_UTF8_ERROR; - } source += 3; size -= 3; } else if (((*source) & ((byte) 0xF8)) == 0xF0) { @@ -2166,9 +2155,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ } else if (x < 0x800) { need += 2; } else if (x < 0x10000) { - if ((x >= 0xD800 && x <= 0xDFFF) || - (x == 0xFFFE) || - (x == 0xFFFF)) { /* Invalid unicode range */ + if (x >= 0xD800 && x <= 0xDFFF) { + /* Invalid unicode range */ DESTROY_ESTACK(stack); return ((Sint) -1); } @@ -2314,9 +2302,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ *p++ = (((byte) (x & 0x3F)) | ((byte) 0x80)); } else if (x < 0x10000) { - ASSERT(!((x >= 0xD800 && x <= 0xDFFF) || - (x == 0xFFFE) || - (x == 0xFFFF))); + ASSERT(!(x >= 0xD800 && x <= 0xDFFF)); *p++ = (((byte) (x >> 12)) | ((byte) 0xE0)); *p++ = ((((byte) (x >> 6)) & 0x3F) | |