diff options
author | Rickard Green <[email protected]> | 2017-06-14 16:01:54 +0200 |
---|---|---|
committer | Rickard Green <[email protected]> | 2017-06-14 16:01:54 +0200 |
commit | 48e67f5dd1d20b9a1f78c5a97cc7ed4afb489ba5 (patch) | |
tree | b48261ecb0de7a1da84bd693062d420fa50a2b52 /erts | |
parent | 8e1cd77c97835c7ac8ecc1adcc13a366fb3904a3 (diff) | |
parent | 03bedf49b6ad81ee6a78ec40137c782989fe98bf (diff) | |
download | otp-48e67f5dd1d20b9a1f78c5a97cc7ed4afb489ba5.tar.gz otp-48e67f5dd1d20b9a1f78c5a97cc7ed4afb489ba5.tar.bz2 otp-48e67f5dd1d20b9a1f78c5a97cc7ed4afb489ba5.zip |
Merge branch 'rickard/t2b-latin1-atom'
OTP-14337
* rickard/t2b-latin1-atom:
Update primary bootstrap
Update preloaded modules
Fix erl_interface tests
Introduce minor vsn 2 in term_to_binary/2
Revert "kernel: Try mend disk_log whitebox tests"
Revert "erts: Do not generate atoms on old latin1 external format"
Diffstat (limited to 'erts')
17 files changed, 69 insertions, 17 deletions
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index d9cc5ef936..687ff38cbf 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -8860,18 +8860,36 @@ hello <p>Option <c>{minor_version, <anno>Version</anno>}</c> can be used to control some encoding details. This option was introduced in Erlang/OTP R11B-4. - The valid values for <c><anno>Version</anno></c> are - <c>0</c> and <c>1</c>.</p> - <p>As from Erlang/OTP 17.0, <c>{minor_version, 1}</c> is the default. It - forces any floats in the term to be encoded in a more - space-efficient and exact way (namely in the 64-bit IEEE format, - rather than converted to a textual representation).</p> - <p>As from Erlang/OTP R11B-4, <c>binary_to_term/1</c> can decode this - representation.</p> - <p><c>{minor_version, 0}</c> means that floats are encoded - using a textual representation. This option is useful to - ensure that releases before Erlang/OTP R11B-4 can decode resulting - binary.</p> + The valid values for <c><anno>Version</anno></c> are:</p> + <taglist> + <tag><c>0</c></tag> + <item> + <p>Floats are encoded using a textual representation. + This option is useful to ensure that releases before Erlang/OTP + R11B-4 can decode resulting binary.</p> + <p>This version encode atoms that can be represented by a + latin1 string using latin1 encoding while only atoms that + cannot be represented by latin1 are encoded using utf8.</p> + </item> + <tag><c>1</c></tag> + <item> + <p>This is as of Erlang/OTP 17.0 the default. It forces any floats + in the term to be encoded in a more space-efficient and exact way + (namely in the 64-bit IEEE format, rather than converted to a + textual representation). As from Erlang/OTP R11B-4, + <c>binary_to_term/1</c> can decode this representation.</p> + <p>This version encode atoms that can be represented by a + latin1 string using latin1 encoding while only atoms that + cannot be represented by latin1 are encoded using utf8.</p> + </item> + <tag><c>2</c></tag> + <item> + <p>Drops usage of the latin1 atom encoding and unconditionally + use utf8 encoding for all atoms. This will be changed to the + default in a future major release of Erlang/OTP. Erlang/OTP + systems as of R16B can decode this representation.</p> + </item> + </taglist> <p>See also <seealso marker="#binary_to_term/1"> <c>binary_to_term/1</c></seealso>.</p> </desc> diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index 93a651b24d..3e17645997 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -53,7 +53,6 @@ | DFLAG_EXPORT_PTR_TAG \ | DFLAG_BIT_BINARIES \ | DFLAG_MAP_TAG \ - | DFLAG_UTF8_ATOMS \ | DFLAG_BIG_CREATION) /* opcodes used in distribution messages */ diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 1190d90b8e..95275847f8 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1129,8 +1129,10 @@ BIF_RETTYPE term_to_binary_2(BIF_ALIST_2) case 0: flags = TERM_TO_BINARY_DFLAGS & ~DFLAG_NEW_FLOATS; break; - case 1: + case 1: /* Current default... */ flags = TERM_TO_BINARY_DFLAGS; + case 2: + flags = TERM_TO_BINARY_DFLAGS | DFLAG_UTF8_ATOMS; break; default: goto error; @@ -2090,6 +2092,7 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) { int iix; int len; + int utf8_atoms = (int) (dflags & DFLAG_UTF8_ATOMS); ASSERT(is_atom(atom)); @@ -2118,8 +2121,8 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) if (iix < 0) { Atom *a = atom_tab(atom_val(atom)); len = a->len; - { - if (len > 255) { + if (utf8_atoms || a->latin1_chars < 0) { + if (len > 255) { *ep++ = ATOM_UTF8_EXT; put_int16(len, ep); ep += 2; @@ -2131,6 +2134,32 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) } sys_memcpy((char *) ep, (char *) a->name, len); } + else { + if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) { + *ep++ = SMALL_ATOM_EXT; + if (len == a->latin1_chars) { + sys_memcpy(ep+1, a->name, len); + } + else { + len = erts_utf8_to_latin1(ep+1, a->name, len); + ASSERT(len == a->latin1_chars); + } + put_int8(len, ep); + ep++; + } + else { + *ep++ = ATOM_EXT; + if (len == a->latin1_chars) { + sys_memcpy(ep+2, a->name, len); + } + else { + len = erts_utf8_to_latin1(ep+2, a->name, len); + ASSERT(len == a->latin1_chars); + } + put_int16(len, ep); + ep += 2; + } + } ep += len; return ep; } @@ -4053,13 +4082,19 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, else { Atom *a = atom_tab(atom_val(obj)); int alen; - { + if ((dflags & DFLAG_UTF8_ATOMS) || a->latin1_chars < 0) { alen = a->len; result += 1 + 1 + alen; if (alen > 255) { result++; /* ATOM_UTF8_EXT (not small) */ } } + else { + alen = a->latin1_chars; + result += 1 + 1 + alen; + if (alen > 255 || !(dflags & DFLAG_SMALL_ATOM_TAGS)) + result++; /* ATOM_EXT (not small) */ + } insert_acache_map(acmp, obj, dflags); } break; diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam Binary files differindex 922e0eb009..5095367fea 100644 --- a/erts/preloaded/ebin/erl_prim_loader.beam +++ b/erts/preloaded/ebin/erl_prim_loader.beam diff --git a/erts/preloaded/ebin/erl_tracer.beam b/erts/preloaded/ebin/erl_tracer.beam Binary files differindex 578de5ffcb..9cb0ee6119 100644 --- a/erts/preloaded/ebin/erl_tracer.beam +++ b/erts/preloaded/ebin/erl_tracer.beam diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam Binary files differindex 63518ed6e1..2efc6816e8 100644 --- a/erts/preloaded/ebin/erlang.beam +++ b/erts/preloaded/ebin/erlang.beam diff --git a/erts/preloaded/ebin/erts_code_purger.beam b/erts/preloaded/ebin/erts_code_purger.beam Binary files differindex 58218c715a..203e08c512 100644 --- a/erts/preloaded/ebin/erts_code_purger.beam +++ b/erts/preloaded/ebin/erts_code_purger.beam diff --git a/erts/preloaded/ebin/erts_dirty_process_code_checker.beam b/erts/preloaded/ebin/erts_dirty_process_code_checker.beam Binary files differindex af764347eb..39b6ae9c4a 100644 --- a/erts/preloaded/ebin/erts_dirty_process_code_checker.beam +++ b/erts/preloaded/ebin/erts_dirty_process_code_checker.beam diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam Binary files differindex 072dc68712..e53ddea4c3 100644 --- a/erts/preloaded/ebin/erts_internal.beam +++ b/erts/preloaded/ebin/erts_internal.beam diff --git a/erts/preloaded/ebin/erts_literal_area_collector.beam b/erts/preloaded/ebin/erts_literal_area_collector.beam Binary files differindex fb1179ddae..96d8be2d69 100644 --- a/erts/preloaded/ebin/erts_literal_area_collector.beam +++ b/erts/preloaded/ebin/erts_literal_area_collector.beam diff --git a/erts/preloaded/ebin/init.beam b/erts/preloaded/ebin/init.beam Binary files differindex a87fcbf0fa..d20e76ba9c 100644 --- a/erts/preloaded/ebin/init.beam +++ b/erts/preloaded/ebin/init.beam diff --git a/erts/preloaded/ebin/otp_ring0.beam b/erts/preloaded/ebin/otp_ring0.beam Binary files differindex 1071606f07..cc98612499 100644 --- a/erts/preloaded/ebin/otp_ring0.beam +++ b/erts/preloaded/ebin/otp_ring0.beam diff --git a/erts/preloaded/ebin/prim_eval.beam b/erts/preloaded/ebin/prim_eval.beam Binary files differindex 752df348ff..0e429a93d5 100644 --- a/erts/preloaded/ebin/prim_eval.beam +++ b/erts/preloaded/ebin/prim_eval.beam diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam Binary files differindex 50cb86d841..ed2a8767c1 100644 --- a/erts/preloaded/ebin/prim_file.beam +++ b/erts/preloaded/ebin/prim_file.beam diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam Binary files differindex 16bf38fcd7..ef64e3fcbf 100644 --- a/erts/preloaded/ebin/prim_inet.beam +++ b/erts/preloaded/ebin/prim_inet.beam diff --git a/erts/preloaded/ebin/prim_zip.beam b/erts/preloaded/ebin/prim_zip.beam Binary files differindex bf6fc60752..9203fe632b 100644 --- a/erts/preloaded/ebin/prim_zip.beam +++ b/erts/preloaded/ebin/prim_zip.beam diff --git a/erts/preloaded/ebin/zlib.beam b/erts/preloaded/ebin/zlib.beam Binary files differindex 6554c5324a..c29359aa34 100644 --- a/erts/preloaded/ebin/zlib.beam +++ b/erts/preloaded/ebin/zlib.beam |