diff options
author | Sverker Eriksson <[email protected]> | 2017-04-12 19:34:44 +0200 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2017-04-12 19:34:44 +0200 |
commit | 82e849adee6e2fd20e2a3faa6ecb463cc2c7256e (patch) | |
tree | e114f79d16681ab05e9723e0f3ac5a87c46a8527 /erts | |
parent | 4eeaec9bb5dcf94139d3907f2489a44674753153 (diff) | |
parent | a72e675fce23b9bebb7c9ff8beb6f962c4f9930a (diff) | |
download | otp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.tar.gz otp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.tar.bz2 otp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.zip |
Merge branch sverker/remove-latin1-atom-encoding/OTP-14337
* sverker/remove-latin1-atom-encoding:
kernel: Fix erl_distribution_wb_SUITE:whitebox
kernel: Remove pg2_SUITE:compat
erts: Remove fun_r13_SUITE
stdlib: Remove test cases for R12 io protocol
kernel: Make DFLAG_UTF8_ATOMS mandatory
kernel: Rewrite distribution flag verification
tools: Update assumptions in lcnt about external atom format
stdlib: Tweak beam_lib_SUITE whitebox assumptions
orber: Remove hard dependency to external atom format
kernel: Try mend disk_log whitebox tests
erts: Mark latin1 atom encoding as deprecated
jinterface: Do not generate atoms on old latin1 external format
erl_interface: Do not generate atoms on old latin1 ext format
erts: Do not generate atoms on old latin1 external format
erts: Fix faulty ASSERT for failed dec_term
Diffstat (limited to 'erts')
-rw-r--r-- | erts/doc/src/erl_ext_dist.xml | 156 | ||||
-rw-r--r-- | erts/emulator/beam/dist.h | 1 | ||||
-rw-r--r-- | erts/emulator/beam/external.c | 41 | ||||
-rw-r--r-- | erts/emulator/test/Makefile | 1 | ||||
-rw-r--r-- | erts/emulator/test/fun_r13_SUITE.erl | 74 |
5 files changed, 81 insertions, 192 deletions
diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml index a436a9ca74..da2dc94e5b 100644 --- a/erts/doc/src/erl_ext_dist.xml +++ b/erts/doc/src/erl_ext_dist.xml @@ -51,7 +51,7 @@ term into the external format. To convert binary data encoding to a term, the BIF <seealso marker="erts:erlang#binary_to_term/1"> - <c>erlang:binary_to_term/1</c>c></seealso> is used. + <c>erlang:binary_to_term/1</c></seealso> is used. </p> <p> The distribution does this implicitly when sending messages across @@ -119,22 +119,18 @@ <tcaption>Compressed Data Format when Expanded</tcaption></table> <marker id="utf8_atoms"/> <note> - <p>As from ERTS 9.0 (OTP 20), UTF-8 encoded atoms may contain any Unicode - character. Although the support for UTF-8 encoded atoms in the external - format is available since ERTS 5.10 (OTP R16), passing atoms that cannot - be encoded in Latin-1 is an <em>error</em> in versions earlier than - Erlang/OTP 20, and <em>the behavior is undefined</em>.</p> - <p>When distribution flag <seealso marker="erl_dist_protocol#dflags"> - <c>DFLAG_UTF8_ATOMS</c></seealso> has been exchanged between both nodes - in the <seealso marker="erl_dist_protocol#distribution_handshake"> - distribution handshake</seealso>, all atoms in the distribution header - are encoded in UTF-8, otherwise in Latin-1. The two - new tags <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso> - and <seealso marker="#SMALL_ATOM_UTF8_EXT"> - <c>SMALL_ATOM_UTF8_EXT</c></seealso> - are only used if the distribution flag <c>DFLAG_UTF8_ATOMS</c> has - been exchanged between nodes, or if an atom containing characters - that cannot be encoded in Latin-1 is encountered.</p> + <p>As from ERTS 9.0 (OTP 20), atoms may contain any Unicode + characters and are always encoded using the UTF-8 external formats + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso> + or <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>. + The old Latin-1 formats <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso> + and <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso> + are deprecated and are only kept for backward + compatibility when decoding terms encoded by older nodes.</p> + <p>Support for UTF-8 encoded atoms in the external format has been + available since ERTS 5.10 (OTP R16). This abillity allows such old nodes + to decode, store and encode any Unicode atoms received from a new OTP 20 + node.</p> <p>The maximum number of allowed characters in an atom is 255. In the UTF-8 case, each character can need 4 bytes to be encoded.</p> </note> @@ -390,28 +386,6 @@ </section> <section> - <marker id="ATOM_EXT"/> - <title>ATOM_EXT</title> - <table align="left"> - <row> - <cell align="center">1</cell> - <cell align="center">2</cell> - <cell align="center">Len</cell> - </row> - <row> - <cell align="center"><c>100</c></cell> - <cell align="center"><c>Len</c></cell> - <cell align="center"><c>AtomName</c></cell> - </row> - <tcaption>ATOM_EXT</tcaption></table> - <p> - An atom is stored with a 2 byte unsigned length in big-endian order, - followed by <c>Len</c> numbers of 8-bit Latin-1 characters that forms - the <c>AtomName</c>. The maximum allowed value for <c>Len</c> is 255. - </p> - </section> - - <section> <marker id="REFERENCE_EXT"/> <title>REFERENCE_EXT</title> <table align="left"> @@ -432,8 +406,8 @@ Encodes a reference object (an object generated with <seealso marker="erlang:make_ref/0">erlang:make_ref/0</seealso>). The <c>Node</c> term is an encoded atom, that is, - <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, or + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>. The <c>ID</c> field contains a big-endian unsigned integer, but <em>is to be regarded as uninterpreted data</em>, @@ -772,39 +746,6 @@ </section> <section> - <marker id="SMALL_ATOM_EXT"/> - <title>SMALL_ATOM_EXT</title> - <table align="left"> - <row> - <cell align="center">1</cell> - <cell align="center">1</cell> - <cell align="center">Len</cell> - </row> - <row> - <cell align="center"><c>115</c></cell> - <cell align="center"><c>Len</c></cell> - <cell align="center"><c>AtomName</c></cell> - </row> - <tcaption>SMALL_ATOM_EXT</tcaption></table> - <p> - An atom is stored with a 1 byte unsigned length, - followed by <c>Len</c> numbers of 8-bit Latin-1 characters that - forms the <c>AtomName</c>. Longer atoms can be represented - by <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>. - </p> - <note> - <p> - <c>SMALL_ATOM_EXT</c> was introduced in ERTS 5.7.2 and - require an exchange of distribution flag - <seealso marker="erl_dist_protocol#dflags"> - <c>DFLAG_SMALL_ATOM_TAGS</c></seealso> in the - <seealso marker="erl_dist_protocol#distribution_handshake"> - distribution handshake</seealso>. - </p> - </note> - </section> - - <section> <marker id="FUN_EXT"/> <title>FUN_EXT</title> <table align="left"> @@ -838,8 +779,8 @@ <tag><c>Module</c></tag> <item> <p>Encoded as an atom, using - <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or <seealso marker="#ATOM_CACHE_REF"> <c>ATOM_CACHE_REF</c></seealso>. This is the module that the fun is implemented in. @@ -933,8 +874,8 @@ <tag><c>Module</c></tag> <item> <p>Encoded as an atom, using - <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, + <seealso marker="#ATOM_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or <seealso marker="#ATOM_CACHE_REF"> <c>ATOM_CACHE_REF</c></seealso>. Is the module that the fun is implemented in. @@ -996,8 +937,8 @@ </p> <p> <c>Module</c> and <c>Function</c> are atoms - (encoded using <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, or + (encoded using <seealso marker="#ATOM_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>). </p> <p> @@ -1109,6 +1050,61 @@ in the beginning of this section. </p> </section> + + <section> + <marker id="ATOM_EXT"/> + <title>ATOM_EXT (deprecated)</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">2</cell> + <cell align="center">Len</cell> + </row> + <row> + <cell align="center"><c>100</c></cell> + <cell align="center"><c>Len</c></cell> + <cell align="center"><c>AtomName</c></cell> + </row> + <tcaption>ATOM_EXT</tcaption></table> + <p> + An atom is stored with a 2 byte unsigned length in big-endian order, + followed by <c>Len</c> numbers of 8-bit Latin-1 characters that forms + the <c>AtomName</c>. The maximum allowed value for <c>Len</c> is 255. + </p> + </section> + + <section> + <marker id="SMALL_ATOM_EXT"/> + <title>SMALL_ATOM_EXT (deprecated)</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">1</cell> + <cell align="center">Len</cell> + </row> + <row> + <cell align="center"><c>115</c></cell> + <cell align="center"><c>Len</c></cell> + <cell align="center"><c>AtomName</c></cell> + </row> + <tcaption>SMALL_ATOM_EXT</tcaption></table> + <p> + An atom is stored with a 1 byte unsigned length, + followed by <c>Len</c> numbers of 8-bit Latin-1 characters that + forms the <c>AtomName</c>. + </p> + <note> + <p> + <c>SMALL_ATOM_EXT</c> was introduced in ERTS 5.7.2 and + require an exchange of distribution flag + <seealso marker="erl_dist_protocol#dflags"> + <c>DFLAG_SMALL_ATOM_TAGS</c></seealso> in the + <seealso marker="erl_dist_protocol#distribution_handshake"> + distribution handshake</seealso>. + </p> + </note> + </section> + </chapter> diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index 8f6be1061a..6ed36a478e 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -53,6 +53,7 @@ | DFLAG_EXPORT_PTR_TAG \ | DFLAG_BIT_BINARIES \ | DFLAG_MAP_TAG \ + | DFLAG_UTF8_ATOMS \ | DFLAG_BIG_CREATION) /* opcodes used in distribution messages */ diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 205a7711ec..285ae4ac78 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -2093,7 +2093,6 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) { int iix; int len; - int utf8_atoms = (int) (dflags & DFLAG_UTF8_ATOMS); ASSERT(is_atom(atom)); @@ -2122,8 +2121,8 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) if (iix < 0) { Atom *a = atom_tab(atom_val(atom)); len = a->len; - if (utf8_atoms || a->latin1_chars < 0) { - if (len > 255) { + { + if (len > 255) { *ep++ = ATOM_UTF8_EXT; put_int16(len, ep); ep += 2; @@ -2135,32 +2134,6 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags) } sys_memcpy((char *) ep, (char *) a->name, len); } - else { - if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) { - *ep++ = SMALL_ATOM_EXT; - if (len == a->latin1_chars) { - sys_memcpy(ep+1, a->name, len); - } - else { - len = erts_utf8_to_latin1(ep+1, a->name, len); - ASSERT(len == a->latin1_chars); - } - put_int8(len, ep); - ep++; - } - else { - *ep++ = ATOM_EXT; - if (len == a->latin1_chars) { - sys_memcpy(ep+2, a->name, len); - } - else { - len = erts_utf8_to_latin1(ep+2, a->name, len); - ASSERT(len == a->latin1_chars); - } - put_int16(len, ep); - ep += 2; - } - } ep += len; return ep; } @@ -4010,7 +3983,7 @@ error: factory->hp = hp; /* the largest must be the freshest */ } } - else ASSERT(factory->hp == hp); + else ASSERT(!factory->hp || factory->hp == hp); error_hamt: erts_factory_undo(factory); @@ -4085,19 +4058,13 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, else { Atom *a = atom_tab(atom_val(obj)); int alen; - if ((dflags & DFLAG_UTF8_ATOMS) || a->latin1_chars < 0) { + { alen = a->len; result += 1 + 1 + alen; if (alen > 255) { result++; /* ATOM_UTF8_EXT (not small) */ } } - else { - alen = a->latin1_chars; - result += 1 + 1 + alen; - if (alen > 255 || !(dflags & DFLAG_SMALL_ATOM_TAGS)) - result++; /* ATOM_EXT (not small) */ - } insert_acache_map(acmp, obj, dflags); } break; diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile index 186f9fef8d..8c8c73aa3e 100644 --- a/erts/emulator/test/Makefile +++ b/erts/emulator/test/Makefile @@ -66,7 +66,6 @@ MODULES= \ exception_SUITE \ float_SUITE \ fun_SUITE \ - fun_r13_SUITE \ gc_SUITE \ guard_SUITE \ hash_SUITE \ diff --git a/erts/emulator/test/fun_r13_SUITE.erl b/erts/emulator/test/fun_r13_SUITE.erl deleted file mode 100644 index a45ed08b9d..0000000000 --- a/erts/emulator/test/fun_r13_SUITE.erl +++ /dev/null @@ -1,74 +0,0 @@ -%% -%% %CopyrightBegin% -%% -%% Copyright Ericsson AB 2007-2016. All Rights Reserved. -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. -%% -%% %CopyrightEnd% -%% - --module(fun_r13_SUITE). --compile(r13). - --export([all/0, suite/0, - dist_old_release/1]). - --include_lib("common_test/include/ct.hrl"). - -suite() -> - [{ct_hooks,[ts_install_cth]}, - {timetrap, {minutes, 1}}]. - -all() -> - [dist_old_release]. - -dist_old_release(Config) when is_list(Config) -> - case test_server:is_release_available("r12b") of - true -> do_dist_old(Config); - false -> {skip,"No R12B found"} - end. - -do_dist_old(Config) when is_list(Config) -> - Pa = filename:dirname(code:which(?MODULE)), - Name = fun_dist_r12, - {ok,Node} = test_server:start_node(Name, peer, - [{args,"-pa "++Pa}, - {erl,[{release,"r12b"}]}]), - - Pid = spawn_link(Node, - fun() -> - receive - Fun when is_function(Fun) -> - R12BFun = fun(H) -> cons(H, [b,c]) end, - Fun(Fun, R12BFun) - end - end), - Self = self(), - Fun = fun(F, R12BFun) -> - {pid,Self} = erlang:fun_info(F, pid), - {module,?MODULE} = erlang:fun_info(F, module), - Self ! {ok,F,R12BFun} - end, - Pid ! Fun, - receive - {ok,Fun,R12BFun} -> - [a,b,c] = R12BFun(a); - Other -> - ct:fail({bad_message,Other}) - end, - true = test_server:stop_node(Node), - ok. - -cons(H, T) -> - [H|T]. |