aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2017-04-12 19:34:44 +0200
committerSverker Eriksson <[email protected]>2017-04-12 19:34:44 +0200
commit82e849adee6e2fd20e2a3faa6ecb463cc2c7256e (patch)
treee114f79d16681ab05e9723e0f3ac5a87c46a8527 /erts
parent4eeaec9bb5dcf94139d3907f2489a44674753153 (diff)
parenta72e675fce23b9bebb7c9ff8beb6f962c4f9930a (diff)
downloadotp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.tar.gz
otp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.tar.bz2
otp-82e849adee6e2fd20e2a3faa6ecb463cc2c7256e.zip
Merge branch sverker/remove-latin1-atom-encoding/OTP-14337
* sverker/remove-latin1-atom-encoding: kernel: Fix erl_distribution_wb_SUITE:whitebox kernel: Remove pg2_SUITE:compat erts: Remove fun_r13_SUITE stdlib: Remove test cases for R12 io protocol kernel: Make DFLAG_UTF8_ATOMS mandatory kernel: Rewrite distribution flag verification tools: Update assumptions in lcnt about external atom format stdlib: Tweak beam_lib_SUITE whitebox assumptions orber: Remove hard dependency to external atom format kernel: Try mend disk_log whitebox tests erts: Mark latin1 atom encoding as deprecated jinterface: Do not generate atoms on old latin1 external format erl_interface: Do not generate atoms on old latin1 ext format erts: Do not generate atoms on old latin1 external format erts: Fix faulty ASSERT for failed dec_term
Diffstat (limited to 'erts')
-rw-r--r--erts/doc/src/erl_ext_dist.xml156
-rw-r--r--erts/emulator/beam/dist.h1
-rw-r--r--erts/emulator/beam/external.c41
-rw-r--r--erts/emulator/test/Makefile1
-rw-r--r--erts/emulator/test/fun_r13_SUITE.erl74
5 files changed, 81 insertions, 192 deletions
diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml
index a436a9ca74..da2dc94e5b 100644
--- a/erts/doc/src/erl_ext_dist.xml
+++ b/erts/doc/src/erl_ext_dist.xml
@@ -51,7 +51,7 @@
term into the external format.
To convert binary data encoding to a term, the BIF
<seealso marker="erts:erlang#binary_to_term/1">
- <c>erlang:binary_to_term/1</c>c></seealso> is used.
+ <c>erlang:binary_to_term/1</c></seealso> is used.
</p>
<p>
The distribution does this implicitly when sending messages across
@@ -119,22 +119,18 @@
<tcaption>Compressed Data Format when Expanded</tcaption></table>
<marker id="utf8_atoms"/>
<note>
- <p>As from ERTS 9.0 (OTP 20), UTF-8 encoded atoms may contain any Unicode
- character. Although the support for UTF-8 encoded atoms in the external
- format is available since ERTS 5.10 (OTP R16), passing atoms that cannot
- be encoded in Latin-1 is an <em>error</em> in versions earlier than
- Erlang/OTP 20, and <em>the behavior is undefined</em>.</p>
- <p>When distribution flag <seealso marker="erl_dist_protocol#dflags">
- <c>DFLAG_UTF8_ATOMS</c></seealso> has been exchanged between both nodes
- in the <seealso marker="erl_dist_protocol#distribution_handshake">
- distribution handshake</seealso>, all atoms in the distribution header
- are encoded in UTF-8, otherwise in Latin-1. The two
- new tags <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>
- and <seealso marker="#SMALL_ATOM_UTF8_EXT">
- <c>SMALL_ATOM_UTF8_EXT</c></seealso>
- are only used if the distribution flag <c>DFLAG_UTF8_ATOMS</c> has
- been exchanged between nodes, or if an atom containing characters
- that cannot be encoded in Latin-1 is encountered.</p>
+ <p>As from ERTS 9.0 (OTP 20), atoms may contain any Unicode
+ characters and are always encoded using the UTF-8 external formats
+ <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>
+ or <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>.
+ The old Latin-1 formats <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>
+ and <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>
+ are deprecated and are only kept for backward
+ compatibility when decoding terms encoded by older nodes.</p>
+ <p>Support for UTF-8 encoded atoms in the external format has been
+ available since ERTS 5.10 (OTP R16). This ability allows such old nodes
+ to decode, store and encode any Unicode atoms received from a new OTP 20
+ node.</p>
<p>The maximum number of allowed characters in an atom is 255. In the
UTF-8 case, each character can need 4 bytes to be encoded.</p>
</note>
@@ -390,28 +386,6 @@
</section>
<section>
- <marker id="ATOM_EXT"/>
- <title>ATOM_EXT</title>
- <table align="left">
- <row>
- <cell align="center">1</cell>
- <cell align="center">2</cell>
- <cell align="center">Len</cell>
- </row>
- <row>
- <cell align="center"><c>100</c></cell>
- <cell align="center"><c>Len</c></cell>
- <cell align="center"><c>AtomName</c></cell>
- </row>
- <tcaption>ATOM_EXT</tcaption></table>
- <p>
- An atom is stored with a 2 byte unsigned length in big-endian order,
- followed by <c>Len</c> numbers of 8-bit Latin-1 characters that forms
- the <c>AtomName</c>. The maximum allowed value for <c>Len</c> is 255.
- </p>
- </section>
-
- <section>
<marker id="REFERENCE_EXT"/>
<title>REFERENCE_EXT</title>
<table align="left">
@@ -432,8 +406,8 @@
Encodes a reference object (an object generated with
<seealso marker="erlang:make_ref/0">erlang:make_ref/0</seealso>).
The <c>Node</c> term is an encoded atom, that is,
- <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>,
- <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, or
+ <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>,
+ <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or
<seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>.
The <c>ID</c> field contains a big-endian unsigned integer,
but <em>is to be regarded as uninterpreted data</em>,
@@ -772,39 +746,6 @@
</section>
<section>
- <marker id="SMALL_ATOM_EXT"/>
- <title>SMALL_ATOM_EXT</title>
- <table align="left">
- <row>
- <cell align="center">1</cell>
- <cell align="center">1</cell>
- <cell align="center">Len</cell>
- </row>
- <row>
- <cell align="center"><c>115</c></cell>
- <cell align="center"><c>Len</c></cell>
- <cell align="center"><c>AtomName</c></cell>
- </row>
- <tcaption>SMALL_ATOM_EXT</tcaption></table>
- <p>
- An atom is stored with a 1 byte unsigned length,
- followed by <c>Len</c> numbers of 8-bit Latin-1 characters that
- forms the <c>AtomName</c>. Longer atoms can be represented
- by <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>.
- </p>
- <note>
- <p>
- <c>SMALL_ATOM_EXT</c> was introduced in ERTS 5.7.2 and
- require an exchange of distribution flag
- <seealso marker="erl_dist_protocol#dflags">
- <c>DFLAG_SMALL_ATOM_TAGS</c></seealso> in the
- <seealso marker="erl_dist_protocol#distribution_handshake">
- distribution handshake</seealso>.
- </p>
- </note>
- </section>
-
- <section>
<marker id="FUN_EXT"/>
<title>FUN_EXT</title>
<table align="left">
@@ -838,8 +779,8 @@
<tag><c>Module</c></tag>
<item>
<p>Encoded as an atom, using
- <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>,
- <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>,
+ <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>,
+ <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>,
or <seealso marker="#ATOM_CACHE_REF">
<c>ATOM_CACHE_REF</c></seealso>.
This is the module that the fun is implemented in.
@@ -933,8 +874,8 @@
<tag><c>Module</c></tag>
<item>
<p>Encoded as an atom, using
- <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>,
- <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>,
+ <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>,
+ <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>,
or <seealso marker="#ATOM_CACHE_REF">
<c>ATOM_CACHE_REF</c></seealso>.
Is the module that the fun is implemented in.
@@ -996,8 +937,8 @@
</p>
<p>
<c>Module</c> and <c>Function</c> are atoms
- (encoded using <seealso marker="#ATOM_EXT"><c>ATOM_EXT</c></seealso>,
- <seealso marker="#SMALL_ATOM_EXT"><c>SMALL_ATOM_EXT</c></seealso>, or
+ (encoded using <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>,
+ <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or
<seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>).
</p>
<p>
@@ -1109,6 +1050,61 @@
in the beginning of this section.
</p>
</section>
+
+ <section>
+ <marker id="ATOM_EXT"/>
+ <title>ATOM_EXT (deprecated)</title>
+ <table align="left">
+ <row>
+ <cell align="center">1</cell>
+ <cell align="center">2</cell>
+ <cell align="center">Len</cell>
+ </row>
+ <row>
+ <cell align="center"><c>100</c></cell>
+ <cell align="center"><c>Len</c></cell>
+ <cell align="center"><c>AtomName</c></cell>
+ </row>
+ <tcaption>ATOM_EXT</tcaption></table>
+ <p>
+ An atom is stored with a 2 byte unsigned length in big-endian order,
+ followed by <c>Len</c> numbers of 8-bit Latin-1 characters that forms
+ the <c>AtomName</c>. The maximum allowed value for <c>Len</c> is 255.
+ </p>
+ </section>
+
+ <section>
+ <marker id="SMALL_ATOM_EXT"/>
+ <title>SMALL_ATOM_EXT (deprecated)</title>
+ <table align="left">
+ <row>
+ <cell align="center">1</cell>
+ <cell align="center">1</cell>
+ <cell align="center">Len</cell>
+ </row>
+ <row>
+ <cell align="center"><c>115</c></cell>
+ <cell align="center"><c>Len</c></cell>
+ <cell align="center"><c>AtomName</c></cell>
+ </row>
+ <tcaption>SMALL_ATOM_EXT</tcaption></table>
+ <p>
+ An atom is stored with a 1 byte unsigned length,
+ followed by <c>Len</c> numbers of 8-bit Latin-1 characters that
+ forms the <c>AtomName</c>.
+ </p>
+ <note>
+ <p>
+ <c>SMALL_ATOM_EXT</c> was introduced in ERTS 5.7.2 and
+ requires an exchange of distribution flag
+ <seealso marker="erl_dist_protocol#dflags">
+ <c>DFLAG_SMALL_ATOM_TAGS</c></seealso> in the
+ <seealso marker="erl_dist_protocol#distribution_handshake">
+ distribution handshake</seealso>.
+ </p>
+ </note>
+ </section>
+
</chapter>
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index 8f6be1061a..6ed36a478e 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -53,6 +53,7 @@
| DFLAG_EXPORT_PTR_TAG \
| DFLAG_BIT_BINARIES \
| DFLAG_MAP_TAG \
+ | DFLAG_UTF8_ATOMS \
| DFLAG_BIG_CREATION)
/* opcodes used in distribution messages */
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 205a7711ec..285ae4ac78 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -2093,7 +2093,6 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags)
{
int iix;
int len;
- int utf8_atoms = (int) (dflags & DFLAG_UTF8_ATOMS);
ASSERT(is_atom(atom));
@@ -2122,8 +2121,8 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags)
if (iix < 0) {
Atom *a = atom_tab(atom_val(atom));
len = a->len;
- if (utf8_atoms || a->latin1_chars < 0) {
- if (len > 255) {
+ {
+ if (len > 255) {
*ep++ = ATOM_UTF8_EXT;
put_int16(len, ep);
ep += 2;
@@ -2135,32 +2134,6 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags)
}
sys_memcpy((char *) ep, (char *) a->name, len);
}
- else {
- if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) {
- *ep++ = SMALL_ATOM_EXT;
- if (len == a->latin1_chars) {
- sys_memcpy(ep+1, a->name, len);
- }
- else {
- len = erts_utf8_to_latin1(ep+1, a->name, len);
- ASSERT(len == a->latin1_chars);
- }
- put_int8(len, ep);
- ep++;
- }
- else {
- *ep++ = ATOM_EXT;
- if (len == a->latin1_chars) {
- sys_memcpy(ep+2, a->name, len);
- }
- else {
- len = erts_utf8_to_latin1(ep+2, a->name, len);
- ASSERT(len == a->latin1_chars);
- }
- put_int16(len, ep);
- ep += 2;
- }
- }
ep += len;
return ep;
}
@@ -4010,7 +3983,7 @@ error:
factory->hp = hp; /* the largest must be the freshest */
}
}
- else ASSERT(factory->hp == hp);
+ else ASSERT(!factory->hp || factory->hp == hp);
error_hamt:
erts_factory_undo(factory);
@@ -4085,19 +4058,13 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
else {
Atom *a = atom_tab(atom_val(obj));
int alen;
- if ((dflags & DFLAG_UTF8_ATOMS) || a->latin1_chars < 0) {
+ {
alen = a->len;
result += 1 + 1 + alen;
if (alen > 255) {
result++; /* ATOM_UTF8_EXT (not small) */
}
}
- else {
- alen = a->latin1_chars;
- result += 1 + 1 + alen;
- if (alen > 255 || !(dflags & DFLAG_SMALL_ATOM_TAGS))
- result++; /* ATOM_EXT (not small) */
- }
insert_acache_map(acmp, obj, dflags);
}
break;
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index 186f9fef8d..8c8c73aa3e 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -66,7 +66,6 @@ MODULES= \
exception_SUITE \
float_SUITE \
fun_SUITE \
- fun_r13_SUITE \
gc_SUITE \
guard_SUITE \
hash_SUITE \
diff --git a/erts/emulator/test/fun_r13_SUITE.erl b/erts/emulator/test/fun_r13_SUITE.erl
deleted file mode 100644
index a45ed08b9d..0000000000
--- a/erts/emulator/test/fun_r13_SUITE.erl
+++ /dev/null
@@ -1,74 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2007-2016. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%% http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
-
--module(fun_r13_SUITE).
--compile(r13).
-
--export([all/0, suite/0,
- dist_old_release/1]).
-
--include_lib("common_test/include/ct.hrl").
-
-suite() ->
- [{ct_hooks,[ts_install_cth]},
- {timetrap, {minutes, 1}}].
-
-all() ->
- [dist_old_release].
-
-dist_old_release(Config) when is_list(Config) ->
- case test_server:is_release_available("r12b") of
- true -> do_dist_old(Config);
- false -> {skip,"No R12B found"}
- end.
-
-do_dist_old(Config) when is_list(Config) ->
- Pa = filename:dirname(code:which(?MODULE)),
- Name = fun_dist_r12,
- {ok,Node} = test_server:start_node(Name, peer,
- [{args,"-pa "++Pa},
- {erl,[{release,"r12b"}]}]),
-
- Pid = spawn_link(Node,
- fun() ->
- receive
- Fun when is_function(Fun) ->
- R12BFun = fun(H) -> cons(H, [b,c]) end,
- Fun(Fun, R12BFun)
- end
- end),
- Self = self(),
- Fun = fun(F, R12BFun) ->
- {pid,Self} = erlang:fun_info(F, pid),
- {module,?MODULE} = erlang:fun_info(F, module),
- Self ! {ok,F,R12BFun}
- end,
- Pid ! Fun,
- receive
- {ok,Fun,R12BFun} ->
- [a,b,c] = R12BFun(a);
- Other ->
- ct:fail({bad_message,Other})
- end,
- true = test_server:stop_node(Node),
- ok.
-
-cons(H, T) ->
- [H|T].