diff options
Diffstat (limited to 'erts')
-rw-r--r-- | erts/configure.in | 50 | ||||
-rw-r--r-- | erts/doc/src/erl_dist_protocol.xml | 8 | ||||
-rw-r--r-- | erts/doc/src/erl_ext_dist.xml | 251 | ||||
-rw-r--r-- | erts/doc/src/notes.xml | 188 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 1 | ||||
-rw-r--r-- | erts/emulator/beam/beam_ranges.c | 8 | ||||
-rw-r--r-- | erts/emulator/beam/big.c | 7 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 11 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_lists.c | 797 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_unique.h | 6 | ||||
-rw-r--r-- | erts/emulator/beam/erl_db_hash.c | 12 | ||||
-rw-r--r-- | erts/emulator/beam/erl_db_tree.c | 26 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.c | 12 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 456 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 2 | ||||
-rw-r--r-- | erts/emulator/beam/utils.c | 2 | ||||
-rw-r--r-- | erts/emulator/drivers/common/inet_drv.c | 9 | ||||
-rw-r--r-- | erts/emulator/test/big_SUITE.erl | 39 | ||||
-rw-r--r-- | erts/emulator/test/process_SUITE.erl | 54 | ||||
-rw-r--r-- | erts/emulator/test/ref_SUITE.erl | 28 | ||||
-rw-r--r-- | erts/vsn.mk | 2 |
22 files changed, 1563 insertions, 407 deletions
diff --git a/erts/configure.in b/erts/configure.in index 400d91154a..4a4f478521 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -422,6 +422,56 @@ if test X"$with_ets_write_concurrency_locks" != X""; then [Define to override the default number of write_concurrency locks]) fi +AC_ARG_WITH(spectre-mitigation, + AS_HELP_STRING([--with-spectre-mitigation={yes|incomplete}], + [enable spectre mitigation, either fully or with mitigations + disabled in a handful places like the interpreter]) + AS_HELP_STRING([--without-spectre-mitigation], + [build without spectre mitigation]), + [],[with_spectre_mitigation=no]) + +case "$with_spectre_mitigation" in + no) ;; + yes) ;; + incomplete) ;; + *) AC_MSG_ERROR([Invalid spectre mitigation setting]) ;; +esac + +i_noretpoline_attr="" + +if test X"$with_spectre_mitigation" != X"no"; then + CFLAGS="$CFLAGS -mindirect-branch=thunk" + + AC_MSG_CHECKING([for spectre mitigation]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([],[return 0;])], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([no])]) + + if test X"$with_spectre_mitigation" = X"incomplete"; then + # gcc and clang support this attribute if they're recent enough. Note + # that we must compile with -Werror to check for actual support as they + # warn rather than error out on unsupported attributes. + + i_noretpoline_attr='__attribute__((__indirect_branch__("keep")))' + i_preserve_cflags="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + + AC_MSG_CHECKING([whether spectre mitigation can be disabled on a per-function basis]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([$i_noretpoline_attr],[return 0;])], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([no])]) + + CFLAGS="$i_preserve_cflags" + fi +fi + +AC_DEFINE_UNQUOTED(ERTS_NO_RETPOLINE, $i_noretpoline_attr, + [Per-function attribute for disabling retpoline. This is + *only* defined when --with-spectre-mitigation=incomplete + and has no effects otherwise]) + dnl ---------------------------------------------------------------------- dnl Checks for programs. dnl ---------------------------------------------------------------------- diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml index 610351db6c..79f703455a 100644 --- a/erts/doc/src/erl_dist_protocol.xml +++ b/erts/doc/src/erl_dist_protocol.xml @@ -829,6 +829,14 @@ DiB == gen_digest(ChA, ICA)? <item> <p>The node understand UTF-8 encoded atoms.</p> </item> + <tag><c>-define(DFLAG_MAP_TAG, 16#20000).</c></tag> + <item> + <p>The node understand the map tag.</p> + </item> + <tag><c>-define(DFLAG_BIG_CREATION, 16#40000).</c></tag> + <item> + <p>The node understand big node creation.</p> + </item> </taglist> </section> </section> diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml index b7090d0472..5813af1d57 100644 --- a/erts/doc/src/erl_ext_dist.xml +++ b/erts/doc/src/erl_ext_dist.xml @@ -386,44 +386,6 @@ </section> <section> - <marker id="REFERENCE_EXT"/> - <title>REFERENCE_EXT</title> - <table align="left"> - <row> - <cell align="center">1</cell> - <cell align="center">N</cell> - <cell align="center">4</cell> - <cell align="center">1</cell> - </row> - <row> - <cell align="center"><c>101</c></cell> - <cell align="center"><c>Node</c></cell> - <cell align="center"><c>ID</c></cell> - <cell align="center"><c>Creation</c></cell> - </row> - <tcaption>REFERENCE_EXT</tcaption></table> - <p> - Encodes a reference object (an object generated with - <seealso marker="erlang:make_ref/0">erlang:make_ref/0</seealso>). - The <c>Node</c> term is an encoded atom, that is, - <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or - <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>. - The <c>ID</c> field contains a big-endian unsigned integer, - but <em>is to be regarded as uninterpreted data</em>, - as this field is node-specific. - <c>Creation</c> is a byte containing a node serial number, which - makes it possible to separate old (crashed) nodes from a new one. - </p> - <p> - In <c>ID</c>, only 18 bits are significant; the rest are to be 0. - In <c>Creation</c>, only two bits are significant; the rest are to be 0. - See <seealso marker="#NEW_REFERENCE_EXT"> - <c>NEW_REFERENCE_EXT</c></seealso>. - </p> - </section> - - <section> <marker id="PORT_EXT"/> <title>PORT_EXT</title> <table align="left"> @@ -441,13 +403,46 @@ </row> <tcaption>PORT_EXT</tcaption></table> <p> - Encodes a port object (obtained from - <seealso marker="erlang:open_port/2"> - <c>erlang:open_port/2</c></seealso>). - The <c>ID</c> is a node-specific identifier for a local port. + Same as <seealso marker="#NEW_PORT_EXT"><c>NEW_PORT_EXT</c></seealso> + except the <c>Creation</c> field is only one byte and only two + bits are significant, the rest are to be 0. + </p> + </section> + + <section> + <marker id="NEW_PORT_EXT"/> + <title>NEW_PORT_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + </row> + <row> + <cell align="center"><c>89</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>NEW_PORT_EXT</tcaption></table> + <p> + Encodes a port identifier (obtained from + <seealso marker="erlang#open_port/2"><c>erlang:open_port/2</c></seealso>). + <c>Node</c> is an encoded atom, that is, + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>. + <c>ID</c> is a 32-bit big endian unsigned integer. Only 28 bits are + significant; the rest are to be 0. The <c>Creation</c> works just like in + <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso>. Port operations are not allowed across node boundaries. - The <c>Creation</c> works just like in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. + </p> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local ports. Planned to supersede <seealso marker="#PORT_EXT"> + <c>PORT_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. </p> </section> @@ -471,12 +466,65 @@ </row> <tcaption>PID_EXT</tcaption></table> <p> - Encodes a process identifier object (obtained from - <seealso marker="erlang:spawn/3"><c>erlang:spawn/3</c></seealso> or - friends). The <c>ID</c> and <c>Creation</c> fields works just like in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>, while - the <c>Serial</c> field is used to improve safety. - In <c>ID</c>, only 15 bits are significant; the rest are to be 0. + Same as <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso> + except the <c>Creation</c> field is only one byte and only two + bits are significant, the rest are to be 0. + </p> + </section> + + <section> + <marker id="NEW_PID_EXT"/> + <title>NEW_PID_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + </row> + <row> + <cell align="center"><c>88</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Serial</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>NEW_PID_EXT</tcaption></table> + <p> + Encodes an Erlang process identifier object. + </p> + <taglist> + <tag><c>Node</c></tag> + <item><p>The name of the originating node, encoded using + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso + marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>.</p> + </item> + <tag><c>ID</c></tag> + <item><p>A 32-bit big endian unsigned integer. Only 15 bits are + significant; the rest are to be 0.</p> + </item> + <tag><c>Serial</c></tag> + <item><p>A 32-bit big endian unsigned integer. Only 13 bits are + significant; the rest are to be 0.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>A 32-bit big endian unsigned integer. All identifiers + originating from the same node incarnation must have identical <c>Creation</c> + values. This makes it possible to separate identifiers from old + (crashed) nodes from a new one. The value zero should be avoided for + normal operations as it is used as a wild card for debug purpose + (like a pid returned by <seealso marker="erts:erlang#list_to_pid/1"> + erlang:list_to_pid/1</seealso>).</p> + </item> + </taglist> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local processes. Planned to supersede <seealso marker="#PID_EXT"> + <c>PID_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. </p> </section> @@ -700,6 +748,30 @@ </section> <section> + <marker id="REFERENCE_EXT"/> + <title>REFERENCE_EXT (deprecated)</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">1</cell> + </row> + <row> + <cell align="center"><c>101</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>REFERENCE_EXT</tcaption></table> + <p> + The same as <seealso marker="#NEW_REFERENCE_EXT"> + <c>NEW_REFERENCE_EXT</c></seealso> except <c>ID</c> is only one word + (<c>Len</c> = 1). + </p> + </section> + + <section> <marker id="NEW_REFERENCE_EXT"/> <title>NEW_REFERENCE_EXT</title> <table align="left"> @@ -719,29 +791,68 @@ </row> <tcaption>NEW_REFERENCE_EXT</tcaption></table> <p> - <c>Node</c> and <c>Creation</c> are as in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. - </p> - <p> - <c>ID</c> contains a sequence of big-endian unsigned integers - (4 bytes each, so <c>N'</c> is a multiple of 4), - but is to be regarded as uninterpreted data. - </p> - <p> - <c>N'</c> = 4 * <c>Len</c>. - </p> - <p> - In the first word (4 bytes) of <c>ID</c>, only 18 bits are - significant, the rest are to be 0. - In <c>Creation</c>, only two bits are significant, - the rest are to be 0. + The same as <seealso marker="#NEWER_REFERENCE_EXT"> + <c>NEWER_REFERENCE_EXT</c></seealso> <em>except</em>: </p> + <taglist> + <tag><c>ID</c></tag> + <item><p>In the first word (4 bytes) of <c>ID</c>, only 18 bits are + significant, the rest must be 0.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>Only one byte long and only two bits are significant, the rest must be 0.</p> + </item> + </taglist> + </section> + + <section> + <marker id="NEWER_REFERENCE_EXT"/> + <title>NEWER_REFERENCE_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">2</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">N'</cell> + </row> + <row> + <cell align="center"><c>90</c></cell> + <cell align="center"><c>Len</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>Creation</c></cell> + <cell align="center"><c>ID ...</c></cell> + </row> + <tcaption>NEWER_REFERENCE_EXT</tcaption></table> <p> - <c>NEW_REFERENCE_EXT</c> was introduced with distribution version 4. - In version 4, <c>N'</c> is to be at most 12. + Encodes a reference term generated with + <seealso marker="erts:erlang#make_ref/0">erlang:make_ref/0</seealso>. </p> - <p> - See <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. + <taglist> + <tag><c>Node</c></tag> + <item><p>The name of the originating node, encoded using + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>.</p> + </item> + <tag><c>Len</c></tag> + <item><p>A 16-bit big endian unsigned integer not larger than 3.</p> + </item> + <tag><c>ID</c></tag> + <item><p>A sequence of <c>Len</c> big-endian unsigned integers + (4 bytes each, so <c>N'</c> = 4 * <c>Len</c>), + but is to be regarded as uninterpreted data.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>Works just like in + <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso>.</p> + </item> + </taglist> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local references. Planned to supersede <seealso marker="#NEW_REFERENCE_EXT"> + <c>NEW_REFERENCE_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. </p> </section> diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml index 5b414853a3..56681c0250 100644 --- a/erts/doc/src/notes.xml +++ b/erts/doc/src/notes.xml @@ -31,6 +31,194 @@ </header> <p>This document describes the changes made to the ERTS application.</p> +<section><title>Erts 9.3.3.9</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Added an optional <c>./configure</c> flag to compile + the emulator with spectre mitigation: + <c>--with-spectre-mitigation</c></p> + <p>Note that this requires a recent version of GCC with + support for spectre mitigation and the + <c>--mindirect-branch=thunk</c> flag, such as + <c>8.1</c>.</p> + <p> + Own Id: OTP-15430 Aux Id: ERIERL-237 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.8</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + A bug that could cause dirty schedulers to become + unresponsive has been fixed.</p> + <p> + Own Id: OTP-15509 Aux Id: PR-2027, PR-2093 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.7</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in operator <c>band</c> of two negative + operands causing erroneous result if the absolute value + of one of the operands have the lowest <c>N*W</c> bits as + zero and the other absolute value is not larger than + <c>N*W</c> bits. <c>N</c> is an integer of 1 or larger + and <c>W</c> is 32 or 64 depending on word size.</p> + <p> + Own Id: OTP-15487 Aux Id: ERL-804 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.6</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>List subtraction (The <c>--</c> operator) will now + yield properly on large inputs.</p> + <p> + Own Id: OTP-15371</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.5</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + ERTS internal trees of monitor structures could get into + an inconsistent state. This could cause <c>'DOWN'</c> + messages not to be delivered when they should, as well as + delivery of <c>'DOWN'</c> messages that should not be + delivered.</p> + <p> + This bug was introduced in ERTS version 9.0 (OTP 20.0) + and was fixed in ERTS version 10.0 (OTP 21.0) due to a + rewrite of the monitor code. That is, this bug only exist + in the OTP 20 release.</p> + <p> + Own Id: OTP-15399 Aux Id: ERL-751, ERIERL-262, OTP-14205 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.4</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in <c>ets:select_replace</c> when called with a + fully bound key could cause a following call to + <c>ets:next</c> or <c>ets:prev</c> to crash the emulator + or return invalid result.</p> + <p> + Own Id: OTP-15346</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.3</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed a bug which caused an emulator crash when + <c>enif_send()</c> was called by a NIF that executed on a + dirty scheduler. The bug was either triggered when the + NIF called <c>enif_send()</c> without a message + environment, or when the process executing the NIF was + <c>send</c> traced.</p> + <p> + Own Id: OTP-15223</p> + </item> + <item> + <p> + Fixed a bug causing some Erlang references to be + inconsistently ordered. This could for example cause + failure to look up certain elements with references as + keys in search data structures. This bug was introduced + in R13B02.</p> + <p> + Thanks to Simon Cornish for finding the bug and supplying + a fix.</p> + <p> + *** POTENTIAL INCOMPATIBILITY ***</p> + <p> + Own Id: OTP-15225</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.2</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixed a race condition in the inet driver that could + cause receive to hang when the emulator was compiled with + gcc 8.</p> + <p> + Own Id: OTP-15158 Aux Id: ERL-654 </p> + </item> + <item> + <p> + Fix bug in generation of erl_crash.dump, which could + cause VM to crash.</p> + <p> + Bug exist since erts-9.2 (OTP-20.2).</p> + <p> + Own Id: OTP-15181</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.1</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixed a rare bug that could cause processes to be + scheduled after they had been freed.</p> + <p> + Own Id: OTP-15067 Aux Id: ERL-573 </p> + </item> + </list> + </section> + +</section> + <section><title>Erts 9.3.3</title> <section><title>Fixed Bugs and Malfunctions</title> diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index b4e6c35579..7db0938b39 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1237,6 +1237,7 @@ init_emulator(void) * the instructions' C labels to the loader. * The second call starts execution of BEAM code. This call never returns. */ +ERTS_NO_RETPOLINE void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) { static int init_done = 0; diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index c7b17d58f3..9570fb34db 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -35,10 +35,8 @@ typedef struct { /* * Used for crash dumping of literals. The size of erts_dump_lit_areas is - * always twice the number of active ranges (to allow for literals in both - * current and old code). + * always at least the number of active ranges. */ - ErtsLiteralArea** erts_dump_lit_areas; Uint erts_dump_num_lit_areas; @@ -180,8 +178,8 @@ erts_end_staging_ranges(int commit) (erts_aint_t) (r[dst].modules + r[dst].n / 2)); - if (r[dst].allocated * 2 > erts_dump_num_lit_areas) { - erts_dump_num_lit_areas *= 2; + if (r[dst].allocated > erts_dump_num_lit_areas) { + erts_dump_num_lit_areas = r[dst].allocated * 2; erts_dump_lit_areas = (ErtsLiteralArea **) erts_realloc(ERTS_ALC_T_CRASH_DUMP, (void *) erts_dump_lit_areas, diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index c5cb268f09..60f0ecf42b 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -1159,8 +1159,11 @@ static dsize_t I_band(ErtsDigit* x, dsize_t xl, short xsgn, *r++ = ~c1 & ~c2; x++; y++; } - while(xl--) - *r++ = ~*x++; + while(xl--) { + DSUBb(*x,0,b1,c1); + *r++ = ~c1; + x++; + } } } return I_btrail(r0, r, sign); diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 252bf1cc7e..af1133b853 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -322,6 +322,7 @@ type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data type T_THR_PRGR_DATA SHORT_LIVED SYSTEM temp_thr_prgr_data type RELEASE_LAREA SHORT_LIVED SYSTEM release_literal_area +endif +type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state # # Types used for special emulators diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 4050fb6146..43d99db0b2 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -4459,7 +4459,16 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) refbin)); } } - + else if (ERTS_IS_ATOM_STR("mbuf", BIF_ARG_1)) { + Uint sz = size_object(BIF_ARG_2); + ErlHeapFragment* frag = new_message_buffer(sz); + Eterm *hp = frag->mem; + Eterm copy = copy_struct(BIF_ARG_2, sz, &hp, &frag->off_heap); + frag->next = BIF_P->mbuf; + BIF_P->mbuf = frag; + BIF_P->mbuf_sz += sz; + BIF_RET(copy); + } } BIF_ERROR(BIF_P, BADARG); diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 73d327da3e..94a41c285a 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -29,12 +29,13 @@ #include "sys.h" #include "erl_vm.h" #include "global.h" -#include "erl_process.h" -#include "error.h" #include "bif.h" +#include "erl_binary.h" + static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List); + static BIF_RETTYPE append(Process* p, Eterm A, Eterm B) { Eterm list; @@ -146,103 +147,725 @@ BIF_RETTYPE append_2(BIF_ALIST_2) return append(BIF_P, BIF_ARG_1, BIF_ARG_2); } -/* - * erlang:'--'/2 - */ +/* erlang:'--'/2 + * + * Subtracts a list from another (LHS -- RHS), removing the first occurrence of + * each element in LHS from RHS. There is no type coercion so the elements must + * match exactly. + * + * The BIF is broken into several stages that can all trap individually, and it + * chooses its algorithm based on input size. If either input is small it will + * use a linear scan tuned to which side it's on, and if both inputs are large + * enough it will convert RHS into a multiset to provide good asymptotic + * behavior. */ -#define SMALL_VEC_SIZE 10 -static Eterm subtract(Process* p, Eterm A, Eterm B) -{ - Eterm list; - Eterm* hp; - Uint need; - Eterm res; - Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */ - Eterm* vec_p; - Eterm* vp; - Sint i; - Sint n; - Sint m; - - if ((n = erts_list_length(A)) < 0) { - BIF_ERROR(p, BADARG); +#define SUBTRACT_LHS_THRESHOLD 16 +#define SUBTRACT_RHS_THRESHOLD 16 + +typedef enum { + SUBTRACT_STAGE_START, + SUBTRACT_STAGE_LEN_LHS, + + /* Naive linear scan that's efficient when + * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */ + SUBTRACT_STAGE_NAIVE_LHS, + + SUBTRACT_STAGE_LEN_RHS, + + /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */ + SUBTRACT_STAGE_NAIVE_RHS, + + /* Creates a multiset from RHS for faster lookups before sweeping through + * LHS. The set is implemented as a red-black tree and duplicate elements + * are handled by a counter on each node. */ + SUBTRACT_STAGE_SET_BUILD, + SUBTRACT_STAGE_SET_FINISH +} ErtsSubtractCtxStage; + +typedef struct subtract_node__ { + struct subtract_node__ *parent; + struct subtract_node__ *left; + struct subtract_node__ *right; + int is_red; + + Eterm key; + Uint count; +} subtract_tree_t; + +typedef struct { + ErtsSubtractCtxStage stage; + + Eterm lhs_original; + Eterm rhs_original; + + Uint lhs_remaining; + Uint rhs_remaining; + + Eterm iterator; + + Eterm *result_cdr; + Eterm result; + + union { + Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD]; + Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD]; + + struct { + subtract_tree_t *tree; + + /* A memory area for the tree's nodes, saving us the need to have + * one allocation per node. */ + subtract_tree_t *alloc_start; + subtract_tree_t *alloc; + } rhs_set; + } u; +} ErtsSubtractContext; + +#define ERTS_RBT_PREFIX subtract +#define ERTS_RBT_T subtract_tree_t +#define ERTS_RBT_KEY_T Eterm +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_IS_LT(KX, KY) (CMP_TERM(KX, KY) < 0) +#define ERTS_RBT_IS_EQ(KX, KY) EQ(KX, KY) +#define ERTS_RBT_WANT_LOOKUP_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +static int subtract_continue(Process *p, ErtsSubtractContext *context); + +static void subtract_ctx_dtor(ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_SET_BUILD: + case SUBTRACT_STAGE_SET_FINISH: + erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start); + break; + default: + break; } - if ((m = erts_list_length(B)) < 0) { - BIF_ERROR(p, BADARG); +} + +static int subtract_ctx_bin_dtor(Binary *context_bin) { + ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin); + subtract_ctx_dtor(context); + return 1; +} + +static void subtract_ctx_move(ErtsSubtractContext *from, + ErtsSubtractContext *to) { + int uses_result_cdr = 0; + + to->stage = from->stage; + + to->lhs_original = from->lhs_original; + to->rhs_original = from->rhs_original; + + to->lhs_remaining = from->lhs_remaining; + to->rhs_remaining = from->rhs_remaining; + + to->iterator = from->iterator; + to->result = from->result; + + switch (to->stage) { + case SUBTRACT_STAGE_NAIVE_LHS: + sys_memcpy(to->u.lhs_elements, + from->u.lhs_elements, + sizeof(Eterm) * to->lhs_remaining); + break; + case SUBTRACT_STAGE_NAIVE_RHS: + sys_memcpy(to->u.rhs_elements, + from->u.rhs_elements, + sizeof(Eterm) * to->rhs_remaining); + + uses_result_cdr = 1; + break; + case SUBTRACT_STAGE_SET_FINISH: + uses_result_cdr = 1; + /* FALL THROUGH */ + case SUBTRACT_STAGE_SET_BUILD: + to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start; + to->u.rhs_set.alloc = from->u.rhs_set.alloc; + to->u.rhs_set.tree = from->u.rhs_set.tree; + break; + default: + break; } - - if (n == 0) - BIF_RET(NIL); - if (m == 0) - BIF_RET(A); - - /* allocate element vector */ - if (n <= SMALL_VEC_SIZE) - vec_p = small_vec; - else - vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); - - /* PUT ALL ELEMENTS IN VP */ - vp = vec_p; - list = A; - i = n; - while(i--) { - Eterm* listp = list_val(list); - *vp++ = CAR(listp); - list = CDR(listp); + + if (uses_result_cdr) { + if (from->result_cdr == &from->result) { + to->result_cdr = &to->result; + } else { + to->result_cdr = from->result_cdr; + } } - - /* UNMARK ALL DELETED CELLS */ - list = B; - m = 0; /* number of deleted elements */ - while(is_list(list)) { - Eterm* listp = list_val(list); - Eterm elem = CAR(listp); - i = n; - vp = vec_p; - while(i--) { - if (is_value(*vp) && eq(*vp, elem)) { - *vp = THE_NON_VALUE; - m++; - break; - } - vp++; - } - list = CDR(listp); +} + +static Eterm subtract_create_trap_state(Process *p, + ErtsSubtractContext *context) { + Binary *state_bin; + Eterm *hp; + + state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext), + subtract_ctx_bin_dtor); + + subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin)); + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + + return erts_mk_magic_ref(&hp, &MSO(p), state_bin); +} + +static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_LHS; + + context->iterator = context->lhs_original; + context->lhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_RHS; + + context->iterator = context->rhs_original; + context->rhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) { + static const Sint ELEMENTS_PER_RED = 32; + + Sint budget, count; + Eterm iterator; + + budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p); + iterator = *iterator_p; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + for (count = 0; count < budget && is_list(iterator); count++) { + iterator = CDR(list_val(iterator)); } - - if (m == n) /* All deleted ? */ - res = NIL; - else if (m == 0) /* None deleted ? */ - res = A; - else { /* REBUILD LIST */ - res = NIL; - need = 2*(n - m); - hp = HAlloc(p, need); - vp = vec_p + n - 1; - while(vp >= vec_p) { - if (is_value(*vp)) { - res = CONS(hp, *vp, res); - hp += 2; - } - vp--; - } + + if (!is_list(iterator) && !is_nil(iterator)) { + return -1; + } + + BUMP_REDS(p, count / ELEMENTS_PER_RED); + + *iterator_p = iterator; + *count_p += count; + + if (is_nil(iterator)) { + return 1; } - if (vec_p != small_vec) - erts_free(ERTS_ALC_T_TMP, (void *) vec_p); - BIF_RET(res); + + return 0; } -BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_LHS; + + context->iterator = context->rhs_original; + context->result = NIL; + + iterator = context->lhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_LHS_THRESHOLD); + + context->u.lhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->lhs_remaining); + + return subtract_continue(p, context); } -BIF_RETTYPE subtract_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + + value = CAR(cell); + next = CDR(cell); + + for (found_at = 0; found_at < context->lhs_remaining; found_at++) { + if (EQ(value, context->u.lhs_elements[found_at])) { + /* We shift the array one step down as we have to preserve + * order. + * + * Note that we can't exit early as that would suppress errors + * in the right-hand side (this runs prior to determining the + * length of RHS). */ + + context->lhs_remaining--; + sys_memmove(&context->u.lhs_elements[found_at], + &context->u.lhs_elements[found_at + 1], + (context->lhs_remaining - found_at) * sizeof(Eterm)); + break; + } + } + + checks += MAX(1, context->lhs_remaining); + context->iterator = next; + } + + BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + return 0; + } else if (!is_nil(context->iterator)) { + return -1; + } + + if (context->lhs_remaining > 0) { + Eterm *hp; + int i; + + hp = HAlloc(p, context->lhs_remaining * 2); + + for (i = context->lhs_remaining - 1; i >= 0; i--) { + Eterm value = context->u.lhs_elements[i]; + + context->result = CONS(hp, value, context->result); + hp += 2; + } + } + + ASSERT(context->lhs_remaining > 0 || context->result == NIL); + + return 1; +} + +static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_RHS; + + context->iterator = context->lhs_original; + context->result_cdr = &context->result; + context->result = NIL; + + iterator = context->rhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_RHS_THRESHOLD); + + context->u.rhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->rhs_remaining); + + return subtract_continue(p, context); +} + +static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) { + if (EQ(value, context->u.rhs_elements[found_at])) { + break; + } + } + + if (found_at < 0) { + /* Destructively add the value to the result. This is safe + * since the GC is disabled and the unfinished term is never + * leaked to the outside world. */ + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else if (found_at >= 0) { + Eterm swap; + + if (context->rhs_remaining-- == 1) { + /* We've run out of items to remove, so the rest of the + * result will be equal to the remainder of the input. We know + * that LHS is well-formed as any errors would've been reported + * during length determination. */ + *context->result_cdr = next; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + return 1; + } + + swap = context->u.rhs_elements[context->rhs_remaining]; + context->u.rhs_elements[found_at] = swap; + } + + checks += context->rhs_remaining; + context->iterator = next; + context->lhs_remaining--; + } + + /* The result only has to be terminated when returning it to the user, but + * we're doing it when trapping as well to prevent headaches when + * debugging. */ + *context->result_cdr = NIL; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_BUILD; + + context->u.rhs_set.alloc_start = + erts_alloc(ERTS_ALC_T_LIST_TRAP, + context->rhs_remaining * sizeof(subtract_tree_t)); + + context->u.rhs_set.alloc = context->u.rhs_set.alloc_start; + context->u.rhs_set.tree = NULL; + + context->iterator = context->rhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_build(Process *p, ErtsSubtractContext *context) { + const static Sint INSERTIONS_PER_RED = 16; + Sint budget, insertions; + + budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p); + insertions = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (insertions < budget && is_list(context->iterator)) { + subtract_tree_t *existing_node, *new_node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + new_node = context->u.rhs_set.alloc; + new_node->key = value; + new_node->count = 1; + + existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree, + new_node); + + if (existing_node != NULL) { + existing_node->count++; + } else { + context->u.rhs_set.alloc++; + } + + context->iterator = next; + insertions++; + } + + BUMP_REDS(p, insertions / INSERTIONS_PER_RED); + + ASSERT(is_list(context->iterator) || is_nil(context->iterator)); + ASSERT(context->u.rhs_set.tree != NULL); + + return is_nil(context->iterator); +} + +static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_FINISH; + + context->result_cdr = &context->result; + context->result = NIL; + + context->iterator = context->lhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_finish(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 8; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + subtract_tree_t *node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + ASSERT(context->rhs_remaining > 0); + + node = subtract_rbt_lookup(context->u.rhs_set.tree, value); + + if (node == NULL) { + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else { + if (context->rhs_remaining-- == 1) { + *context->result_cdr = next; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + return 1; + } + + if (node->count-- == 1) { + subtract_rbt_delete(&context->u.rhs_set.tree, node); + } + } + + context->iterator = next; + context->lhs_remaining--; + checks++; + } + + *context->result_cdr = NIL; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_continue(Process *p, ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_START: { + return subtract_enter_len_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_LHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->lhs_remaining); + + if (res != 1) { + return res; + } + + if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) { + return subtract_enter_naive_lhs(p, context); + } + + return subtract_enter_len_rhs(p, context); + } + + case SUBTRACT_STAGE_NAIVE_LHS: { + return subtract_naive_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_RHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->rhs_remaining); + + if (res != 1) { + return res; + } + + /* We've walked through both lists fully now so we no longer need + * to check for errors past this point. */ + + if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) { + return subtract_enter_naive_rhs(p, context); + } + + return subtract_enter_set_build(p, context); + } + + case SUBTRACT_STAGE_NAIVE_RHS: { + return subtract_naive_rhs(p, context); + } + + case SUBTRACT_STAGE_SET_BUILD: { + int res = subtract_set_build(p, context); + + if (res != 1) { + return res; + } + + return subtract_enter_set_finish(p, context); + } + + case SUBTRACT_STAGE_SET_FINISH: { + return subtract_set_finish(p, context); + } + + default: + ERTS_ASSERT(!"unreachable"); + } +} + +static int subtract_start(Process *p, Eterm lhs, Eterm rhs, + ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_START; + + context->lhs_original = lhs; + context->rhs_original = rhs; + + return subtract_continue(p, context); } +/* erlang:'--'/2 */ +static Eterm subtract(Export *bif_entry, BIF_ALIST_2) { + Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2; + + if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) { + /* We start with the context on the stack in the hopes that we won't + * have to trap. */ + ErtsSubtractContext context; + int res; + + res = subtract_start(BIF_P, lhs, rhs, &context); + + if (res == 0) { + Eterm state_mref; + + state_mref = subtract_create_trap_state(BIF_P, &context); + erts_set_gc_state(BIF_P, 0); + + BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL); + } + + subtract_ctx_dtor(&context); + + if (res < 0) { + BIF_ERROR(BIF_P, BADARG); + } + + BIF_RET(context.result); + } else if (is_internal_magic_ref(lhs)) { + ErtsSubtractContext *context; + int (*dtor)(Binary*); + Binary *magic_bin; + + int res; + + magic_bin = erts_magic_ref2bin(lhs); + dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin); + + if (dtor != subtract_ctx_bin_dtor) { + BIF_ERROR(BIF_P, BADARG); + } + + ASSERT(BIF_P->flags & F_DISABLE_GC); + ASSERT(rhs == NIL); + + context = ERTS_MAGIC_BIN_DATA(magic_bin); + res = subtract_continue(BIF_P, context); + + if (res == 0) { + BIF_TRAP2(bif_entry, BIF_P, lhs, NIL); + } + + erts_set_gc_state(BIF_P, 1); + + if (res < 0) { + ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry, + context->lhs_original, + context->rhs_original); + } + + BIF_RET(context->result); + } + + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS); +} + +BIF_RETTYPE subtract_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS); +} + + BIF_RETTYPE lists_member_2(BIF_ALIST_2) { Eterm term; diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h index 9aa631fde9..e3a633080d 100644 --- a/erts/emulator/beam/erl_bif_unique.h +++ b/erts/emulator/beam/erl_bif_unique.h @@ -242,11 +242,11 @@ erts_internal_ref_number_cmp(Uint32 num1[ERTS_REF_NUMBERS], Uint32 num2[ERTS_REF_NUMBERS]) { if (num1[2] != num2[2]) - return (int) ((Sint64) num1[2] - (Sint64) num2[2]); + return num1[2] > num2[2] ? 1 : -1; if (num1[1] != num2[1]) - return (int) ((Sint64) num1[1] - (Sint64) num2[1]); + return num1[1] > num2[1] ? 1 : -1; if (num1[0] != num2[0]) - return (int) ((Sint64) num1[0] - (Sint64) num2[0]); + return num1[0] > num2[0] ? 1 : -1; return 0; } diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index cf928a9035..47d304f860 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -1328,11 +1328,7 @@ static int match_traverse(Process* p, DbTableHash* tb, unlock_hash_function(lck); break; } - if (iterations_left <= 0 || MBUF(p)) { - /* - * We have either reached our limit, or just created some heap fragments. - * Since many heap fragments will make the GC slower, trap and GC now. - */ + if (iterations_left <= 0) { unlock_hash_function(lck); ret_value = on_trap(context_ptr, slot_ix, got, &mpi.mp, ret); goto done; @@ -1451,11 +1447,7 @@ static int match_traverse_continue(Process* p, DbTableHash* tb, unlock_hash_function(lck); break; } - if (iterations_left <= 0 || MBUF(p)) { - /* - * We have either reached our limit, or just created some heap fragments. - * Since many heap fragments will make the GC slower, trap and GC now. - */ + if (iterations_left <= 0) { unlock_hash_function(lck); ret_value = on_trap(context_ptr, slot_ix, got, mpp, ret); goto done; diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c index 7c80e92e50..d2b0bf95bd 100644 --- a/erts/emulator/beam/erl_db_tree.c +++ b/erts/emulator/beam/erl_db_tree.c @@ -1884,22 +1884,14 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid, sc.mp = mpi.mp; sc.all_objects = mpi.all_objects; - stack = get_static_stack(tb); if (!mpi.got_partial && mpi.some_limitation && CMP_EQ(mpi.least,mpi.most)) { - TreeDbTerm* term = *(mpi.save_term); doit_select_replace(tb,mpi.save_term,&sc,0 /* dummy */); - if (stack != NULL) { - if (TOP_NODE(stack) == term) - // throw away potentially invalid reference - REPLACE_TOP_NODE(stack, *(mpi.save_term)); - release_stack(tb, stack); - } + reset_static_stack(tb); /* may refer replaced term */ RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE); } - if (stack == NULL) - stack = get_any_stack(tb); + stack = get_any_stack(tb); if (mpi.some_limitation) { if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) { @@ -3344,13 +3336,6 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr, if (is_value(ret)) { sc->accum = CONS(hp, ret, sc->accum); } - if (MBUF(sc->p)) { - /* - * Force a trap and GC if a heap fragment was created. Many heap fragments - * make the GC slow. - */ - sc->max = 0; - } if (--(sc->max) <= 0) { return 0; } @@ -3407,13 +3392,6 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr, ++(sc->got); sc->accum = CONS(hp, ret, sc->accum); } - if (MBUF(sc->p)) { - /* - * Force a trap and GC if a heap fragment was created. Many heap fragments - * make the GC slow. - */ - sc->max = 0; - } if (--(sc->max) <= 0 || sc->got == sc->chunk_size) { return 0; } diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 4e479c26ef..f8e964e2cf 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -409,8 +409,18 @@ static void full_flush_env(ErlNifEnv* env) static void full_cache_env(ErlNifEnv* env) { #ifdef ERTS_DIRTY_SCHEDULERS - if (env->proc->static_flags & ERTS_STC_FLG_SHADOW_PROC) + if (env->proc->static_flags & ERTS_STC_FLG_SHADOW_PROC) { erts_cache_dirty_shadow_proc(env->proc); + /* + * If shadow proc had heap fragments when flushed + * those have now been moved to the real proc. + * Ensure heap pointers do not point into a heap + * fragment on real proc... + */ + ASSERT(!env->proc->mbuf); + env->hp_end = HEAP_LIMIT(env->proc); + env->hp = HEAP_TOP(env->proc); + } #endif cache_env(env); } diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 9f6adb03d0..be94418755 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -88,10 +88,6 @@ #undef HARDDEBUG #endif -#ifdef HARDDEBUG -#define HARDDEBUG_RUNQS -#endif - #ifdef HIPE #include "hipe_mode_switch.h" /* for hipe_init_process() */ #include "hipe_signal.h" /* for hipe_thread_signal_init() */ @@ -3420,6 +3416,46 @@ static void suspend_scheduler(ErtsSchedulerData *esdp); #endif /* ERTS_SMP */ +#ifdef HARDDEBUG +#define ERTS_HDBG_CHK_SLEEP_LIST(SL, L, F, FN) \ + check_sleepers_list((SL), (L), (F), (FN)) +static void check_sleepers_list(ErtsSchedulerSleepList *sl, + int lock, + ErtsSchedulerSleepInfo *find, + ErtsSchedulerSleepInfo *find_not) +{ + ErtsSchedulerSleepInfo *last_out; + int found = 0; + + if (lock) + erts_smp_spin_lock(&sl->lock); + + ERTS_ASSERT(!find_not || (!find_not->next && !find_not->prev)); + + last_out = sl->list; + if (last_out) { + ErtsSchedulerSleepInfo *tmp = last_out; + do { + ERTS_ASSERT(tmp->next); + ERTS_ASSERT(tmp->prev); + ERTS_ASSERT(tmp->next->prev == tmp); + ERTS_ASSERT(tmp->prev->next == tmp); + ERTS_ASSERT(tmp != find_not); + if (tmp == find) + found = !0; + tmp = tmp->next; + + } while (tmp != last_out); + } + ERTS_ASSERT(!find || found); + + if (lock) + erts_smp_spin_unlock(&sl->lock); +} +#else +#define ERTS_HDBG_CHK_SLEEP_LIST(SL, L, F, FN) ((void) 0) +#endif + static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { @@ -3436,32 +3472,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_lock(&rq->sleepers.lock); -#endif flgs = sched_prep_spin_wait(ssi); if (flgs & ERTS_SSI_FLG_SUSPENDED) { /* Go suspend instead... */ -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_unlock(&rq->sleepers.lock); -#endif return; } -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { - ssi->prev = NULL; - ssi->next = rq->sleepers.list; - if (rq->sleepers.list) - rq->sleepers.list->prev = ssi; - rq->sleepers.list = ssi; - erts_smp_spin_unlock(&rq->sleepers.lock); - dirty_active(esdp, -1); - } -#endif - /* * If all schedulers are waiting, one of them *should* * be waiting in erl_sys_schedule() @@ -3471,6 +3487,28 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) sched_waiting(esdp->no, rq); +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_smp_spin_lock(&rq->sleepers.lock); + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, ssi); + ASSERT(!ssi->next); /* Not in sleepers list */ + ASSERT(!ssi->prev); + if (!rq->sleepers.list) { + ssi->next = ssi->prev = ssi; + rq->sleepers.list = ssi; + } + else { + ssi->prev = rq->sleepers.list; + ssi->next = rq->sleepers.list->next; + ssi->prev->next = ssi; + ssi->next->prev = ssi; + } + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, ssi, NULL); + erts_smp_spin_unlock(&rq->sleepers.lock); + dirty_active(esdp, -1); + } +#endif + erts_smp_runq_unlock(rq); spincount = sched_busy_wait.tse; @@ -3597,7 +3635,31 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } - + +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_smp_spin_lock(&rq->sleepers.lock); + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, ssi->next ? ssi : NULL, NULL); + if (ssi->next) { /* Still in list... */ + if (ssi->next == ssi) { + ASSERT(rq->sleepers.list == ssi); + ASSERT(ssi->prev == ssi); + rq->sleepers.list = NULL; + } + else { + ASSERT(ssi->prev != ssi); + if (rq->sleepers.list == ssi) + rq->sleepers.list = ssi->next; + ssi->prev->next = ssi->next; + ssi->next->prev = ssi->prev; + } + ssi->next = ssi->prev = NULL; + } + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, ssi); + erts_smp_spin_unlock(&rq->sleepers.lock); + } +#endif + erts_smp_runq_lock(rq); sched_active(esdp->no, rq); @@ -3883,55 +3945,44 @@ wake_scheduler(ErtsRunQueue *rq) #ifdef ERTS_DIRTY_SCHEDULERS static void -wake_dirty_schedulers(ErtsRunQueue *rq, int one) +wake_dirty_scheduler(ErtsRunQueue *rq) { - ErtsSchedulerSleepInfo *ssi; + ErtsSchedulerSleepInfo *lo_ssi, *fo_ssi; ErtsSchedulerSleepList *sl; ASSERT(ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); sl = &rq->sleepers; erts_smp_spin_lock(&sl->lock); - ssi = sl->list; - if (!ssi) { + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, NULL); + lo_ssi = sl->list; + if (!lo_ssi) { erts_smp_spin_unlock(&sl->lock); - if (one) - wake_scheduler(rq); - } else if (one) { + wake_scheduler(rq); + } + else { erts_aint32_t flgs; - if (ssi->prev) - ssi->prev->next = ssi->next; - else { - ASSERT(sl->list == ssi); - sl->list = ssi->next; + fo_ssi = lo_ssi->next; + ASSERT(fo_ssi->prev == lo_ssi); + if (fo_ssi == lo_ssi) { + ASSERT(lo_ssi->prev == lo_ssi); + sl->list = NULL; + } + else { + ASSERT(lo_ssi->prev != lo_ssi); + lo_ssi->next = fo_ssi->next; + fo_ssi->next->prev = fo_ssi->prev; } - if (ssi->next) - ssi->next->prev = ssi->prev; - - erts_smp_spin_unlock(&sl->lock); - - ERTS_THR_MEMORY_BARRIER; - flgs = ssi_flags_set_wake(ssi); - erts_sched_finish_poke(ssi, flgs); - } else { - sl->list = NULL; + fo_ssi->next = fo_ssi->prev = NULL; + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, fo_ssi); erts_smp_spin_unlock(&sl->lock); ERTS_THR_MEMORY_BARRIER; - do { - ErtsSchedulerSleepInfo *wake_ssi = ssi; - ssi = ssi->next; - erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi)); - } while (ssi); + flgs = ssi_flags_set_wake(fo_ssi); + erts_sched_finish_poke(fo_ssi, flgs); } } -static void -wake_dirty_scheduler(ErtsRunQueue *rq) -{ - wake_dirty_schedulers(rq, 1); -} - #endif #define ERTS_NO_USED_RUNQS_SHIFT 16 @@ -4176,6 +4227,8 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) ERTS_SMP_DATA_DEPENDENCY_READ_MEMORY_BARRIER; state = erts_smp_atomic32_read_nob(&p->state); + ASSERT(state & ERTS_PSFLG_IN_RUNQ); + if (statep) *statep = state; @@ -4183,8 +4236,7 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) rqi = &runq->procs.prio_info[prio]; - if (p) - unqueue_process(runq, rpq, rqi, prio, NULL, p); + unqueue_process(runq, rpq, rqi, prio, NULL, p); return p; } @@ -4512,7 +4564,7 @@ evacuate_run_queue(ErtsRunQueue *rq, erts_smp_runq_unlock(to_rq); smp_notify_inc_runq(to_rq); - erts_smp_runq_lock(to_rq); + erts_smp_runq_lock(rq); } if (rq->ports.start) { @@ -4593,22 +4645,17 @@ evacuate_run_queue(ErtsRunQueue *rq, free_proxy_proc(proc); else { erts_aint32_t clr_bits; -#ifdef DEBUG - erts_aint32_t old; -#endif clr_bits = ERTS_PSFLG_IN_RUNQ; clr_bits |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; -#ifdef DEBUG - old = -#else - (void) -#endif - erts_smp_atomic32_read_band_mb(&proc->state, - ~clr_bits); - ASSERT((old & clr_bits) == clr_bits); + state = erts_smp_atomic32_read_band_mb(&proc->state, ~clr_bits); + ASSERT((state & clr_bits) == clr_bits); + if (state & ERTS_PSFLG_FREE) { + /* free and not queued by proxy */ + erts_proc_dec_refc(proc); + } } goto handle_next_proc; @@ -6371,6 +6418,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->next = NULL; + ssi->prev = NULL; ssi->event = NULL; /* initialized in sched_dirty_cpu_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } @@ -6381,6 +6430,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online for (ix = 0; ix < no_dirty_io_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->next = NULL; + ssi->prev = NULL; ssi->event = NULL; /* initialized in sched_dirty_io_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } @@ -6726,13 +6777,14 @@ fin_dirty_enq_s_change(Process *p, /* Already enqueue by someone else... */ if (pstruct_reserved) { /* We reserved process struct for enqueue; clear it... */ -#ifdef DEBUG - erts_aint32_t old = -#else - (void) -#endif - erts_smp_atomic32_read_band_nob(&p->state, ~ERTS_PSFLG_IN_RUNQ); - ASSERT(old & ERTS_PSFLG_IN_RUNQ); + erts_aint32_t state; + + state = erts_smp_atomic32_read_band_nob(&p->state, ~ERTS_PSFLG_IN_RUNQ); + ASSERT(state & ERTS_PSFLG_IN_RUNQ); + + if (state & ERTS_PSFLG_FREE) { + erts_proc_dec_refc(p); + } } return 0; } @@ -6903,8 +6955,9 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, enqueue = ERTS_ENQUEUE_NOT; n &= ~running_flgs; - if ((a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) - || (a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE) { + if ((!!(a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + | ((a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE)) + & !(a & ERTS_PSFLG_FREE)) { enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); } a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); @@ -7158,129 +7211,72 @@ erts_schedule_process(Process *p, erts_aint32_t state, ErtsProcLocks locks) schedule_process(p, state, locks); } -static int -schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, - erts_aint32_t *fail_state_p) -{ - int res; - int locked; - ErtsProcSysTaskQs *stqs, *free_stqs; - erts_aint32_t fail_state, state, a, n, enq_prio; +/* Enqueues the given sys task on the process and schedules it. The task may be + * NULL if only scheduling is desired. */ +static ERTS_INLINE erts_aint32_t +active_sys_enqueue(Process *p, ErtsProcSysTask *sys_task, + erts_aint32_t task_prio, erts_aint32_t enable_flags, + erts_aint32_t state, erts_aint32_t *fail_state_p) +{ + int runnable_procs = erts_system_profile_flags.runnable_procs; + erts_aint32_t n, a, enq_prio, fail_state; + int already_scheduled; + int status_locked; int enqueue; /* < 0 -> use proxy */ - unsigned int prof_runnable_procs; + enable_flags |= ERTS_PSFLG_ACTIVE_SYS; fail_state = *fail_state_p; - - res = 1; /* prepare for success */ - st->next = st->prev = st; /* Prep for empty prio queue */ - state = erts_smp_atomic32_read_nob(&p->state); - prof_runnable_procs = erts_system_profile_flags.runnable_procs; - locked = 0; - free_stqs = NULL; - if (state & ERTS_PSFLG_ACTIVE_SYS) - stqs = NULL; - else { - alloc_qs: - stqs = proc_sys_task_queues_alloc(); - stqs->qmask = 1 << prio; - stqs->ncount = 0; - stqs->q[PRIORITY_MAX] = NULL; - stqs->q[PRIORITY_HIGH] = NULL; - stqs->q[PRIORITY_NORMAL] = NULL; - stqs->q[PRIORITY_LOW] = NULL; - stqs->q[prio] = st; - } - - if (!locked) { - locked = 1; - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); - - state = erts_smp_atomic32_read_nob(&p->state); - if (state & fail_state) { - *fail_state_p = (state & fail_state); - free_stqs = stqs; - res = 0; - goto cleanup; - } - } - - if (!p->sys_task_qs) { - if (stqs) - p->sys_task_qs = stqs; - else - goto alloc_qs; - } - else { - free_stqs = stqs; - stqs = p->sys_task_qs; - if (!stqs->q[prio]) { - stqs->q[prio] = st; - stqs->qmask |= 1 << prio; - } - else { - st->next = stqs->q[prio]; - st->prev = stqs->q[prio]->prev; - st->next->prev = st; - st->prev->next = st; - ASSERT(stqs->qmask & (1 << prio)); - } - } - - if (ERTS_PSFLGS_GET_ACT_PRIO(state) > prio) { - erts_aint32_t n, a, e; - /* Need to elevate actual prio */ - - a = state; - do { - if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { - n = a; - break; - } - n = e = a; - n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; - n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); - a = erts_smp_atomic32_cmpxchg_nob(&p->state, n, e); - } while (a != e); - state = n; - } - - - a = state; + already_scheduled = 0; + status_locked = 0; enq_prio = -1; + a = state; - /* Status lock prevents out of order "runnable proc" trace msgs */ - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); + ASSERT(fail_state & (ERTS_PSFLG_EXITING | ERTS_PSFLG_FREE)); + ASSERT(!(fail_state & enable_flags)); + ASSERT(!(state & ERTS_PSFLG_PROXY)); - if (!prof_runnable_procs) { - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); - locked = 0; + /* When runnable_procs is enabled, we need to take the status lock to + * prevent trace messages from being sent in the wrong order. The lock must + * be held over the call to add2runq. + * + * Otherwise, we only need to take it when we're enqueuing a task and can + * safely release it before add2runq. */ + if (sys_task || runnable_procs) { + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + status_locked = 1; } - ASSERT(!(state & ERTS_PSFLG_PROXY)); - while (1) { erts_aint32_t e; n = e = a; - if (a & ERTS_PSFLG_FREE) - goto cleanup; /* We don't want to schedule free processes... */ + if (a & fail_state) { + *fail_state_p = a & fail_state; + goto cleanup; + } enqueue = ERTS_ENQUEUE_NOT; - n |= ERTS_PSFLG_ACTIVE_SYS; + n |= enable_flags; + if (!(a & (ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS | ERTS_PSFLG_DIRTY_RUNNING - | ERTS_PSFLG_DIRTY_RUNNING_SYS))) + | ERTS_PSFLG_DIRTY_RUNNING_SYS))) { enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); + } + a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); - if (a == e) + if (a == e) { break; - if (a == n && enqueue == ERTS_ENQUEUE_NOT) - goto cleanup; + } + else if (a == n && enqueue == ERTS_ENQUEUE_NOT) { + already_scheduled = 1; + break; + } } - if (prof_runnable_procs) { - + if (!already_scheduled && runnable_procs) { if (!(a & (ERTS_PSFLG_ACTIVE_SYS | ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS @@ -7290,24 +7286,89 @@ schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, /* We activated a prevously inactive process */ profile_runnable_proc(p, am_active); } + } - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); - locked = 0; + if (sys_task) { + ErtsProcSysTaskQs *stqs = p->sys_task_qs; + + if (!stqs) { + sys_task->next = sys_task->prev = sys_task; + + stqs = proc_sys_task_queues_alloc(); + + stqs->qmask = 1 << task_prio; + stqs->ncount = 0; + stqs->q[PRIORITY_MAX] = NULL; + stqs->q[PRIORITY_HIGH] = NULL; + stqs->q[PRIORITY_NORMAL] = NULL; + stqs->q[PRIORITY_LOW] = NULL; + stqs->q[task_prio] = sys_task; + + p->sys_task_qs = stqs; + } + else { + if (!stqs->q[task_prio]) { + sys_task->next = sys_task->prev = sys_task; + + stqs->q[task_prio] = sys_task; + stqs->qmask |= 1 << task_prio; + } + else { + sys_task->next = stqs->q[task_prio]; + sys_task->prev = stqs->q[task_prio]->prev; + sys_task->next->prev = sys_task; + sys_task->prev->next = sys_task; + ASSERT(stqs->qmask & (1 << task_prio)); + } + } } - add2runq(enqueue, enq_prio, p, n, NULL); + if (status_locked && !runnable_procs) { + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + status_locked = 0; + } + + if (!already_scheduled) { + add2runq(enqueue, enq_prio, p, n, NULL); + } cleanup: + if (status_locked) { + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } - if (locked) - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + return n; +} - if (free_stqs) - proc_sys_task_queues_free(free_stqs); +static int +schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, + erts_aint32_t *fail_state_p) +{ + erts_aint32_t fail_state, state; - ERTS_SMP_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); + /* Elevate priority if needed. */ + state = erts_smp_atomic32_read_nob(&p->state); + if (ERTS_PSFLGS_GET_ACT_PRIO(state) > prio) { + erts_aint32_t n, a, e; - return res; + a = state; + do { + if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { + n = a; + break; + } + n = e = a; + n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; + n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); + a = erts_smp_atomic32_cmpxchg_nob(&p->state, n, e); + } while (a != e); + + state = n; + } + + fail_state = *fail_state_p; + + return !(active_sys_enqueue(p, st, prio, 0, state, fail_state_p) & fail_state); } static ERTS_INLINE int @@ -7895,6 +7956,11 @@ suspend_scheduler(ErtsSchedulerData *esdp) return; } +#ifdef HARDDEBUG + if (sched_type != ERTS_SCHED_NORMAL) + ERTS_HDBG_CHK_SLEEP_LIST(&esdp->run_queue->sleepers, !0, NULL, ssi); +#endif + if (erts_smp_atomic32_read_nob(&ssi->flags) & ERTS_SSI_FLG_MSB_EXEC) { ASSERT(no == 1); if (!msb_scheduler_type_switch(sched_type, esdp, no)) @@ -10484,6 +10550,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) if (!is_normal_sched & !!(flags & ERTS_RUNQ_FLG_HALTING)) { /* Wait for emulator to terminate... */ + erts_smp_runq_unlock(rq); while (1) erts_milli_sleep(1000*1000); } @@ -10758,6 +10825,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } else if (state & ERTS_PSFLG_FREE) { /* free and not queued by proxy */ + ASSERT(state & ERTS_PSFLG_IN_RUNQ); erts_proc_dec_refc(p); } if (!is_normal_sched) @@ -14071,7 +14139,9 @@ erts_continue_exit_process(Process *p) n = e = a; ASSERT(a & ERTS_PSFLG_EXITING); n |= ERTS_PSFLG_FREE; - n &= ~ERTS_PSFLG_ACTIVE; + n &= ~(ERTS_PSFLG_ACTIVE + | ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS); if ((n & ERTS_PSFLG_IN_RUNQ) && !refc_inced) { erts_proc_inc_refc(p); refc_inced = 1; @@ -14524,12 +14594,12 @@ void erts_halt(int code) if (-1 == erts_smp_atomic32_cmpxchg_acqb(&erts_halt_progress, erts_no_schedulers, -1)) { + notify_reap_ports_relb(); #ifdef ERTS_DIRTY_SCHEDULERS ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_CPU_RUNQ, ERTS_RUNQ_FLG_HALTING); ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_IO_RUNQ, ERTS_RUNQ_FLG_HALTING); #endif erts_halt_code = code; - notify_reap_ports_relb(); } } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 5cac939771..b3385b780f 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -368,7 +368,7 @@ typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo; #ifdef ERTS_DIRTY_SCHEDULERS typedef struct { erts_smp_spinlock_t lock; - ErtsSchedulerSleepInfo *list; + ErtsSchedulerSleepInfo *list; /* circular lifo list; points to last out */ } ErtsSchedulerSleepList; #endif diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index d7116bd2c3..5fc42f231a 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -3142,7 +3142,7 @@ tailrecur_ne: ASSERT(alen == blen); for (i = (Sint) alen - 1; i >= 0; i--) if (anum[i] != bnum[i]) - RETURN_NEQ((Sint32) (anum[i] - bnum[i])); + RETURN_NEQ(anum[i] < bnum[i] ? -1 : 1); goto pop_next; case (_TAG_HEADER_EXTERNAL_REF >> _TAG_PRIMARY_SIZE): if (is_internal_ref(b)) { diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 554c48059f..f6de343f72 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1018,6 +1018,7 @@ typedef struct { inet_async_op* oph; /* queue head or NULL */ inet_async_op* opt; /* queue tail or NULL */ inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */ + int op_ref; /* queue reference generator */ int active; /* 0 = passive, 1 = active, 2 = active once */ Sint16 active_count; /* counter for {active,N} */ @@ -1268,8 +1269,7 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event); /* convert descriptor pointer to inet_descriptor pointer */ #define INETP(d) (&(d)->inet) -static int async_ref = 0; /* async reference id generator */ -#define NEW_ASYNC_ID() ((async_ref++) & 0xffff) +#define NEW_ASYNC_ID(desc) ((desc)->op_ref++ & 0xffff) /* check for transition from active to passive */ #define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \ @@ -1921,7 +1921,7 @@ static void enq_multi_op(tcp_descriptor *desc, char *buf, int req, ErlDrvTermData caller, MultiTimerData *timeout, ErlDrvMonitor *monitorp) { - int id = NEW_ASYNC_ID(); + int id = NEW_ASYNC_ID(INETP(desc)); enq_old_multi_op(desc,id,req,caller,timeout,monitorp); if (buf != NULL) put_int16(id, buf); @@ -1990,7 +1990,7 @@ static int remove_multi_op(tcp_descriptor *desc, int *id_p, int *req_p, static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout, ErlDrvMonitor *monitorp) { - int id = NEW_ASYNC_ID(); + int id = NEW_ASYNC_ID(desc); inet_async_op* opp; if ((opp = desc->oph) == NULL) /* queue empty */ @@ -8423,6 +8423,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->delimiter = '\n'; /* line delimiting char */ desc->oph = NULL; desc->opt = NULL; + desc->op_ref = 0; desc->peer_ptr = NULL; desc->name_ptr = NULL; diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl index c308760211..b25868d3cb 100644 --- a/erts/emulator/test/big_SUITE.erl +++ b/erts/emulator/test/big_SUITE.erl @@ -24,7 +24,7 @@ -export([t_div/1, eq_28/1, eq_32/1, eq_big/1, eq_math/1, big_literals/1, borders/1, negative/1, big_float_1/1, big_float_2/1, - bxor_2pow/1, + bxor_2pow/1, band_2pow/1, shift_limit_1/1, powmod/1, system_limit/1, toobig/1, otp_6692/1]). %% Internal exports. @@ -43,7 +43,7 @@ suite() -> all() -> [t_div, eq_28, eq_32, eq_big, eq_math, big_literals, borders, negative, {group, big_float}, shift_limit_1, - bxor_2pow, + bxor_2pow, band_2pow, powmod, system_limit, toobig, otp_6692]. groups() -> @@ -449,3 +449,38 @@ my_bxor(A, B, N, Acc0) -> false -> Acc0 bor (1 bsl N) end, my_bxor(A bsr 1, B bsr 1, N+1, Acc1). + + +%% ERL-804 +band_2pow(_Config) -> + IL = lists:seq(8*3, 8*16, 4), + JL = lists:seq(0, 64), + [band_2pow_1((1 bsl I), (1 bsl J)) + || I <- IL, J <- JL], + ok. + +band_2pow_1(A, B) -> + for(-1,1, fun(Ad) -> + for(-1,1, fun(Bd) -> + band_2pow_2(A+Ad, B+Bd), + band_2pow_2(-A+Ad, B+Bd), + band_2pow_2(A+Ad, -B+Bd), + band_2pow_2(-A+Ad, -B+Bd) + end) + end). + +band_2pow_2(A, B) -> + Correct = my_band(A, B), + case A band B of + Correct -> ok; + Wrong -> + io:format("~.16# band ~.16#\n", [A,B]), + io:format("Expected ~.16#\n", [Correct]), + io:format("Got ~.16#\n", [Wrong]), + ct:fail({failed, 'band'}) + + end. + +%% Implement band without band +my_band(A, B) -> + bnot ((bnot A) bor (bnot B)). diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl index a8bcfac84d..899a5c26bd 100644 --- a/erts/emulator/test/process_SUITE.erl +++ b/erts/emulator/test/process_SUITE.erl @@ -58,6 +58,7 @@ no_priority_inversion2/1, system_task_blast/1, system_task_on_suspended/1, + system_task_failed_enqueue/1, gc_request_when_gc_disabled/1, gc_request_blast_when_gc_disabled/1]). -export([prio_server/2, prio_client/2, init/1, handle_event/2]). @@ -104,7 +105,7 @@ groups() -> otp_7738_resume]}, {system_task, [], [no_priority_inversion, no_priority_inversion2, - system_task_blast, system_task_on_suspended, + system_task_blast, system_task_on_suspended, system_task_failed_enqueue, gc_request_when_gc_disabled, gc_request_blast_when_gc_disabled]}]. init_per_suite(Config) -> @@ -2531,6 +2532,57 @@ system_task_on_suspended(Config) when is_list(Config) -> ok end. +%% When a system task couldn't be enqueued due to the process being in an +%% incompatible state, it would linger in the system task list and get executed +%% anyway the next time the process was scheduled. This would result in a +%% double-free at best. +%% +%% This test continuously purges modules while other processes run dirty code, +%% which will provoke this error as ERTS_PSTT_CPC can't be enqueued while a +%% process is running dirty code. +system_task_failed_enqueue(Config) when is_list(Config) -> + case erlang:system_info(dirty_cpu_schedulers) of + N when N > 0 -> + system_task_failed_enqueue_1(Config); + _ -> + {skipped, "No dirty scheduler support"} + end. + +system_task_failed_enqueue_1(Config) -> + Priv = proplists:get_value(priv_dir, Config), + + Purgers = [spawn_link(fun() -> purge_loop(Priv, Id) end) + || Id <- lists:seq(1, erlang:system_info(schedulers))], + Hogs = [spawn_link(fun() -> dirty_loop() end) + || _ <- lists:seq(1, erlang:system_info(dirty_cpu_schedulers))], + + ct:sleep(5000), + + [begin + unlink(Pid), + exit(Pid, kill) + end || Pid <- (Purgers ++ Hogs)], + + ok. + +purge_loop(PrivDir, Id) -> + Mod = "failed_enq_" ++ integer_to_list(Id), + Path = PrivDir ++ "/" ++ Mod, + file:write_file(Path ++ ".erl", + "-module('" ++ Mod ++ "').\n" ++ + "-export([t/0]).\n" ++ + "t() -> ok."), + purge_loop_1(Path). +purge_loop_1(Path) -> + {ok, Mod} = compile:file(Path, []), + erlang:delete_module(Mod), + erts_code_purger:purge(Mod), + purge_loop_1(Path). + +dirty_loop() -> + ok = erts_debug:dirty_cpu(reschedule, 10000), + dirty_loop(). + gc_request_when_gc_disabled(Config) when is_list(Config) -> AIS = erts_debug:set_internal_state(available_internal_state, true), gc_request_when_gc_disabled_do(ref), diff --git a/erts/emulator/test/ref_SUITE.erl b/erts/emulator/test/ref_SUITE.erl index 5f519d522e..74df857c65 100644 --- a/erts/emulator/test/ref_SUITE.erl +++ b/erts/emulator/test/ref_SUITE.erl @@ -22,6 +22,7 @@ -export([all/0, suite/0]). -export([wrap_1/1]). +-export([compare_list/1, compare_ets/1]). -export([loop_ref/1]). @@ -32,7 +33,7 @@ suite() -> {timetrap, {minutes, 2}}]. all() -> - [wrap_1]. + [wrap_1, compare_list, compare_ets]. %% Check that refs don't wrap around easily. wrap_1(Config) when is_list(Config) -> @@ -53,3 +54,28 @@ loop_ref(Parent) -> loop_ref(R, R, _) -> ok; loop_ref(R0, _, N) -> loop_ref(R0, make_ref(), N+1). + +%% Check that ref ordering works +compare_list(Config) when is_list(Config) -> + %% Although this test uses external refs, it would apply the same to plain refs + ExtRef1 = <<131,114,0,3,100,0,3,110,64,98,3, 0,0,173,156, 0,216,0,4, 0,0,0,0>>, + ExtRef2 = <<131,114,0,3,100,0,3,110,64,98,3, 0,1,31,27, 129,4,0,1, 0,0,0,0>>, + + Ref1 = binary_to_term(ExtRef1), %% #Ref<[email protected]> + Ref2 = binary_to_term(ExtRef2), %% #Ref<[email protected]> + OrderedList = [Ref1, Ref2], + OrderedList = lists:sort(OrderedList), + ok. + +%% This is the scarier case since it makes terms "invisible" in ets or Mnesia +%% (the underlying fault cause is the same as compare_list/1) +compare_ets(Config) when is_list(Config) -> + W2s = [610350147,899574699,2994196869,686384822,2397690439, 923302211], + ExtRefBase = <<131,114,0,3,100,0,3,110,64,98,3>>, + ExtRefs = [<<ExtRefBase/binary, 1:32, W2:32, 0:32>> || W2 <- W2s], + Refs = [binary_to_term(Bin) || Bin <- ExtRefs], + + Ets = ets:new(refbug, [ordered_set]), + ets:insert(Ets, [{Ref,Ref} || Ref <- Refs]), + 0 = length([R || R <- ets:tab2list(Ets), ets:lookup(Ets, element(1,R)) == []]), + ok. diff --git a/erts/vsn.mk b/erts/vsn.mk index 9222b74f81..317ce9b800 100644 --- a/erts/vsn.mk +++ b/erts/vsn.mk @@ -18,7 +18,7 @@ # %CopyrightEnd% # -VSN = 9.3.3 +VSN = 9.3.3.9 # Port number 4365 in 4.2 # Port number 4366 in 4.3 |