From 1ea57e5498d92b730644cb37b659d16e51f56b4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Thu, 6 Dec 2018 13:07:42 +0100
Subject: Make length/1 yielding

The guard BIF `length/1` would calculate the length of the list in one
go without yielding, even if the list was were long. To make it even
worse, the call to `length/1` would only cost a single reduction.

This commit reimplements `length/1` so that it eats a number of
reductions proportional to the length of the list, and yields if the
available reductions run out.
---
 erts/emulator/beam/bif_instrs.tab  |  81 ++++++++++++++++++++++++
 erts/emulator/beam/erl_bif_guard.c | 122 +++++++++++++++++++++++++++++++++----
 erts/emulator/beam/erl_db_util.c   |  23 ++++++-
 erts/emulator/beam/erl_init.c      |   1 +
 erts/emulator/beam/erl_vm.h        |   4 +-
 erts/emulator/beam/global.h        |   5 ++
 erts/emulator/beam/ops.tab         |  11 ++++
 erts/emulator/test/bif_SUITE.erl   |  54 +++++++++++++++-
 8 files changed, 282 insertions(+), 19 deletions(-)

(limited to 'erts')

diff --git a/erts/emulator/beam/bif_instrs.tab b/erts/emulator/beam/bif_instrs.tab
index 3abd062552..ce9e61a838 100644
--- a/erts/emulator/beam/bif_instrs.tab
+++ b/erts/emulator/beam/bif_instrs.tab
@@ -160,6 +160,87 @@ i_bif3_body(Bif, Src1, Src2, Src3, Dst) {
     goto post_error_handling;
 }
 
+//
+// length/1 is the only guard BIF that does not execute in constant
+// time. Here follows special instructions to allow the calculation of
+// the list length to be broken in several chunks to avoid hogging
+// the scheduler for a long time.
+//
+
+i_length_setup(Live, Src) {
+    Uint live = $Live;
+    Eterm src = $Src;
+
+    reg[live] = src;
+    reg[live+1] = make_small(0);
+    reg[live+2] = src;
+
+    /* This instruction is always followed by i_length */
+    SET_I($NEXT_INSTRUCTION);
+    goto i_length_start__;
+    //| -no_next
+}
+
+//
+// This instruction can be executed one or more times. When entering
+// this instruction, the X registers have the following contents:
+//
+// reg[live+0] The remainder of the list.
+// reg[live+1] The length so far (tagged integer).
+// reg[live+2] The original list. Only used if an error is generated
+//             (if the final tail of the list is not []).
+//
+
+i_length := i_length.start.execute;
+
+i_length.start() {
+ i_length_start__:
+    ;
+}
+
+i_length.execute(Fail, Live, Dst) {
+    Eterm result;
+    Uint live;
+
+    ERTS_DBG_CHK_REDS(c_p, FCALLS);
+    c_p->fcalls = FCALLS;
+    PROCESS_MAIN_CHK_LOCKS(c_p);
+    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
+    ERTS_CHK_MBUF_SZ(c_p);
+    DEBUG_SWAPOUT;
+
+    live = $Live;
+    result = erts_trapping_length_1(c_p, reg+live);
+
+    DEBUG_SWAPIN;
+    ERTS_CHK_MBUF_SZ(c_p);
+    ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result));
+    ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
+    PROCESS_MAIN_CHK_LOCKS(c_p);
+    ERTS_HOLE_CHECK(c_p);
+    FCALLS = c_p->fcalls;
+    ERTS_DBG_CHK_REDS(c_p, FCALLS);
+    if (ERTS_LIKELY(is_value(result))) {
+        /* Successful calculation of the list length. */
+        $REFRESH_GEN_DEST();
+        $Dst = result;
+        $NEXT0();
+    } else if (c_p->freason == TRAP) {
+        /*
+         * Good so far, but there is more work to do. Yield.
+         */
+        $SET_CP_I_ABS(I);
+        SWAPOUT;
+        c_p->arity = live + 3;
+        c_p->current = NULL;
+        goto context_switch3;
+    } else {
+        /* Error. */
+        $BIF_ERROR_ARITY_1($Fail, BIF_length_1, reg[live+2]);
+    }
+    //| -no_next
+}
+
 //
 // The most general BIF call.  The BIF may build any amount of data
 // on the heap.  The result is always returned in r(0).
diff --git a/erts/emulator/beam/erl_bif_guard.c b/erts/emulator/beam/erl_bif_guard.c
index 84783e71a0..c921b66a7e 100644
--- a/erts/emulator/beam/erl_bif_guard.c
+++ b/erts/emulator/beam/erl_bif_guard.c
@@ -42,6 +42,15 @@
 #include "erl_map.h"
 
 static Eterm double_to_integer(Process* p, double x);
+static BIF_RETTYPE erlang_length_trap(BIF_ALIST_3);
+static Export erlang_length_export;
+
+void erts_init_bif_guard(void)
+{
+    erts_init_trap_export(&erlang_length_export,
+			  am_erlang, am_length, 3,
+			  &erlang_length_trap);
+}
 
 BIF_RETTYPE abs_1(BIF_ALIST_1)
 {
@@ -192,26 +201,113 @@ BIF_RETTYPE round_1(BIF_ALIST_1)
     BIF_RET(res);
 }
 
+/*
+ * This version of length/1 is called from native code and apply/3.
+ */
+
 BIF_RETTYPE length_1(BIF_ALIST_1)
+{
+    Eterm args[3];
+
+    /*
+     * Arrange argument registers the way expected by
+     * erts_trapping_length_1(). We save the original argument in
+     * args[2] in case an error should signaled.
+     */
+
+    args[0] = BIF_ARG_1;
+    args[1] = make_small(0);
+    args[2] = BIF_ARG_1;
+    return erlang_length_trap(BIF_P, args, A__I);
+}
+
+static BIF_RETTYPE erlang_length_trap(BIF_ALIST_3)
+{
+    Eterm res;
+
+    res = erts_trapping_length_1(BIF_P, BIF__ARGS);
+    if (is_value(res)) {        /* Success. */
+        BIF_RET(res);
+    } else {                    /* Trap or error. */
+        if (BIF_P->freason == TRAP) {
+            /*
+             * The available reductions were exceeded. Trap.
+             */
+            BIF_TRAP3(&erlang_length_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+        } else {
+            /*
+             * Signal an error. The original argument was tucked away in BIF_ARG_3.
+             */
+            ERTS_BIF_ERROR_TRAPPED1(BIF_P, BIF_P->freason,
+                                    bif_export[BIF_length_1], BIF_ARG_3);
+        }
+    }
+}
+
+/*
+ * Trappable helper function for calculating length/1.
+ *
+ * When calling this function, entries in args[] should be set up as
+ * follows:
+ *
+ *   args[0] = List to calculate length for.
+ *   args[1] = Length accumulator (tagged integer).
+ *
+ * If the return value is a tagged integer, the length was calculated
+ * successfully.
+ *
+ * Otherwise, if return value is THE_NON_VALUE and p->freason is TRAP,
+ * the available reductions were exceeded and this function must be called
+ * again after rescheduling. args[0] and args[1] have been updated to
+ * contain the next part of the list and length so far, respectively.
+ *
+ * Otherwise, if return value is THE_NON_VALUE, the list did not end
+ * in an empty list (and p->freason is BADARG).
+ */
+
+Eterm erts_trapping_length_1(Process* p, Eterm* args)
 {
     Eterm list;
     Uint i;
-     
-    if (is_nil(BIF_ARG_1)) 
-	BIF_RET(SMALL_ZERO);
-    if (is_not_list(BIF_ARG_1)) {
-	BIF_ERROR(BIF_P, BADARG);
-    }
-    list = BIF_ARG_1;
-    i = 0;
-    while (is_list(list)) {
-	i++;
+    Uint max_iter;
+    Uint saved_max_iter;
+
+#if defined(DEBUG) || defined(VALGRIND)
+    max_iter = 50;
+#else
+    max_iter = ERTS_BIF_REDS_LEFT(p) * 16;
+#endif
+    saved_max_iter = max_iter;
+    ASSERT(max_iter > 0);
+
+    list = args[0];
+    i = unsigned_val(args[1]);
+    while (is_list(list) && max_iter != 0) {
 	list = CDR(list_val(list));
+	i++, max_iter--;
     }
-    if (is_not_nil(list))  {
-	BIF_ERROR(BIF_P, BADARG);
+
+    if (is_list(list)) {
+        /*
+         * We have exceeded the alloted number of iterations.
+         * Save the result so far and signal a trap.
+         */
+        args[0] = list;
+        args[1] = make_small(i);
+        p->freason = TRAP;
+        BUMP_ALL_REDS(p);
+        return THE_NON_VALUE;
+    } else if (is_not_nil(list))  {
+        /* Error. Should be NIL. */
+	BIF_ERROR(p, BADARG);
     }
-    BIF_RET(make_small(i));
+
+    /*
+     * We reached the end of the list successfully. Bump reductions
+     * and return result.
+     */
+    BUMP_REDS(p, saved_max_iter / 16);
+    return make_small(i);
 }
 
 /* returns the size of a tuple or a binary */
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index a78623f490..957762d4b0 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -497,6 +497,7 @@ static erts_atomic32_t trace_control_word;
 /* This needs to be here, before the bif table... */
 
 static Eterm db_set_trace_control_word_fake_1(BIF_ALIST_1);
+static Eterm db_length_1(BIF_ALIST_1);
 
 /*
 ** The table of callable bif's, i e guard bif's and 
@@ -603,7 +604,7 @@ static DMCGuardBif guard_tab[] =
     },
     {
 	am_length,
-	&length_1,
+	&db_length_1,
 	1,
 	DBIF_ALL
     },
@@ -971,6 +972,26 @@ BIF_RETTYPE db_set_trace_control_word_1(BIF_ALIST_1)
     BIF_RET(db_set_trace_control_word(BIF_P, BIF_ARG_1));
 }
 
+/*
+ * Implementation of length/1 for match specs (non-trapping).
+ */
+static Eterm db_length_1(BIF_ALIST_1)
+{
+    Eterm list;
+    Uint i;
+
+    list = BIF_ARG_1;
+    i = 0;
+    while (is_list(list)) {
+	i++;
+	list = CDR(list_val(list));
+    }
+    if (is_not_nil(list)) {
+	BIF_ERROR(BIF_P, BADARG);
+    }
+    BIF_RET(make_small(i));
+}
+
 static Eterm db_set_trace_control_word_fake_1(BIF_ALIST_1)
 {
     Process *p = BIF_P;
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 08c9125840..2b19d2cfd3 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -353,6 +353,7 @@ erl_init(int ncpu,
     erts_init_bif();
     erts_init_bif_chksum();
     erts_init_bif_binary();
+    erts_init_bif_guard();
     erts_init_bif_persistent_term();
     erts_init_bif_re();
     erts_init_unicode(); /* after RE to get access to PCRE unicode */
diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h
index fd46a804d3..35eae18394 100644
--- a/erts/emulator/beam/erl_vm.h
+++ b/erts/emulator/beam/erl_vm.h
@@ -41,8 +41,8 @@
 #define MAX_REG 1024            /* Max number of x(N) registers used */
 
 /*
- * The new arithmetic operations need some extra X registers in the register array.
- * so does the gc_bif's (i_gc_bif3 need 3 extra).
+ * The new trapping length/1 implementation need 3 extra registers in the
+ * register array.
  */
 #define ERTS_X_REGS_ALLOCATED (MAX_REG+3)
 
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 322981ca1d..77b5a3ca05 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -891,6 +891,11 @@ void erts_init_bif(void);
 Eterm erl_send(Process *p, Eterm to, Eterm msg);
 int erts_set_group_leader(Process *proc, Eterm new_gl);
 
+/* erl_bif_guard.c */
+
+void erts_init_bif_guard(void);
+Eterm erts_trapping_length_1(Process* p, Eterm* args);
+
 /* erl_bif_op.c */
 
 Eterm erl_is_function(Process* p, Eterm arg1, Eterm arg2);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index fbed2e56e1..cb414143fc 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -1587,6 +1587,17 @@ bif1 Fail u$bif:erlang:float/1 s d => too_old_compiler
 bif1 Fail u$bif:erlang:round/1 s d => too_old_compiler
 bif1 Fail u$bif:erlang:trunc/1 s d => too_old_compiler
 
+#
+# Handle the length/1 guard BIF specially to make it trappable.
+#
+
+gc_bif1 Fail=j Live u$bif:erlang:length/1 Src Dst => \
+   i_length_setup Live Src | i_length Fail Live Dst
+
+i_length_setup t xyc
+
+i_length j? t d
+
 #
 # Guard BIFs.
 #
diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl
index 9e7bcd5255..3eedf2f6a6 100644
--- a/erts/emulator/test/bif_SUITE.erl
+++ b/erts/emulator/test/bif_SUITE.erl
@@ -37,7 +37,8 @@
          group_leader_prio/1, group_leader_prio_dirty/1,
          is_process_alive/1,
          process_info_blast/1,
-         os_env_case_sensitivity/1]).
+         os_env_case_sensitivity/1,
+         test_length/1]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]},
@@ -52,7 +53,8 @@ all() ->
      erl_crash_dump_bytes, min_max, erlang_halt, is_builtin,
      error_stacktrace, error_stacktrace_during_call_trace,
      group_leader_prio, group_leader_prio_dirty,
-     is_process_alive, process_info_blast, os_env_case_sensitivity].
+     is_process_alive, process_info_blast, os_env_case_sensitivity,
+     test_length].
 
 %% Uses erlang:display to test that erts_printf does not do deep recursion
 display(Config) when is_list(Config) ->
@@ -1181,7 +1183,53 @@ consume_msgs() ->
     after 0 ->
               ok
     end.
-                              
+
+%% Test that length/1 returns the correct result after trapping, and
+%% also that the argument is correct in the stacktrace for a badarg
+%% exception.
+
+test_length(_Config) ->
+    {Start,Inc} = case test_server:timetrap_scale_factor() of
+                      1 -> {16*4000,3977};
+                      _ -> {100,1}
+            end,
+    Good = lists:reverse(lists:seq(1, Start)),
+    Bad = Good ++ [bad|cons],
+    test_length(Start, 10*Start, Inc, Good, Bad),
+
+    %% Test that calling length/1 from a match spec works.
+    MsList = lists:seq(1, 2*Start),
+    MsInput = [{tag,Good},{tag,MsList}],
+    Ms0 = [{{tag,'$1'},[{'>',{length,'$1'},Start}],['$1']}],
+    Ms = ets:match_spec_compile(Ms0),
+    [MsList] = ets:match_spec_run(MsInput, Ms),
+    ok.
+
+test_length(I, N, Inc, Good, Bad) when I < N ->
+    Length = id(length),
+    I = length(Good),
+    I = erlang:Length(Good),
+
+    %% Test length/1 in guards.
+    if
+        length(Good) =:= I ->
+            ok
+    end,
+    if
+        length(Bad) =:= I ->
+            error(should_fail);
+        true ->
+            ok
+    end,
+
+    {'EXIT',{badarg,[{erlang,length,[[I|_]],_}|_]}} = (catch length(Bad)),
+    {'EXIT',{badarg,[{erlang,length,[[I|_]],_}|_]}} = (catch erlang:Length(Bad)),
+    IncSeq = lists:seq(I + 1, I + Inc),
+    test_length(I+Inc, N, Inc,
+                lists:reverse(IncSeq, Good),
+                lists:reverse(IncSeq, Bad));
+test_length(_, _, _, _, _) -> ok.
+
 %% helpers
     
 id(I) -> I.
-- 
cgit v1.2.3