Optimize operator '--' and yield on large inputs

The removal set now uses a red-black tree instead of an array on large inputs, decreasing runtime complexity from `n*n` to `n*log(n)`. It will also exit early when there are no more items left in the removal set, drastically improving performance and memory use when the items to be removed are present near the head of the list.

This got a lot more complicated than before, as the overhead of always using a red-black tree was unacceptable when either of the inputs was small, but this compromise has okay-to-decent performance regardless of input size.

Co-authored-by: Dmytro Lytovchenko <[email protected]>
author: John Högberg <[email protected]> 2018-10-15 18:17:12 +0200
committer: John Högberg <[email protected]> 2018-10-29 08:10:55 +0100
commit: eb9ee88f4cc640065f4902e270d834bfb596d5fc (patch)
tree: a7ca57899b6d52417900eedd12995183f54f3bd5 /lib/stdlib
parent: 1056d2d1fd49f669a2001f03890e13c9cba76c1e (diff)
download: otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.tar.gz
otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.tar.bz2
otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.zip
2 files changed, 42 insertions, 8 deletions
diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml
index c3d5d7e07a..e4215a5336 100644
--- a/lib/stdlib/doc/src/lists.xml
+++ b/lib/stdlib/doc/src/lists.xml
@@ -850,14 +850,6 @@ splitwith(Pred, List) ->
 > <input>lists:subtract("123212", "212").</input>
 "312".</pre>
         <p><c>lists:subtract(A, B)</c> is equivalent to <c>A -- B</c>.</p>
-        <warning>
-          <p>The complexity of <c>lists:subtract(A, B)</c> is proportional to
-            <c>length(A)*length(B)</c>, meaning that it is very slow if both
-            <c>A</c> and <c>B</c> are long lists. (If both lists are long, it
-            is a much better choice to use ordered lists and
-            <seealso marker="ordsets#subtract/2">
-            <c>ordsets:subtract/2</c></seealso>.</p>
-        </warning>
       </desc>
     </func>
 
diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl
index 837ab4e97e..9a94bcc012 100644
--- a/lib/stdlib/test/lists_SUITE.erl
+++ b/lib/stdlib/test/lists_SUITE.erl
@@ -2597,6 +2597,13 @@ subtract(Config) when is_list(Config) ->
     {'EXIT',_} = (catch sub([a|b], [])),
     {'EXIT',_} = (catch sub([a|b], [a])),
 
+    %% Trapping, both crashing and otherwise.
+    [sub_trapping(N) || N <- lists:seq(0, 18)],
+
+    %% The current implementation chooses which algorithm to use based on
+    %% certain thresholds, and we need proper coverage for all corner cases.
+    [sub_thresholds(N) || N <- lists:seq(0, 32)],
+
     ok.
 
 sub_non_matching(A, B) ->
@@ -2606,6 +2613,41 @@ sub(A, B) ->
     Res = A -- B,
     Res = lists:subtract(A, B).
 
+sub_trapping(N) ->
+    List = lists:duplicate(N + (1 bsl N), gurka),
+    ImproperList = List ++ crash,
+
+    {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])),
+    {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)),
+
+    List = List -- lists:duplicate(N + (1 bsl N), gaffel),
+    ok = sub_trapping_1(List, []).
+
+sub_trapping_1([], _) -> ok;
+sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]).
+
+sub_thresholds(N) ->
+    %% This needs to be long enough to cause trapping.
+    OtherLen = 1 bsl 18,
+    Other = lists:seq(0, OtherLen - 1),
+
+    Disjoint = lists:seq(-N, -1),
+    Subset = lists:seq(1, N),
+
+    %% LHS is disjoint from RHS, so all elements must be retained.
+    Disjoint = Disjoint -- Other,
+
+    %% LHS is covered by RHS, so all elements must be removed.
+    [] = Subset -- Other,
+
+    %% RHS is disjoint from LHS, so all elements must be retained.
+    Other = Other -- Disjoint,
+
+    %% RHS is covered by LHS, so N elements must be removed.
+    N = OtherLen - length(Other -- Subset),
+
+    ok.
+
 %% Test lists:droplast/1
 droplast(Config) when is_list(Config) ->
     [] = lists:droplast([x]),
author	John Högberg <[email protected]>	2018-10-15 18:17:12 +0200
committer	John Högberg <[email protected]>	2018-10-29 08:10:55 +0100
commit	eb9ee88f4cc640065f4902e270d834bfb596d5fc (patch)
tree	a7ca57899b6d52417900eedd12995183f54f3bd5 /lib/stdlib
parent	1056d2d1fd49f669a2001f03890e13c9cba76c1e (diff)
download	otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.tar.gz otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.tar.bz2 otp-eb9ee88f4cc640065f4902e270d834bfb596d5fc.zip