From d98da38562ec79360b58eed87eced3a506f1ff6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20H=C3=B6gberg?= Date: Mon, 15 Oct 2018 18:17:12 +0200 Subject: Optimize operator '--' and yield on large inputs The removal set now uses a red-black tree instead of an array on large inputs, decreasing runtime complexity from `n*n` to `n*log(n)`. It will also exit early when there are no more items left in the removal set, drastically improving performance and memory use when the items to be removed are present near the head of the list. This got a lot more complicated than before as the overhead of always using a red-black tree was unacceptable when either of the inputs were small, but this compromise has okay-to-decent performance regardless of input size. Co-authored-by: Dmytro Lytovchenko --- lib/stdlib/doc/src/lists.xml | 6 ------ lib/stdlib/test/lists_SUITE.erl | 42 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/lists.xml b/lib/stdlib/doc/src/lists.xml index 89ba5238b5..4a4c170bba 100644 --- a/lib/stdlib/doc/src/lists.xml +++ b/lib/stdlib/doc/src/lists.xml @@ -708,12 +708,6 @@ splitwith(Pred, List) -> > lists:subtract("123212", "212"). "312".

lists:subtract(A, B) is equivalent to A -- B.

-

The complexity of lists:subtract(A, B) is proportional - to length(A)*length(B), meaning that it will be very slow if - both A and B are long lists. - (Using ordered lists and - ordsets:subtract/2 - is a much better choice if both lists are long.)

diff --git a/lib/stdlib/test/lists_SUITE.erl b/lib/stdlib/test/lists_SUITE.erl index a0f7fd2744..21b2dd1777 100644 --- a/lib/stdlib/test/lists_SUITE.erl +++ b/lib/stdlib/test/lists_SUITE.erl @@ -2692,6 +2692,13 @@ subtract(Config) when is_list(Config) -> ?line {'EXIT',_} = (catch sub([a|b], [])), ?line {'EXIT',_} = (catch sub([a|b], [a])), + %% Trapping, both crashing and otherwise. + [sub_trapping(N) || N <- lists:seq(0, 18)], + + %% The current implementation chooses which algorithm to use based on + %% certain thresholds, and we need proper coverage for all corner cases. + [sub_thresholds(N) || N <- lists:seq(0, 32)], + ok. sub_non_matching(A, B) -> @@ -2701,6 +2708,41 @@ sub(A, B) -> Res = A -- B, Res = lists:subtract(A, B). +sub_trapping(N) -> + List = lists:duplicate(N + (1 bsl N), gurka), + ImproperList = List ++ crash, + + {'EXIT',_} = (catch sub_trapping_1(ImproperList, [])), + {'EXIT',_} = (catch sub_trapping_1(List, ImproperList)), + + List = List -- lists:duplicate(N + (1 bsl N), gaffel), + ok = sub_trapping_1(List, []). + +sub_trapping_1([], _) -> ok; +sub_trapping_1(L, R) -> sub_trapping_1(L -- R, [gurka | R]). + +sub_thresholds(N) -> + %% This needs to be long enough to cause trapping. + OtherLen = 1 bsl 18, + Other = lists:seq(0, OtherLen - 1), + + Disjoint = lists:seq(-N, -1), + Subset = lists:seq(1, N), + + %% LHS is disjoint from RHS, so all elements must be retained. + Disjoint = Disjoint -- Other, + + %% LHS is covered by RHS, so all elements must be removed. + [] = Subset -- Other, + + %% RHS is disjoint from LHS, so all elements must be retained. + Other = Other -- Disjoint, + + %% RHS is covered by LHS, so N elements must be removed. + N = OtherLen - length(Other -- Subset), + + ok. + %% Test lists:droplast/1 droplast(Config) when is_list(Config) -> ?line [] = lists:droplast([x]), -- cgit v1.2.3 From d8aef84f6ecb12f1367a3b9c783da1306b66a10c Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 9 Nov 2018 15:24:48 +0100 Subject: Prepare release --- lib/stdlib/doc/src/notes.xml | 15 +++++++++++++++ lib/stdlib/vsn.mk | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/notes.xml b/lib/stdlib/doc/src/notes.xml index 5d4f9d912f..c06e97d63e 100644 --- a/lib/stdlib/doc/src/notes.xml +++ b/lib/stdlib/doc/src/notes.xml @@ -31,6 +31,21 @@

This document describes the changes made to the STDLIB application.

+
STDLIB 2.8.0.1 + +
Improvements and New Features + + +

List subtraction (The -- operator) will now + yield properly on large inputs.

+

+ Own Id: OTP-15371

+
+
+
+ +
+
STDLIB 2.8
Fixed Bugs and Malfunctions diff --git a/lib/stdlib/vsn.mk b/lib/stdlib/vsn.mk index 5bac4be9d7..99dca8415d 100644 --- a/lib/stdlib/vsn.mk +++ b/lib/stdlib/vsn.mk @@ -1 +1 @@ -STDLIB_VSN = 2.8 +STDLIB_VSN = 2.8.0.1 -- cgit v1.2.3