From 7e42e6a63d6876b6a03f50f571e5e850c9b771e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Valim?= <jose.valim@plataformatec.com.br>
Date: Fri, 19 Jul 2019 23:25:16 +0200
Subject: Optimize is_subset and is_disjoint in cerl_sets

The new implementation use maps iterators and are more
perfomant in banchmarks by roughly 10%. More importantly,
the iterators approach allow us to short-circuit and abort
early.

fold and filter have also been changed to use iterators.
We could simply delegate to the maps' functions, but
inlining the implementation allows us to skip a double
anonymous function dispatch.
---
 lib/compiler/src/cerl_sets.erl | 65 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 12 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/cerl_sets.erl b/lib/compiler/src/cerl_sets.erl
index f489baf238..84e488fc55 100644
--- a/lib/compiler/src/cerl_sets.erl
+++ b/lib/compiler/src/cerl_sets.erl
@@ -153,14 +153,21 @@ intersection1(S1, []) -> S1.
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-is_disjoint(S1, S2) when map_size(S1) < map_size(S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S2)
-	 end, true, S1);
+is_disjoint(S1, S2) when map_size(S1) > map_size(S2) ->
+    is_disjoint_1(S1, maps:iterator(S2));
 is_disjoint(S1, S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S1)
-	 end, true, S2).
+    is_disjoint_1(S2, maps:iterator(S1)).
+
+is_disjoint_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> false;
+                #{} -> is_disjoint_1(Set, NextIter)
+            end;
+        none ->
+            true
+    end.
 
 %% subtract(Set1, Set2) -> Set.
 %%  Return all and only the elements of Set1 which are not also in
@@ -180,8 +187,21 @@ subtract(S1, S2) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
+is_subset(S1, S2) when map_size(S1) > map_size(S2) ->
+    false;
 is_subset(S1, S2) ->
-    fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1).
+    is_subset_1(S2, maps:iterator(S1)).
+
+is_subset_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> is_subset_1(Set, NextIter);
+                #{} -> false
+            end;
+        none ->
+            true
+    end.
 
 %% fold(Fun, Accumulator, Set) -> Accumulator.
 %%  Fold function Fun over all elements in Set and return Accumulator.
@@ -193,8 +213,16 @@ is_subset(S1, S2) ->
       AccIn :: Acc,
       AccOut :: Acc.
 
-fold(F, Init, D) ->
-    lists:foldl(fun(E,Acc) -> F(E,Acc) end,Init,maps:keys(D)).
+fold(Fun, Init, Set) ->
+    fold_1(Fun, Init, maps:iterator(Set)).
+
+fold_1(Fun, Acc, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            fold_1(Fun, Fun(K,Acc), NextIter);
+        none ->
+            Acc
+    end.
 
 %% filter(Fun, Set) -> Set.
 %%  Filter Set with Fun.
@@ -203,5 +231,18 @@ fold(F, Init, D) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-filter(F, D) ->
-    maps:filter(fun(K,_) -> F(K) end, D).
+filter(Fun, Set) ->
+    maps:from_list(filter_1(Fun, maps:iterator(Set))).
+
+filter_1(Fun, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Fun(K) of
+                true ->
+                    [{K,ok} | filter_1(Fun, NextIter)];
+                false ->
+                    filter_1(Fun, NextIter)
+            end;
+        none ->
+            []
+    end.
-- 
cgit v1.2.3