Optimize is_subset and is_disjoint in cerl_sets

The new implementation use maps iterators and are more perfomant in banchmarks by roughly 10%. More importantly, the iterators approach allow us to short-circuit and abort early. fold and filter have also been changed to use iterators. We could simply delegate to the maps' functions, but inlining the implementation allows us to skip a double anonymous function dispatch.
author: José Valim <[email protected]> 2019-07-19 23:25:16 +0200
committer: José Valim <[email protected]> 2019-07-19 23:31:52 +0200
commit: 7e42e6a63d6876b6a03f50f571e5e850c9b771e8 (patch)
tree: ed9f896511904bef55b14d4e9980211e134af157 /lib
parent: bb4955e083e34be40a9e0d5c0831e5b1498d11df (diff)
download: otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.tar.gz
otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.tar.bz2
otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.zip
1 files changed, 53 insertions, 12 deletions
diff --git a/lib/compiler/src/cerl_sets.erl b/lib/compiler/src/cerl_sets.erl
index f489baf238..84e488fc55 100644
--- a/lib/compiler/src/cerl_sets.erl
+++ b/lib/compiler/src/cerl_sets.erl
@@ -153,14 +153,21 @@ intersection1(S1, []) -> S1.
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-is_disjoint(S1, S2) when map_size(S1) < map_size(S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S2)
-	 end, true, S1);
+is_disjoint(S1, S2) when map_size(S1) > map_size(S2) ->
+    is_disjoint_1(S1, maps:iterator(S2));
 is_disjoint(S1, S2) ->
-    fold(fun (_, false) -> false;
-	     (E, true) -> not is_element(E, S1)
-	 end, true, S2).
+    is_disjoint_1(S2, maps:iterator(S1)).
+
+is_disjoint_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> false;
+                #{} -> is_disjoint_1(Set, NextIter)
+            end;
+        none ->
+            true
+    end.
 
 %% subtract(Set1, Set2) -> Set.
 %%  Return all and only the elements of Set1 which are not also in
@@ -180,8 +187,21 @@ subtract(S1, S2) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
+is_subset(S1, S2) when map_size(S1) > map_size(S2) ->
+    false;
 is_subset(S1, S2) ->
-    fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1).
+    is_subset_1(S2, maps:iterator(S1)).
+
+is_subset_1(Set, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Set of
+                #{K := _} -> is_subset_1(Set, NextIter);
+                #{} -> false
+            end;
+        none ->
+            true
+    end.
 
 %% fold(Fun, Accumulator, Set) -> Accumulator.
 %%  Fold function Fun over all elements in Set and return Accumulator.
@@ -193,8 +213,16 @@ is_subset(S1, S2) ->
       AccIn :: Acc,
       AccOut :: Acc.
 
-fold(F, Init, D) ->
-    lists:foldl(fun(E,Acc) -> F(E,Acc) end,Init,maps:keys(D)).
+fold(Fun, Init, Set) ->
+    fold_1(Fun, Init, maps:iterator(Set)).
+
+fold_1(Fun, Acc, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            fold_1(Fun, Fun(K,Acc), NextIter);
+        none ->
+            Acc
+    end.
 
 %% filter(Fun, Set) -> Set.
 %%  Filter Set with Fun.
@@ -203,5 +231,18 @@ fold(F, Init, D) ->
       Set1 :: set(Element),
       Set2 :: set(Element).
 
-filter(F, D) ->
-    maps:filter(fun(K,_) -> F(K) end, D).
+filter(Fun, Set) ->
+    maps:from_list(filter_1(Fun, maps:iterator(Set))).
+
+filter_1(Fun, Iter) ->
+    case maps:next(Iter) of
+        {K, _, NextIter} ->
+            case Fun(K) of
+                true ->
+                    [{K,ok} | filter_1(Fun, NextIter)];
+                false ->
+                    filter_1(Fun, NextIter)
+            end;
+        none ->
+            []
+    end.
author	José Valim <[email protected]>	2019-07-19 23:25:16 +0200
committer	José Valim <[email protected]>	2019-07-19 23:31:52 +0200
commit	7e42e6a63d6876b6a03f50f571e5e850c9b771e8 (patch)
tree	ed9f896511904bef55b14d4e9980211e134af157 /lib
parent	bb4955e083e34be40a9e0d5c0831e5b1498d11df (diff)
download	otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.tar.gz otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.tar.bz2 otp-7e42e6a63d6876b6a03f50f571e5e850c9b771e8.zip