From 7e42e6a63d6876b6a03f50f571e5e850c9b771e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Fri, 19 Jul 2019 23:25:16 +0200 Subject: Optimize is_subset and is_disjoint in cerl_sets The new implementation use maps iterators and are more perfomant in banchmarks by roughly 10%. More importantly, the iterators approach allow us to short-circuit and abort early. fold and filter have also been changed to use iterators. We could simply delegate to the maps' functions, but inlining the implementation allows us to skip a double anonymous function dispatch. --- lib/compiler/src/cerl_sets.erl | 65 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 12 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/cerl_sets.erl b/lib/compiler/src/cerl_sets.erl index f489baf238..84e488fc55 100644 --- a/lib/compiler/src/cerl_sets.erl +++ b/lib/compiler/src/cerl_sets.erl @@ -153,14 +153,21 @@ intersection1(S1, []) -> S1. Set1 :: set(Element), Set2 :: set(Element). -is_disjoint(S1, S2) when map_size(S1) < map_size(S2) -> - fold(fun (_, false) -> false; - (E, true) -> not is_element(E, S2) - end, true, S1); +is_disjoint(S1, S2) when map_size(S1) > map_size(S2) -> + is_disjoint_1(S1, maps:iterator(S2)); is_disjoint(S1, S2) -> - fold(fun (_, false) -> false; - (E, true) -> not is_element(E, S1) - end, true, S2). + is_disjoint_1(S2, maps:iterator(S1)). + +is_disjoint_1(Set, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Set of + #{K := _} -> false; + #{} -> is_disjoint_1(Set, NextIter) + end; + none -> + true + end. %% subtract(Set1, Set2) -> Set. %% Return all and only the elements of Set1 which are not also in @@ -180,8 +187,21 @@ subtract(S1, S2) -> Set1 :: set(Element), Set2 :: set(Element). +is_subset(S1, S2) when map_size(S1) > map_size(S2) -> + false; is_subset(S1, S2) -> - fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1). + is_subset_1(S2, maps:iterator(S1)). + +is_subset_1(Set, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Set of + #{K := _} -> is_subset_1(Set, NextIter); + #{} -> false + end; + none -> + true + end. %% fold(Fun, Accumulator, Set) -> Accumulator. %% Fold function Fun over all elements in Set and return Accumulator. @@ -193,8 +213,16 @@ is_subset(S1, S2) -> AccIn :: Acc, AccOut :: Acc. -fold(F, Init, D) -> - lists:foldl(fun(E,Acc) -> F(E,Acc) end,Init,maps:keys(D)). +fold(Fun, Init, Set) -> + fold_1(Fun, Init, maps:iterator(Set)). + +fold_1(Fun, Acc, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + fold_1(Fun, Fun(K,Acc), NextIter); + none -> + Acc + end. %% filter(Fun, Set) -> Set. %% Filter Set with Fun. @@ -203,5 +231,18 @@ fold(F, Init, D) -> Set1 :: set(Element), Set2 :: set(Element). -filter(F, D) -> - maps:filter(fun(K,_) -> F(K) end, D). +filter(Fun, Set) -> + maps:from_list(filter_1(Fun, maps:iterator(Set))). + +filter_1(Fun, Iter) -> + case maps:next(Iter) of + {K, _, NextIter} -> + case Fun(K) of + true -> + [{K,ok} | filter_1(Fun, NextIter)]; + false -> + filter_1(Fun, NextIter) + end; + none -> + [] + end. -- cgit v1.2.3