The R13B03 release.OTP_R13B03

author: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
committer: Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
commit: 84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree: bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/stdlib/src/sets.erl
download: otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
1 files changed, 417 insertions, 0 deletions
diff --git a/lib/stdlib/src/sets.erl b/lib/stdlib/src/sets.erl
new file mode 100644
index 0000000000..bcddca2567
--- /dev/null
+++ b/lib/stdlib/src/sets.erl
@@ -0,0 +1,417 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2000-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+%% We use the dynamic hashing techniques by Per-�ke Larsson as
+%% described in "The Design and Implementation of Dynamic Hashing for
+%% Sets and Tables in Icon" by Griswold and Townsend.  Much of the
+%% terminology comes from that paper as well.
+
+%% The segments are all of the same fixed size and we just keep
+%% increasing the size of the top tuple as the table grows.  At the
+%% end of the segments tuple we keep an empty segment which we use
+%% when we expand the segments.  The segments are expanded by doubling
+%% every time n reaches maxn instead of increasing the tuple one
+%% element at a time.  It is easier and does not seem detrimental to
+%% speed.  The same applies when contracting the segments.
+%%
+%% Note that as the order of the keys is undefined we may freely
+%% reorder keys within in a bucket.
+
+-module(sets).
+
+%% Standard interface.
+-export([new/0,is_set/1,size/1,to_list/1,from_list/1]).
+-export([is_element/2,add_element/2,del_element/2]).
+-export([union/2,union/1,intersection/2,intersection/1]).
+-export([is_disjoint/2]).
+-export([subtract/2,is_subset/2]).
+-export([fold/3,filter/2]).
+
+%% Note: mk_seg/1 must be changed too if seg_size is changed.
+-define(seg_size, 16).
+-define(max_seg, 32).
+-define(expand_load, 5).
+-define(contract_load, 3).
+-define(exp_size, ?seg_size * ?expand_load).
+-define(con_size, ?seg_size * ?contract_load).
+
+%%------------------------------------------------------------------------------
+
+-type seg()  :: tuple().
+-type segs() :: tuple().
+
+%% Define a hash set.  The default values are the standard ones.
+-record(set,
+	{size=0              :: non_neg_integer(),	% Number of elements
+	 n=?seg_size         :: non_neg_integer(),	% Number of active slots
+	 maxn=?seg_size      :: pos_integer(),  	% Maximum slots
+	 bso=?seg_size div 2 :: non_neg_integer(),      % Buddy slot offset
+	 exp_size=?exp_size  :: non_neg_integer(),	% Size to expand at
+	 con_size=?con_size  :: non_neg_integer(),	% Size to contract at
+	 empty               :: seg(),			% Empty segment
+	 segs                :: segs()			% Segments
+	}).
+%% A declaration equivalent to the following one is hard-coded in erl_types.
+%% That declaration contains hard-coded information about the #set{}
+%% record and the types of its fields.  So, please make sure that any
+%% changes to its structure are also propagated to erl_types.erl.
+%%
+%% -opaque set() :: #set{}.
+
+%%------------------------------------------------------------------------------
+
+%% new() -> Set
+-spec new() -> set().
+new() ->
+    Empty = mk_seg(?seg_size),
+    #set{empty = Empty, segs = {Empty}}.
+
+%% is_set(Set) -> boolean().
+%%  Return 'true' if Set is a set of elements, else 'false'.
+-spec is_set(term()) -> boolean().
+is_set(#set{}) -> true;
+is_set(_) -> false.
+
+%% size(Set) -> int().
+%%  Return the number of elements in Set.
+-spec size(set()) -> non_neg_integer().
+size(S) -> S#set.size. 
+
+%% to_list(Set) -> [Elem].
+%%  Return the elements in Set as a list.
+-spec to_list(set()) -> [term()].
+to_list(S) ->
+    fold(fun (Elem, List) -> [Elem|List] end, [], S).
+
+%% from_list([Elem]) -> Set.
+%%  Build a set from the elements in List.
+-spec from_list([term()]) -> set().
+from_list(L) ->
+    lists:foldl(fun (E, S) -> add_element(E, S) end, new(), L).
+
+%% is_element(Element, Set) -> boolean().
+%%  Return 'true' if Element is an element of Set, else 'false'.
+-spec is_element(term(), set()) -> boolean().
+is_element(E, S) ->
+    Slot = get_slot(S, E),
+    Bkt = get_bucket(S, Slot),
+    lists:member(E, Bkt).
+
+%% add_element(Element, Set) -> Set.
+%%  Return Set with Element inserted in it.
+-spec add_element(term(), set()) -> set().
+add_element(E, S0) ->
+    Slot = get_slot(S0, E),
+    {S1,Ic} = on_bucket(fun (B0) -> add_bkt_el(E, B0, B0) end, S0, Slot),
+    maybe_expand(S1, Ic).
+
+-spec add_bkt_el(T, [T], [T]) -> {[T], 0 | 1}.
+add_bkt_el(E, [E|_], Bkt) -> {Bkt,0};
+add_bkt_el(E, [_|B], Bkt) ->
+    add_bkt_el(E, B, Bkt);
+add_bkt_el(E, [], Bkt) -> {[E|Bkt],1}.
+
+%% del_element(Element, Set) -> Set.
+%%  Return Set but with Element removed.
+-spec del_element(term(), set()) -> set().
+del_element(E, S0) ->
+    Slot = get_slot(S0, E),
+    {S1,Dc} = on_bucket(fun (B0) -> del_bkt_el(E, B0) end, S0, Slot),
+    maybe_contract(S1, Dc).
+
+-spec del_bkt_el(T, [T]) -> {[T], 0 | 1}.
+del_bkt_el(E, [E|Bkt]) -> {Bkt,1};
+del_bkt_el(E, [Other|Bkt0]) ->
+    {Bkt1,Dc} = del_bkt_el(E, Bkt0),
+    {[Other|Bkt1],Dc};
+del_bkt_el(_, []) -> {[],0}.
+
+%% union(Set1, Set2) -> Set
+%%  Return the union of Set1 and Set2.
+-spec union(set(), set()) -> set().
+union(S1, S2) when S1#set.size < S2#set.size ->
+    fold(fun (E, S) -> add_element(E, S) end, S2, S1);
+union(S1, S2) ->
+    fold(fun (E, S) -> add_element(E, S) end, S1, S2).
+
+%% union([Set]) -> Set
+%%  Return the union of the list of sets.
+-spec union([set()]) -> set().
+union([S1,S2|Ss]) ->
+    union1(union(S1, S2), Ss);
+union([S]) -> S;
+union([]) -> new().
+
+-spec union1(set(), [set()]) -> set().
+union1(S1, [S2|Ss]) ->
+    union1(union(S1, S2), Ss);
+union1(S1, []) -> S1.
+
+%% intersection(Set1, Set2) -> Set.
+%%  Return the intersection of Set1 and Set2.
+-spec intersection(set(), set()) -> set().
+intersection(S1, S2) when S1#set.size < S2#set.size ->
+    filter(fun (E) -> is_element(E, S2) end, S1);
+intersection(S1, S2) ->
+    filter(fun (E) -> is_element(E, S1) end, S2).
+
+%% intersection([Set]) -> Set.
+%%  Return the intersection of the list of sets.
+-spec intersection([set(),...]) -> set().
+intersection([S1,S2|Ss]) ->
+    intersection1(intersection(S1, S2), Ss);
+intersection([S]) -> S.
+
+-spec intersection1(set(), [set()]) -> set().
+intersection1(S1, [S2|Ss]) ->
+    intersection1(intersection(S1, S2), Ss);
+intersection1(S1, []) -> S1.
+
+%% is_disjoint(Set1, Set2) -> boolean().
+%%  Check whether Set1 and Set2 are disjoint.
+-spec is_disjoint(set(), set()) -> boolean().
+is_disjoint(S1, S2) when S1#set.size < S2#set.size ->
+    fold(fun (_, false) -> false;
+	     (E, true) -> not is_element(E, S2)
+	 end, true, S1);
+is_disjoint(S1, S2) ->
+    fold(fun (_, false) -> false;
+	     (E, true) -> not is_element(E, S1)
+	 end, true, S2).
+
+%% subtract(Set1, Set2) -> Set.
+%%  Return all and only the elements of Set1 which are not also in
+%%  Set2.
+-spec subtract(set(), set()) -> set().
+subtract(S1, S2) ->
+    filter(fun (E) -> not is_element(E, S2) end, S1).
+
+%% is_subset(Set1, Set2) -> boolean().
+%%  Return 'true' when every element of Set1 is also a member of
+%%  Set2, else 'false'.
+-spec is_subset(set(), set()) -> boolean().
+is_subset(S1, S2) ->
+    fold(fun (E, Sub) -> Sub andalso is_element(E, S2) end, true, S1).
+
+%% fold(Fun, Accumulator, Set) -> Accumulator.
+%%  Fold function Fun over all elements in Set and return Accumulator.
+-spec fold(fun((_,_) -> _), T, set()) -> T.
+fold(F, Acc, D) -> fold_set(F, Acc, D).
+
+%% filter(Fun, Set) -> Set.
+%%  Filter Set with Fun.
+-spec filter(fun((_) -> boolean()), set()) -> set().
+filter(F, D) -> filter_set(F, D).
+
+%% get_slot(Hashdb, Key) -> Slot.
+%%  Get the slot.  First hash on the new range, if we hit a bucket
+%%  which has not been split use the unsplit buddy bucket.
+-spec get_slot(set(), term()) -> non_neg_integer().
+get_slot(T, Key) ->
+    H = erlang:phash(Key, T#set.maxn),
+    if
+	H > T#set.n -> H - T#set.bso;
+	true -> H
+    end.
+
+%% get_bucket(Hashdb, Slot) -> Bucket.
+-spec get_bucket(set(), non_neg_integer()) -> term().
+get_bucket(T, Slot) -> get_bucket_s(T#set.segs, Slot).
+
+%% on_bucket(Fun, Hashdb, Slot) -> {NewHashDb,Result}.
+%%  Apply Fun to the bucket in Slot and replace the returned bucket.
+-spec on_bucket(fun((_) -> {[_], 0 | 1}), set(), non_neg_integer()) ->
+	  {set(), 0 | 1}.
+on_bucket(F, T, Slot) ->
+    SegI = ((Slot-1) div ?seg_size) + 1,
+    BktI = ((Slot-1) rem ?seg_size) + 1,
+    Segs = T#set.segs,
+    Seg = element(SegI, Segs),
+    B0 = element(BktI, Seg),
+    {B1, Res} = F(B0),				%Op on the bucket.
+    {T#set{segs = setelement(SegI, Segs, setelement(BktI, Seg, B1))},Res}.
+
+%% fold_set(Fun, Acc, Dictionary) -> Dictionary.
+%% filter_set(Fun, Dictionary) -> Dictionary.
+
+%%  Work functions for fold and filter operations.  These traverse the
+%%  hash structure rebuilding as necessary.  Note we could have
+%%  implemented map and hash using fold but these should be faster.
+%%  We hope!
+
+fold_set(F, Acc, D) when is_function(F, 2) ->
+    Segs = D#set.segs,
+    fold_segs(F, Acc, Segs, tuple_size(Segs)).
+
+fold_segs(F, Acc, Segs, I) when I >= 1 ->
+    Seg = element(I, Segs),
+    fold_segs(F, fold_seg(F, Acc, Seg, tuple_size(Seg)), Segs, I-1);
+fold_segs(_, Acc, _, _) -> Acc.
+
+fold_seg(F, Acc, Seg, I) when I >= 1 ->
+    fold_seg(F, fold_bucket(F, Acc, element(I, Seg)), Seg, I-1);
+fold_seg(_, Acc, _, _) -> Acc.
+
+fold_bucket(F, Acc, [E|Bkt]) ->
+    fold_bucket(F, F(E, Acc), Bkt);
+fold_bucket(_, Acc, []) -> Acc.
+
+filter_set(F, D) when is_function(F, 1) ->
+    Segs0 = tuple_to_list(D#set.segs),
+    {Segs1,Fc} = filter_seg_list(F, Segs0, [], 0),
+    maybe_contract(D#set{segs = list_to_tuple(Segs1)}, Fc).
+
+filter_seg_list(F, [Seg|Segs], Fss, Fc0) ->
+    Bkts0 = tuple_to_list(Seg),
+    {Bkts1,Fc1} = filter_bkt_list(F, Bkts0, [], Fc0),
+    filter_seg_list(F, Segs, [list_to_tuple(Bkts1)|Fss], Fc1);
+filter_seg_list(_, [], Fss, Fc) ->
+    {lists:reverse(Fss, []),Fc}.
+
+filter_bkt_list(F, [Bkt0|Bkts], Fbs, Fc0) ->
+    {Bkt1,Fc1} = filter_bucket(F, Bkt0, [], Fc0),
+    filter_bkt_list(F, Bkts, [Bkt1|Fbs], Fc1);
+filter_bkt_list(_, [], Fbs, Fc) ->
+    {lists:reverse(Fbs),Fc}.
+
+filter_bucket(F, [E|Bkt], Fb, Fc) ->
+    case F(E) of
+	true -> filter_bucket(F, Bkt, [E|Fb], Fc);
+	false -> filter_bucket(F, Bkt, Fb, Fc+1)
+    end;
+filter_bucket(_, [], Fb, Fc) -> {Fb,Fc}.
+
+%% get_bucket_s(Segments, Slot) -> Bucket.
+%% put_bucket_s(Segments, Slot, Bucket) -> NewSegments.
+
+get_bucket_s(Segs, Slot) ->
+    SegI = ((Slot-1) div ?seg_size) + 1,
+    BktI = ((Slot-1) rem ?seg_size) + 1,
+    element(BktI, element(SegI, Segs)).
+
+put_bucket_s(Segs, Slot, Bkt) ->
+    SegI = ((Slot-1) div ?seg_size) + 1,
+    BktI = ((Slot-1) rem ?seg_size) + 1,
+    Seg = setelement(BktI, element(SegI, Segs), Bkt),
+    setelement(SegI, Segs, Seg).
+
+-spec maybe_expand(set(), 0 | 1) -> set().
+maybe_expand(T0, Ic) when T0#set.size + Ic > T0#set.exp_size ->
+    T = maybe_expand_segs(T0),			%Do we need more segments.
+    N = T#set.n + 1,				%Next slot to expand into
+    Segs0 = T#set.segs,
+    Slot1 = N - T#set.bso,
+    B = get_bucket_s(Segs0, Slot1),
+    Slot2 = N,
+    {B1,B2} = rehash(B, Slot1, Slot2, T#set.maxn),
+    Segs1 = put_bucket_s(Segs0, Slot1, B1),
+    Segs2 = put_bucket_s(Segs1, Slot2, B2),
+    T#set{size = T#set.size + Ic,
+	  n = N,
+	  exp_size = N * ?expand_load,
+	  con_size = N * ?contract_load,
+	  segs = Segs2};
+maybe_expand(T, Ic) -> T#set{size = T#set.size + Ic}.
+
+-spec maybe_expand_segs(set()) -> set().
+maybe_expand_segs(T) when T#set.n =:= T#set.maxn ->
+    T#set{maxn = 2 * T#set.maxn,
+	  bso  = 2 * T#set.bso,
+	  segs = expand_segs(T#set.segs, T#set.empty)};
+maybe_expand_segs(T) -> T.
+
+-spec maybe_contract(set(), non_neg_integer()) -> set().
+maybe_contract(T, Dc) when T#set.size - Dc < T#set.con_size,
+			   T#set.n > ?seg_size ->
+    N = T#set.n,
+    Slot1 = N - T#set.bso,
+    Segs0 = T#set.segs,
+    B1 = get_bucket_s(Segs0, Slot1),
+    Slot2 = N,
+    B2 = get_bucket_s(Segs0, Slot2),
+    Segs1 = put_bucket_s(Segs0, Slot1, B1 ++ B2),
+    Segs2 = put_bucket_s(Segs1, Slot2, []),	%Clear the upper bucket
+    N1 = N - 1,
+    maybe_contract_segs(T#set{size = T#set.size - Dc,
+			      n = N1,
+			      exp_size = N1 * ?expand_load,
+			      con_size = N1 * ?contract_load,
+			      segs = Segs2});
+maybe_contract(T, Dc) -> T#set{size = T#set.size - Dc}.
+
+-spec maybe_contract_segs(set()) -> set().
+maybe_contract_segs(T) when T#set.n =:= T#set.bso ->
+    T#set{maxn = T#set.maxn div 2,
+	  bso  = T#set.bso div 2,
+	  segs = contract_segs(T#set.segs)};
+maybe_contract_segs(T) -> T.
+
+%% rehash(Bucket, Slot1, Slot2, MaxN) -> {Bucket1,Bucket2}.
+-spec rehash([T], integer(), pos_integer(), pos_integer()) -> {[T],[T]}.
+rehash([E|T], Slot1, Slot2, MaxN) ->
+    {L1,L2} = rehash(T, Slot1, Slot2, MaxN),
+    case erlang:phash(E, MaxN) of
+	Slot1 -> {[E|L1],L2};
+	Slot2 -> {L1,[E|L2]}
+    end;
+rehash([], _, _, _) -> {[],[]}.
+
+%% mk_seg(Size) -> Segment.
+-spec mk_seg(16) -> seg().
+mk_seg(16) -> {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}.
+
+%% expand_segs(Segs, EmptySeg) -> NewSegs.
+%% contract_segs(Segs) -> NewSegs.
+%%  Expand/contract the segment tuple by doubling/halving the number
+%%  of segments.  We special case the powers of 2 upto 32, this should
+%%  catch most case.  N.B. the last element in the segments tuple is
+%%  an extra element containing a default empty segment.
+-spec expand_segs(segs(), seg()) -> segs().
+expand_segs({B1}, Empty) ->
+    {B1,Empty};
+expand_segs({B1,B2}, Empty) ->
+    {B1,B2,Empty,Empty};
+expand_segs({B1,B2,B3,B4}, Empty) ->
+    {B1,B2,B3,B4,Empty,Empty,Empty,Empty};
+expand_segs({B1,B2,B3,B4,B5,B6,B7,B8}, Empty) ->
+    {B1,B2,B3,B4,B5,B6,B7,B8,
+     Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty};
+expand_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16}, Empty) ->
+    {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16,
+     Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty,
+     Empty,Empty,Empty,Empty,Empty,Empty,Empty,Empty};
+expand_segs(Segs, Empty) ->
+    list_to_tuple(tuple_to_list(Segs) 
+    ++ lists:duplicate(tuple_size(Segs), Empty)).
+
+-spec contract_segs(segs()) -> segs().
+contract_segs({B1,_}) ->
+    {B1};
+contract_segs({B1,B2,_,_}) ->
+    {B1,B2};
+contract_segs({B1,B2,B3,B4,_,_,_,_}) ->
+    {B1,B2,B3,B4};
+contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,_,_,_,_,_,_,_,_}) ->
+    {B1,B2,B3,B4,B5,B6,B7,B8};
+contract_segs({B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16,
+	       _,_,_,_,_,_,_,_,_,_,_,_,_,_,_,_}) ->
+    {B1,B2,B3,B4,B5,B6,B7,B8,B9,B10,B11,B12,B13,B14,B15,B16};
+contract_segs(Segs) ->
+    Ss = tuple_size(Segs) div 2,
+    list_to_tuple(lists:sublist(tuple_to_list(Segs), 1, Ss)).
author	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
committer	Erlang/OTP <[email protected]>	2009-11-20 14:54:40 +0000
commit	84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree	bff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/stdlib/src/sets.erl
download	otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2 otp-84adefa331c4159d432d22840663c38f155cd4c1.zip