aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src/gb_trees.erl
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /lib/stdlib/src/gb_trees.erl
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'lib/stdlib/src/gb_trees.erl')
-rw-r--r--lib/stdlib/src/gb_trees.erl515
1 files changed, 515 insertions, 0 deletions
diff --git a/lib/stdlib/src/gb_trees.erl b/lib/stdlib/src/gb_trees.erl
new file mode 100644
index 0000000000..d37786a100
--- /dev/null
+++ b/lib/stdlib/src/gb_trees.erl
@@ -0,0 +1,515 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% =====================================================================
+%% General Balanced Trees - highly efficient dictionaries.
+%%
+%% Copyright (C) 1999-2001 Sven-Olof Nystr�m, Richard Carlsson
+%%
+%% An efficient implementation of Prof. Arne Andersson's General
+%% Balanced Trees. These have no storage overhead compared to plain
+%% unbalanced binary trees, and their performance is in general better
+%% than AVL trees.
+%% ---------------------------------------------------------------------
+%% Operations:
+%%
+%% - empty(): returns empty tree.
+%%
+%% - is_empty(T): returns 'true' if T is an empty tree, and 'false'
+%% otherwise.
+%%
+%% - size(T): returns the number of nodes in the tree as an integer.
+%% Returns 0 (zero) if the tree is empty.
+%%
+%% - lookup(X, T): looks up key X in tree T; returns {value, V}, or
+%% `none' if the key is not present.
+%%
+%% - get(X, T): retreives the value stored with key X in tree T. Assumes
+%% that the key is present in the tree.
+%%
+%% - insert(X, V, T): inserts key X with value V into tree T; returns
+%% the new tree. Assumes that the key is *not* present in the tree.
+%%
+%% - update(X, V, T): updates key X to value V in tree T; returns the
+%% new tree. Assumes that the key is present in the tree.
+%%
+%% - enter(X, V, T): inserts key X with value V into tree T if the key
+%% is not present in the tree, otherwise updates key X to value V in
+%% T. Returns the new tree.
+%%
+%% - delete(X, T): removes key X from tree T; returns new tree. Assumes
+%% that the key is present in the tree.
+%%
+%% - delete_any(X, T): removes key X from tree T if the key is present
+%% in the tree, otherwise does nothing; returns new tree.
+%%
+%% - balance(T): rebalances tree T. Note that this is rarely necessary,
+%% but may be motivated when a large number of entries have been
+%% deleted from the tree without further insertions. Rebalancing could
+%% then be forced in order to minimise lookup times, since deletion
+%% only does not rebalance the tree.
+%%
+%% - is_defined(X, T): returns `true' if key X is present in tree T, and
+%% `false' otherwise.
+%%
+%% - keys(T): returns an ordered list of all keys in tree T.
+%%
+%% - values(T): returns the list of values for all keys in tree T,
+%% sorted by their corresponding keys. Duplicates are not removed.
+%%
+%% - to_list(T): returns an ordered list of {Key, Value} pairs for all
+%% keys in tree T.
+%%
+%% - from_orddict(L): turns an ordered list L of {Key, Value} pairs into
+%% a tree. The list must not contain duplicate keys.
+%%
+%% - smallest(T): returns {X, V}, where X is the smallest key in tree T,
+%% and V is the value associated with X in T. Assumes that the tree T
+%% is nonempty.
+%%
+%% - largest(T): returns {X, V}, where X is the largest key in tree T,
+%% and V is the value associated with X in T. Assumes that the tree T
+%% is nonempty.
+%%
+%% - take_smallest(T): returns {X, V, T1}, where X is the smallest key
+%% in tree T, V is the value associated with X in T, and T1 is the
+%% tree T with key X deleted. Assumes that the tree T is nonempty.
+%%
+%% - take_largest(T): returns {X, V, T1}, where X is the largest key
+%% in tree T, V is the value associated with X in T, and T1 is the
+%% tree T with key X deleted. Assumes that the tree T is nonempty.
+%%
+%% - iterator(T): returns an iterator that can be used for traversing
+%% the entries of tree T; see `next'. The implementation of this is
+%% very efficient; traversing the whole tree using `next' is only
+%% slightly slower than getting the list of all elements using
+%% `to_list' and traversing that. The main advantage of the iterator
+%% approach is that it does not require the complete list of all
+%% elements to be built in memory at one time.
+%%
+%% - next(S): returns {X, V, S1} where X is the smallest key referred to
+%% by the iterator S, and S1 is the new iterator to be used for
+%% traversing the remaining entries, or the atom `none' if no entries
+%% remain.
+%%
+%% - map(F, T): maps the function F(K, V) -> V' to all key-value pairs
+%% of the tree T and returns a new tree T' with the same set of keys
+%% as T and the new set of values V'.
+
+-module(gb_trees).
+
+-export([empty/0, is_empty/1, size/1, lookup/2, get/2, insert/3,
+ update/3, enter/3, delete/2, delete_any/2, balance/1,
+ is_defined/2, keys/1, values/1, to_list/1, from_orddict/1,
+ smallest/1, largest/1, take_smallest/1, take_largest/1,
+ iterator/1, next/1, map/2]).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Data structure:
+%% - {Size, Tree}, where `Tree' is composed of nodes of the form:
+%% - {Key, Value, Smaller, Bigger}, and the "empty tree" node:
+%% - nil.
+%%
+%% I make no attempt to balance trees after deletions. Since deletions
+%% don't increase the height of a tree, I figure this is OK.
+%%
+%% Original balance condition h(T) <= ceil(c * log(|T|)) has been
+%% changed to the similar (but not quite equivalent) condition 2 ^ h(T)
+%% <= |T| ^ c. I figure this should also be OK.
+%%
+%% Performance is comparable to the AVL trees in the Erlang book (and
+%% faster in general due to less overhead); the difference is that
+%% deletion works for my trees, but not for the book's trees. Behaviour
+%% is logaritmic (as it should be).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Some macros.
+
+-define(p, 2). % It seems that p = 2 is optimal for sorted keys
+
+-define(pow(A, _), A * A). % correct with exponent as defined above.
+
+-define(div2(X), X bsr 1).
+
+-define(mul2(X), X bsl 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Some types.
+
+-type gb_tree_node() :: 'nil' | {_, _, _, _}.
+
+%% A declaration equivalent to the following is currently hard-coded
+%% in erl_types.erl
+%%
+%% -opaque gb_tree() :: {non_neg_integer(), gb_tree_node()}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec empty() -> gb_tree().
+
+empty() ->
+ {0, nil}.
+
+-spec is_empty(gb_tree()) -> boolean().
+
+is_empty({0, nil}) ->
+ true;
+is_empty(_) ->
+ false.
+
+-spec size(gb_tree()) -> non_neg_integer().
+
+size({Size, _}) when is_integer(Size), Size >= 0 ->
+ Size.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec lookup(term(), gb_tree()) -> 'none' | {'value', term()}.
+
+lookup(Key, {_, T}) ->
+ lookup_1(Key, T).
+
+%% The term order is an arithmetic total order, so we should not
+%% test exact equality for the keys. (If we do, then it becomes
+%% possible that neither `>', `<', nor `=:=' matches.) Testing '<'
+%% and '>' first is statistically better than testing for
+%% equality, and also allows us to skip the test completely in the
+%% remaining case.
+
+lookup_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+ lookup_1(Key, Smaller);
+lookup_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+ lookup_1(Key, Bigger);
+lookup_1(_, {_, Value, _, _}) ->
+ {value, Value};
+lookup_1(_, nil) ->
+ none.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% This is a specialized version of `lookup'.
+
+-spec is_defined(term(), gb_tree()) -> boolean().
+
+is_defined(Key, {_, T}) ->
+ is_defined_1(Key, T).
+
+is_defined_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+ is_defined_1(Key, Smaller);
+is_defined_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+ is_defined_1(Key, Bigger);
+is_defined_1(_, {_, _, _, _}) ->
+ true;
+is_defined_1(_, nil) ->
+ false.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% This is a specialized version of `lookup'.
+
+-spec get(term(), gb_tree()) -> term().
+
+get(Key, {_, T}) ->
+ get_1(Key, T).
+
+get_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+ get_1(Key, Smaller);
+get_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+ get_1(Key, Bigger);
+get_1(_, {_, Value, _, _}) ->
+ Value.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec update(term(), term(), gb_tree()) -> gb_tree().
+
+update(Key, Val, {S, T}) ->
+ T1 = update_1(Key, Val, T),
+ {S, T1}.
+
+%% See `lookup' for notes on the term comparison order.
+
+update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key < Key1 ->
+ {Key1, V, update_1(Key, Value, Smaller), Bigger};
+update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key > Key1 ->
+ {Key1, V, Smaller, update_1(Key, Value, Bigger)};
+update_1(Key, Value, {_, _, Smaller, Bigger}) ->
+ {Key, Value, Smaller, Bigger}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec insert(term(), term(), gb_tree()) -> gb_tree().
+
+insert(Key, Val, {S, T}) when is_integer(S) ->
+ S1 = S+1,
+ {S1, insert_1(Key, Val, T, ?pow(S1, ?p))}.
+
+insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key < Key1 ->
+ case insert_1(Key, Value, Smaller, ?div2(S)) of
+ {T1, H1, S1} ->
+ T = {Key1, V, T1, Bigger},
+ {H2, S2} = count(Bigger),
+ H = ?mul2(erlang:max(H1, H2)),
+ SS = S1 + S2 + 1,
+ P = ?pow(SS, ?p),
+ if
+ H > P ->
+ balance(T, SS);
+ true ->
+ {T, H, SS}
+ end;
+ T1 ->
+ {Key1, V, T1, Bigger}
+ end;
+insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key > Key1 ->
+ case insert_1(Key, Value, Bigger, ?div2(S)) of
+ {T1, H1, S1} ->
+ T = {Key1, V, Smaller, T1},
+ {H2, S2} = count(Smaller),
+ H = ?mul2(erlang:max(H1, H2)),
+ SS = S1 + S2 + 1,
+ P = ?pow(SS, ?p),
+ if
+ H > P ->
+ balance(T, SS);
+ true ->
+ {T, H, SS}
+ end;
+ T1 ->
+ {Key1, V, Smaller, T1}
+ end;
+insert_1(Key, Value, nil, S) when S =:= 0 ->
+ {{Key, Value, nil, nil}, 1, 1};
+insert_1(Key, Value, nil, _S) ->
+ {Key, Value, nil, nil};
+insert_1(Key, _, _, _) ->
+ erlang:error({key_exists, Key}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec enter(term(), term(), gb_tree()) -> gb_tree().
+
+enter(Key, Val, T) ->
+ case is_defined(Key, T) of
+ true ->
+ update(Key, Val, T);
+ false ->
+ insert(Key, Val, T)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+count({_, _, nil, nil}) ->
+ {1, 1};
+count({_, _, Sm, Bi}) ->
+ {H1, S1} = count(Sm),
+ {H2, S2} = count(Bi),
+ {?mul2(erlang:max(H1, H2)), S1 + S2 + 1};
+count(nil) ->
+ {1, 0}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec balance(gb_tree()) -> gb_tree().
+
+balance({S, T}) ->
+ {S, balance(T, S)}.
+
+balance(T, S) ->
+ balance_list(to_list_1(T), S).
+
+balance_list(L, S) ->
+ {T, []} = balance_list_1(L, S),
+ T.
+
+balance_list_1(L, S) when S > 1 ->
+ Sm = S - 1,
+ S2 = Sm div 2,
+ S1 = Sm - S2,
+ {T1, [{K, V} | L1]} = balance_list_1(L, S1),
+ {T2, L2} = balance_list_1(L1, S2),
+ T = {K, V, T1, T2},
+ {T, L2};
+balance_list_1([{Key, Val} | L], 1) ->
+ {{Key, Val, nil, nil}, L};
+balance_list_1(L, 0) ->
+ {nil, L}.
+
+-spec from_orddict([{_,_}]) -> gb_tree().
+
+from_orddict(L) ->
+ S = length(L),
+ {S, balance_list(L, S)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec delete_any(term(), gb_tree()) -> gb_tree().
+
+delete_any(Key, T) ->
+ case is_defined(Key, T) of
+ true ->
+ delete(Key, T);
+ false ->
+ T
+ end.
+
+%%% delete. Assumes that key is present.
+
+-spec delete(term(), gb_tree()) -> gb_tree().
+
+delete(Key, {S, T}) when is_integer(S), S >= 0 ->
+ {S - 1, delete_1(Key, T)}.
+
+%% See `lookup' for notes on the term comparison order.
+
+delete_1(Key, {Key1, Value, Smaller, Larger}) when Key < Key1 ->
+ Smaller1 = delete_1(Key, Smaller),
+ {Key1, Value, Smaller1, Larger};
+delete_1(Key, {Key1, Value, Smaller, Bigger}) when Key > Key1 ->
+ Bigger1 = delete_1(Key, Bigger),
+ {Key1, Value, Smaller, Bigger1};
+delete_1(_, {_, _, Smaller, Larger}) ->
+ merge(Smaller, Larger).
+
+merge(Smaller, nil) ->
+ Smaller;
+merge(nil, Larger) ->
+ Larger;
+merge(Smaller, Larger) ->
+ {Key, Value, Larger1} = take_smallest1(Larger),
+ {Key, Value, Smaller, Larger1}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec take_smallest(gb_tree()) -> {term(), term(), gb_tree()}.
+
+take_smallest({Size, Tree}) when is_integer(Size), Size >= 0 ->
+ {Key, Value, Larger} = take_smallest1(Tree),
+ {Key, Value, {Size - 1, Larger}}.
+
+take_smallest1({Key, Value, nil, Larger}) ->
+ {Key, Value, Larger};
+take_smallest1({Key, Value, Smaller, Larger}) ->
+ {Key1, Value1, Smaller1} = take_smallest1(Smaller),
+ {Key1, Value1, {Key, Value, Smaller1, Larger}}.
+
+-spec smallest(gb_tree()) -> {term(), term()}.
+
+smallest({_, Tree}) ->
+ smallest_1(Tree).
+
+smallest_1({Key, Value, nil, _Larger}) ->
+ {Key, Value};
+smallest_1({_Key, _Value, Smaller, _Larger}) ->
+ smallest_1(Smaller).
+
+-spec take_largest(gb_tree()) -> {term(), term(), gb_tree()}.
+
+take_largest({Size, Tree}) when is_integer(Size), Size >= 0 ->
+ {Key, Value, Smaller} = take_largest1(Tree),
+ {Key, Value, {Size - 1, Smaller}}.
+
+take_largest1({Key, Value, Smaller, nil}) ->
+ {Key, Value, Smaller};
+take_largest1({Key, Value, Smaller, Larger}) ->
+ {Key1, Value1, Larger1} = take_largest1(Larger),
+ {Key1, Value1, {Key, Value, Smaller, Larger1}}.
+
+-spec largest(gb_tree()) -> {term(), term()}.
+
+largest({_, Tree}) ->
+ largest_1(Tree).
+
+largest_1({Key, Value, _Smaller, nil}) ->
+ {Key, Value};
+largest_1({_Key, _Value, _Smaller, Larger}) ->
+ largest_1(Larger).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec to_list(gb_tree()) -> [{term(), term()}].
+
+to_list({_, T}) ->
+ to_list(T, []).
+
+to_list_1(T) -> to_list(T, []).
+
+to_list({Key, Value, Small, Big}, L) ->
+ to_list(Small, [{Key, Value} | to_list(Big, L)]);
+to_list(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec keys(gb_tree()) -> [term()].
+
+keys({_, T}) ->
+ keys(T, []).
+
+keys({Key, _Value, Small, Big}, L) ->
+ keys(Small, [Key | keys(Big, L)]);
+keys(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec values(gb_tree()) -> [term()].
+
+values({_, T}) ->
+ values(T, []).
+
+values({_Key, Value, Small, Big}, L) ->
+ values(Small, [Value | values(Big, L)]);
+values(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec iterator(gb_tree()) -> [gb_tree_node()].
+
+iterator({_, T}) ->
+ iterator_1(T).
+
+iterator_1(T) ->
+ iterator(T, []).
+
+%% The iterator structure is really just a list corresponding to
+%% the call stack of an in-order traversal. This is quite fast.
+
+iterator({_, _, nil, _} = T, As) ->
+ [T | As];
+iterator({_, _, L, _} = T, As) ->
+ iterator(L, [T | As]);
+iterator(nil, As) ->
+ As.
+
+-spec next([gb_tree_node()]) -> 'none' | {term(), term(), [gb_tree_node()]}.
+
+next([{X, V, _, T} | As]) ->
+ {X, V, iterator(T, As)};
+next([]) ->
+ none.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec map(fun((term(), term()) -> term()), gb_tree()) -> gb_tree().
+
+map(F, {Size, Tree}) when is_function(F, 2) ->
+ {Size, map_1(F, Tree)}.
+
+map_1(_, nil) -> nil;
+map_1(F, {K, V, Smaller, Larger}) ->
+ {K, F(K, V), map_1(F, Smaller), map_1(F, Larger)}.