1 files changed, 515 insertions, 0 deletions
diff --git a/lib/stdlib/src/gb_trees.erl b/lib/stdlib/src/gb_trees.erl
new file mode 100644
index 0000000000..d37786a100
--- /dev/null
+++ b/lib/stdlib/src/gb_trees.erl
@@ -0,0 +1,515 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
+%% 
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%% 
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%% 
+%% %CopyrightEnd%
+%%
+%% =====================================================================
+%% General Balanced Trees - highly efficient dictionaries.
+%%
+%% Copyright (C) 1999-2001 Sven-Olof Nystr�m, Richard Carlsson
+%%
+%% An efficient implementation of Prof. Arne Andersson's General
+%% Balanced Trees. These have no storage overhead compared to plain
+%% unbalanced binary trees, and their performance is in general better
+%% than AVL trees.
+%% ---------------------------------------------------------------------
+%% Operations:
+%%
+%% - empty(): returns empty tree.
+%%
+%% - is_empty(T): returns 'true' if T is an empty tree, and 'false'
+%%   otherwise.
+%%
+%% - size(T): returns the number of nodes in the tree as an integer.
+%%   Returns 0 (zero) if the tree is empty.
+%%
+%% - lookup(X, T): looks up key X in tree T; returns {value, V}, or
+%%   `none' if the key is not present.
+%%
+%% - get(X, T): retreives the value stored with key X in tree T. Assumes
+%%   that the key is present in the tree.
+%%
+%% - insert(X, V, T): inserts key X with value V into tree T; returns
+%%   the new tree. Assumes that the key is *not* present in the tree.
+%%
+%% - update(X, V, T): updates key X to value V in tree T; returns the
+%%   new tree. Assumes that the key is present in the tree.
+%%
+%% - enter(X, V, T): inserts key X with value V into tree T if the key
+%%   is not present in the tree, otherwise updates key X to value V in
+%%   T. Returns the new tree.
+%%
+%% - delete(X, T): removes key X from tree T; returns new tree. Assumes
+%%   that the key is present in the tree.
+%%
+%% - delete_any(X, T): removes key X from tree T if the key is present
+%%   in the tree, otherwise does nothing; returns new tree.
+%%
+%% - balance(T): rebalances tree T. Note that this is rarely necessary,
+%%   but may be motivated when a large number of entries have been
+%%   deleted from the tree without further insertions. Rebalancing could
+%%   then be forced in order to minimise lookup times, since deletion
+%%   only does not rebalance the tree.
+%%
+%% - is_defined(X, T): returns `true' if key X is present in tree T, and
+%%   `false' otherwise.
+%%
+%% - keys(T): returns an ordered list of all keys in tree T.
+%%
+%% - values(T): returns the list of values for all keys in tree T,
+%%   sorted by their corresponding keys. Duplicates are not removed.
+%%
+%% - to_list(T): returns an ordered list of {Key, Value} pairs for all
+%%   keys in tree T.
+%%
+%% - from_orddict(L): turns an ordered list L of {Key, Value} pairs into
+%%   a tree. The list must not contain duplicate keys.
+%%
+%% - smallest(T): returns {X, V}, where X is the smallest key in tree T,
+%%   and V is the value associated with X in T. Assumes that the tree T
+%%   is nonempty.
+%%
+%% - largest(T): returns {X, V}, where X is the largest key in tree T,
+%%   and V is the value associated with X in T. Assumes that the tree T
+%%   is nonempty.
+%%
+%% - take_smallest(T): returns {X, V, T1}, where X is the smallest key
+%%   in tree T, V is the value associated with X in T, and T1 is the
+%%   tree T with key X deleted. Assumes that the tree T is nonempty.
+%%
+%% - take_largest(T): returns {X, V, T1}, where X is the largest key
+%%   in tree T, V is the value associated with X in T, and T1 is the
+%%   tree T with key X deleted. Assumes that the tree T is nonempty.
+%%
+%% - iterator(T): returns an iterator that can be used for traversing
+%%   the entries of tree T; see `next'. The implementation of this is
+%%   very efficient; traversing the whole tree using `next' is only
+%%   slightly slower than getting the list of all elements using
+%%   `to_list' and traversing that. The main advantage of the iterator
+%%   approach is that it does not require the complete list of all
+%%   elements to be built in memory at one time.
+%%
+%% - next(S): returns {X, V, S1} where X is the smallest key referred to
+%%   by the iterator S, and S1 is the new iterator to be used for
+%%   traversing the remaining entries, or the atom `none' if no entries
+%%   remain.
+%%
+%% - map(F, T): maps the function F(K, V) -> V' to all key-value pairs
+%%   of the tree T and returns a new tree T' with the same set of keys
+%%   as T and the new set of values V'.
+
+-module(gb_trees).
+
+-export([empty/0, is_empty/1, size/1, lookup/2, get/2, insert/3,
+	 update/3, enter/3, delete/2, delete_any/2, balance/1,
+	 is_defined/2, keys/1, values/1, to_list/1, from_orddict/1,
+	 smallest/1, largest/1, take_smallest/1, take_largest/1,
+	 iterator/1, next/1, map/2]).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Data structure:
+%% - {Size, Tree}, where `Tree' is composed of nodes of the form:
+%%   - {Key, Value, Smaller, Bigger}, and the "empty tree" node:
+%%   - nil.
+%%
+%% I make no attempt to balance trees after deletions. Since deletions
+%% don't increase the height of a tree, I figure this is OK.
+%%
+%% Original balance condition h(T) <= ceil(c * log(|T|)) has been
+%% changed to the similar (but not quite equivalent) condition 2 ^ h(T)
+%% <= |T| ^ c. I figure this should also be OK.
+%%
+%% Performance is comparable to the AVL trees in the Erlang book (and
+%% faster in general due to less overhead); the difference is that
+%% deletion works for my trees, but not for the book's trees. Behaviour
+%% is logaritmic (as it should be).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Some macros.
+
+-define(p, 2). % It seems that p = 2 is optimal for sorted keys
+
+-define(pow(A, _), A * A). % correct with exponent as defined above.
+
+-define(div2(X), X bsr 1). 
+
+-define(mul2(X), X bsl 1).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Some types.
+
+-type gb_tree_node() :: 'nil' | {_, _, _, _}.
+
+%% A declaration equivalent to the following is currently hard-coded
+%% in erl_types.erl
+%%
+%% -opaque gb_tree() :: {non_neg_integer(), gb_tree_node()}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec empty() -> gb_tree().
+
+empty() ->
+    {0, nil}.
+
+-spec is_empty(gb_tree()) -> boolean().
+
+is_empty({0, nil}) ->
+    true;
+is_empty(_) ->
+    false.
+
+-spec size(gb_tree()) -> non_neg_integer().
+
+size({Size, _}) when is_integer(Size), Size >= 0 ->
+    Size.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec lookup(term(), gb_tree()) -> 'none' | {'value', term()}.
+
+lookup(Key, {_, T}) ->
+    lookup_1(Key, T).
+
+%% The term order is an arithmetic total order, so we should not
+%% test exact equality for the keys. (If we do, then it becomes
+%% possible that neither `>', `<', nor `=:=' matches.) Testing '<'
+%% and '>' first is statistically better than testing for
+%% equality, and also allows us to skip the test completely in the
+%% remaining case.
+
+lookup_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+    lookup_1(Key, Smaller);
+lookup_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+    lookup_1(Key, Bigger);
+lookup_1(_, {_, Value, _, _}) ->
+    {value, Value};
+lookup_1(_, nil) ->
+    none.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% This is a specialized version of `lookup'.
+
+-spec is_defined(term(), gb_tree()) -> boolean().
+
+is_defined(Key, {_, T}) ->
+    is_defined_1(Key, T).
+
+is_defined_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+    is_defined_1(Key, Smaller);
+is_defined_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+    is_defined_1(Key, Bigger);
+is_defined_1(_, {_, _, _, _}) ->
+    true;
+is_defined_1(_, nil) ->
+    false.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% This is a specialized version of `lookup'.
+
+-spec get(term(), gb_tree()) -> term().
+
+get(Key, {_, T}) ->
+    get_1(Key, T).
+
+get_1(Key, {Key1, _, Smaller, _}) when Key < Key1 ->
+    get_1(Key, Smaller);
+get_1(Key, {Key1, _, _, Bigger}) when Key > Key1 ->
+    get_1(Key, Bigger);
+get_1(_, {_, Value, _, _}) ->
+    Value.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec update(term(), term(), gb_tree()) -> gb_tree().
+
+update(Key, Val, {S, T}) ->
+    T1 = update_1(Key, Val, T),
+    {S, T1}.
+
+%% See `lookup' for notes on the term comparison order.
+
+update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key < Key1 -> 
+    {Key1, V, update_1(Key, Value, Smaller), Bigger};
+update_1(Key, Value, {Key1, V, Smaller, Bigger}) when Key > Key1 ->
+    {Key1, V, Smaller, update_1(Key, Value, Bigger)};
+update_1(Key, Value, {_, _, Smaller, Bigger}) ->
+    {Key, Value, Smaller, Bigger}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec insert(term(), term(), gb_tree()) -> gb_tree().
+
+insert(Key, Val, {S, T}) when is_integer(S) ->
+    S1 = S+1,
+    {S1, insert_1(Key, Val, T, ?pow(S1, ?p))}.
+
+insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key < Key1 -> 
+    case insert_1(Key, Value, Smaller, ?div2(S)) of
+	{T1, H1, S1} ->
+	    T = {Key1, V, T1, Bigger},
+	    {H2, S2} = count(Bigger),
+	    H = ?mul2(erlang:max(H1, H2)),
+	    SS = S1 + S2 + 1,
+	    P = ?pow(SS, ?p),
+	    if
+		H > P -> 
+		    balance(T, SS);
+		true ->
+		    {T, H, SS}
+	    end;
+	T1 ->
+	    {Key1, V, T1, Bigger}
+    end;
+insert_1(Key, Value, {Key1, V, Smaller, Bigger}, S) when Key > Key1 -> 
+    case insert_1(Key, Value, Bigger, ?div2(S)) of
+	{T1, H1, S1} ->
+	    T = {Key1, V, Smaller, T1},
+	    {H2, S2} = count(Smaller),
+	    H = ?mul2(erlang:max(H1, H2)),
+	    SS = S1 + S2 + 1,
+	    P = ?pow(SS, ?p),
+	    if
+		H > P -> 
+		    balance(T, SS);
+		true ->
+		    {T, H, SS}
+	    end;
+	T1 ->
+	    {Key1, V, Smaller, T1}
+    end;
+insert_1(Key, Value, nil, S) when S =:= 0 ->
+    {{Key, Value, nil, nil}, 1, 1};
+insert_1(Key, Value, nil, _S) ->
+    {Key, Value, nil, nil};
+insert_1(Key, _, _, _) ->
+    erlang:error({key_exists, Key}).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec enter(term(), term(), gb_tree()) -> gb_tree().
+
+enter(Key, Val, T) ->
+    case is_defined(Key, T) of
+	true ->
+	    update(Key, Val, T);
+	false ->
+	    insert(Key, Val, T)
+    end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+count({_, _, nil, nil}) ->
+    {1, 1};
+count({_, _, Sm, Bi}) ->
+    {H1, S1} = count(Sm),
+    {H2, S2} = count(Bi),
+    {?mul2(erlang:max(H1, H2)), S1 + S2 + 1};
+count(nil) ->
+    {1, 0}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec balance(gb_tree()) -> gb_tree().
+
+balance({S, T}) ->
+    {S, balance(T, S)}.
+
+balance(T, S) ->
+    balance_list(to_list_1(T), S).
+
+balance_list(L, S) ->
+    {T, []} = balance_list_1(L, S),
+    T.
+
+balance_list_1(L, S) when S > 1 ->
+    Sm = S - 1,
+    S2 = Sm div 2,
+    S1 = Sm - S2,
+    {T1, [{K, V} | L1]} = balance_list_1(L, S1),
+    {T2, L2} = balance_list_1(L1, S2),
+    T = {K, V, T1, T2},
+    {T, L2};
+balance_list_1([{Key, Val} | L], 1) ->
+    {{Key, Val, nil, nil}, L};
+balance_list_1(L, 0) ->
+    {nil, L}.
+
+-spec from_orddict([{_,_}]) -> gb_tree().
+
+from_orddict(L) ->
+    S = length(L),
+    {S, balance_list(L, S)}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec delete_any(term(), gb_tree()) -> gb_tree().
+
+delete_any(Key, T) ->
+    case is_defined(Key, T) of
+	true ->
+	    delete(Key, T);
+	false ->
+	    T
+    end.
+
+%%% delete. Assumes that key is present.
+
+-spec delete(term(), gb_tree()) -> gb_tree().
+
+delete(Key, {S, T}) when is_integer(S), S >= 0 ->
+    {S - 1, delete_1(Key, T)}.
+
+%% See `lookup' for notes on the term comparison order.
+
+delete_1(Key, {Key1, Value, Smaller, Larger}) when Key < Key1 ->
+    Smaller1 = delete_1(Key, Smaller),
+    {Key1, Value, Smaller1, Larger};
+delete_1(Key, {Key1, Value, Smaller, Bigger}) when Key > Key1 ->
+    Bigger1 = delete_1(Key, Bigger),
+    {Key1, Value, Smaller, Bigger1};
+delete_1(_, {_, _, Smaller, Larger}) ->
+    merge(Smaller, Larger).
+
+merge(Smaller, nil) ->
+    Smaller;
+merge(nil, Larger) ->
+    Larger;
+merge(Smaller, Larger) ->
+    {Key, Value, Larger1} = take_smallest1(Larger),
+    {Key, Value, Smaller, Larger1}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec take_smallest(gb_tree()) -> {term(), term(), gb_tree()}.
+
+take_smallest({Size, Tree}) when is_integer(Size), Size >= 0 ->
+    {Key, Value, Larger} = take_smallest1(Tree),
+    {Key, Value, {Size - 1, Larger}}.
+
+take_smallest1({Key, Value, nil, Larger}) ->
+    {Key, Value, Larger};
+take_smallest1({Key, Value, Smaller, Larger}) ->
+    {Key1, Value1, Smaller1} = take_smallest1(Smaller),
+    {Key1, Value1, {Key, Value, Smaller1, Larger}}.
+
+-spec smallest(gb_tree()) -> {term(), term()}.
+
+smallest({_, Tree}) ->
+    smallest_1(Tree).
+
+smallest_1({Key, Value, nil, _Larger}) ->
+    {Key, Value};
+smallest_1({_Key, _Value, Smaller, _Larger}) ->
+    smallest_1(Smaller).
+
+-spec take_largest(gb_tree()) -> {term(), term(), gb_tree()}.
+
+take_largest({Size, Tree}) when is_integer(Size), Size >= 0 ->
+    {Key, Value, Smaller} = take_largest1(Tree),
+    {Key, Value, {Size - 1, Smaller}}.
+
+take_largest1({Key, Value, Smaller, nil}) ->
+    {Key, Value, Smaller};
+take_largest1({Key, Value, Smaller, Larger}) ->
+    {Key1, Value1, Larger1} = take_largest1(Larger),
+    {Key1, Value1, {Key, Value, Smaller, Larger1}}.
+
+-spec largest(gb_tree()) -> {term(), term()}.
+
+largest({_, Tree}) ->
+    largest_1(Tree).
+
+largest_1({Key, Value, _Smaller, nil}) ->
+    {Key, Value};
+largest_1({_Key, _Value, _Smaller, Larger}) ->
+    largest_1(Larger).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec to_list(gb_tree()) -> [{term(), term()}].
+			   
+to_list({_, T}) ->
+    to_list(T, []).
+
+to_list_1(T) -> to_list(T, []).
+
+to_list({Key, Value, Small, Big}, L) ->
+    to_list(Small, [{Key, Value} | to_list(Big, L)]);
+to_list(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec keys(gb_tree()) -> [term()].
+
+keys({_, T}) ->
+    keys(T, []).
+
+keys({Key, _Value, Small, Big}, L) ->
+    keys(Small, [Key | keys(Big, L)]);
+keys(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec values(gb_tree()) -> [term()].
+
+values({_, T}) ->
+    values(T, []).
+
+values({_Key, Value, Small, Big}, L) ->
+    values(Small, [Value | values(Big, L)]);
+values(nil, L) -> L.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec iterator(gb_tree()) -> [gb_tree_node()].
+
+iterator({_, T}) ->
+    iterator_1(T).
+
+iterator_1(T) ->
+    iterator(T, []).
+
+%% The iterator structure is really just a list corresponding to
+%% the call stack of an in-order traversal. This is quite fast.
+
+iterator({_, _, nil, _} = T, As) ->
+    [T | As];
+iterator({_, _, L, _} = T, As) ->
+    iterator(L, [T | As]);
+iterator(nil, As) ->
+    As.
+
+-spec next([gb_tree_node()]) -> 'none' | {term(), term(), [gb_tree_node()]}.
+
+next([{X, V, _, T} | As]) ->
+    {X, V, iterator(T, As)};
+next([]) ->
+    none.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-spec map(fun((term(), term()) -> term()), gb_tree()) -> gb_tree().
+
+map(F, {Size, Tree}) when is_function(F, 2) ->
+    {Size, map_1(F, Tree)}.
+
+map_1(_, nil) -> nil;
+map_1(F, {K, V, Smaller, Larger}) ->
+    {K, F(K, V), map_1(F, Smaller), map_1(F, Larger)}.