From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001 From: Erlang/OTP Date: Fri, 20 Nov 2009 14:54:40 +0000 Subject: The R13B03 release. --- lib/stdlib/src/dets_utils.erl | 1801 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1801 insertions(+) create mode 100644 lib/stdlib/src/dets_utils.erl (limited to 'lib/stdlib/src/dets_utils.erl') diff --git a/lib/stdlib/src/dets_utils.erl b/lib/stdlib/src/dets_utils.erl new file mode 100644 index 0000000000..5db2ad3049 --- /dev/null +++ b/lib/stdlib/src/dets_utils.erl @@ -0,0 +1,1801 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +-module(dets_utils). + +%% Utility functions common to several dets file formats. +%% To be used from dets, dets_v8 and dets_v9 only. + +-export([cmp/2, msort/1, mkeysort/2, mkeysearch/3, family/1]). + +-export([rename/2, pread/2, pread/4, ipread/3, pwrite/2, write/2, + truncate/2, position/2, sync/1, open/2, truncate/3, fwrite/3, + write_file/2, position/3, position_close/3, pwrite/4, + pwrite/3, pread_close/4, read_n/2, pread_n/3, read_4/2]). + +-export([code_to_type/1, type_to_code/1]). + +-export([corrupt_reason/2, corrupt/2, corrupt_file/2, + vformat/2, file_error/2]). + +-export([debug_mode/0, bad_object/2]). + +-export([cache_lookup/4, cache_size/1, new_cache/1, + reset_cache/1, is_empty_cache/1]). + +-export([empty_free_lists/0, init_alloc/1, alloc_many/4, alloc/2, + free/3, get_freelists/1, all_free/1, all_allocated/1, + all_allocated_as_list/1, find_allocated/4, find_next_allocated/3, + log2/1, make_zeros/1]). + +-export([init_slots_from_old_file/2]). + +-export([list_to_tree/1, tree_to_bin/5]). + +-compile({inline, [{sz2pos,1}, {adjust_addr,3}]}). +-compile({inline, [{bplus_mk_leaf,1}, {bplus_get_size,1}, + {bplus_get_tree,2}, {bplus_get_lkey,2}, + {bplus_get_rkey,2}]}). + +%% Debug +-export([init_disk_map/1, stop_disk_map/0, + disk_map_segment_p/2, disk_map_segment/2]). + +-include("dets.hrl"). + +%%% A total ordering of all Erlang terms. + +%% -> -1 | 0 | 1. T1 is (smaller than | equal | greater than) T2. +%% If is_integer(I), is_float(F), I == F then I is deemed smaller than F. +cmp(T, T) -> + 0; +cmp([E1 | T1], [E2 | T2]) -> + case cmp(E1, E2) of + 0 -> cmp(T1, T2); + R -> R + end; +cmp(T1, T2) when tuple_size(T1) =:= tuple_size(T2) -> + tcmp(T1, T2, 1, tuple_size(T1)); +cmp(I, F) when is_integer(I), is_float(F) -> + -1; +cmp(F, I) when is_float(F), is_integer(I) -> + 1; +cmp(T1, T2) when T1 < T2 -> + -1; +cmp(_T1, _T2) -> % when _T1 > _T2 + 1. + +tcmp(T1, T2, I, I) -> + cmp(element(I, T1), element(I, T2)); +tcmp(T1, T2, I, N) -> + case cmp(element(I, T1), element(I, T2)) of + 0 -> tcmp(T1, T2, I + 1, N); + R -> R + end. + +msort(L) -> + %% sort is very much faster than msort, let it do most of the work. + F = fun(X, Y) -> cmp(X, Y) =< 0 end, + lists:sort(F, lists:sort(L)). + +mkeysort(I, L) -> + F = fun(X, Y) -> cmp(element(I, X), element(I, Y)) =< 0 end, + %% keysort is much faster than mkeysort, let it do most of the work. + lists:sort(F, lists:keysort(I, L)). + +mkeysearch(Key, I, L) -> + case lists:keysearch(Key, I, L) of + {value, Value}=Reply when element(I, Value) =:= Key -> + Reply; + false -> + false; + _ -> + mkeysearch2(Key, I, L) + end. + +mkeysearch2(_Key, _I, []) -> + false; +mkeysearch2(Key, I, [E | _L]) when element(I, E) =:= Key -> + {value, E}; +mkeysearch2(Key, I, [_ | L]) -> + mkeysearch2(Key, I, L). + +%% Be careful never to compare keys, but use matching instead. +%% Otherwise sofs could have been used: +%% sofs:to_external(sofs:relation_to_family(sofs:relation(L, 2))). +family([]) -> + []; +family(L) -> + [{K,V}|KVL] = mkeysort(1, L), + per_key(KVL, K, [V], []). + +per_key([], K, Vs, KVs) -> + lists:reverse(KVs, [{K,msort(Vs)}]); +per_key([{K,V}|L], K, Vs, KVs) -> % match + per_key(L, K, [V|Vs], KVs); +per_key([{K1,V}|L], K, Vs, KVs) -> + per_key(L, K1, [V], [{K,msort(Vs)}|KVs]). + +rename(From, To) -> + case file:rename(From, To) of + ok -> + ok; + {error, Reason} -> + {error, {file_error, {From, To}, Reason}} + end. + +%% -> {ok, Bins} | throw({NewHead, Error}) +pread(Positions, Head) -> + R = case file:pread(Head#head.fptr, Positions) of + {ok, Bins} -> + %% file:pread/2 can return 'eof' as "data". + case lists:member(eof, Bins) of + true -> + {error, {premature_eof, Head#head.filename}}; + false -> + {ok, Bins} + end; + {error, Reason} when enomem =:= Reason; einval =:= Reason -> + {error, {bad_object_header, Head#head.filename}}; + {error, Reason} -> + {file_error, Head#head.filename, Reason} + end, + case R of + {ok, _Bins} -> + R; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> {ok, binary()} | throw({NewHead, Error}) +pread(Head, Pos, Min, Extra) -> + R = case file:pread(Head#head.fptr, Pos, Min+Extra) of + {error, Reason} when enomem =:= Reason; einval =:= Reason -> + {error, {bad_object_header, Head#head.filename}}; + {error, Reason} -> + {file_error, Head#head.filename, Reason}; + {ok, Bin} when byte_size(Bin) < Min -> + {error, {premature_eof, Head#head.filename}}; + OK -> OK + end, + case R of + {ok, _Bin} -> + R; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> eof | [] | {ok, {Size, Pointer, binary()}} +ipread(Head, Pos1, MaxSize) -> + try + disk_map_pread(Pos1) + catch Bad -> + throw(corrupt_reason(Head, {disk_map, Bad})) + end, + case file:ipread_s32bu_p32bu(Head#head.fptr, Pos1, MaxSize) of + {ok, {0, 0, eof}} -> + []; + {ok, Reply} -> + {ok, Reply}; + _Else -> + eof + end. + +%% -> {Head, ok} | throw({Head, Error}) +pwrite(Head, []) -> + {Head, ok}; +pwrite(Head, Bins) -> + try + disk_map(Bins) + catch Bad -> + throw(corrupt_reason(Head, {disk_map, Bad, Bins})) + end, + case file:pwrite(Head#head.fptr, Bins) of + ok -> + {Head, ok}; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +write(_Head, []) -> + ok; +write(Head, Bins) -> + case file:write(Head#head.fptr, Bins) of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +%% Same as file:write_file/2, but calls file:sync/1. +write_file(Head, Bin) -> + R = case file:open(Head#head.filename, [binary, raw, write]) of + {ok, Fd} -> + R1 = file:write(Fd, Bin), + R2 = file:sync(Fd), + file:close(Fd), + if R1 =:= ok -> R2; true -> R1 end; + Else -> + Else + end, + case R of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +%% -> ok | throw({Head, Error}) +truncate(Head, Pos) -> + case catch truncate(Head#head.fptr, Head#head.filename, Pos) of + ok -> + ok; + Error -> + throw(corrupt(Head, Error)) + end. + +%% -> {ok, Pos} | throw({Head, Error}) +position(Head, Pos) -> + case file:position(Head#head.fptr, Pos) of + {error, _Reason} = Error -> + corrupt_file(Head, Error); + OK -> OK + end. + +%% -> ok | throw({Head, Error}) +sync(Head) -> + case file:sync(Head#head.fptr) of + ok -> + ok; + Error -> + corrupt_file(Head, Error) + end. + +open(FileSpec, Args) -> + case file:open(FileSpec, Args) of + {ok, Fd} -> + {ok, Fd}; + Error -> + file_error(FileSpec, Error) + end. + +truncate(Fd, FileName, Pos) -> + if + Pos =:= cur -> + ok; + true -> + position(Fd, FileName, Pos) + end, + case file:truncate(Fd) of + ok -> + ok; + Error -> + file_error(FileName, {error, Error}) + end. + +fwrite(Fd, FileName, B) -> + case file:write(Fd, B) of + ok -> ok; + Error -> file_error_close(Fd, FileName, Error) + end. + +position(Fd, FileName, Pos) -> + case file:position(Fd, Pos) of + {error, Error} -> file_error(FileName, {error, Error}); + OK -> OK + end. + +position_close(Fd, FileName, Pos) -> + case file:position(Fd, Pos) of + {error, Error} -> file_error_close(Fd, FileName, {error, Error}); + OK -> OK + end. + +pwrite(Fd, FileName, Position, B) -> + case file:pwrite(Fd, Position, B) of + ok -> ok; + Error -> file_error(FileName, {error, Error}) + end. + +pwrite(Fd, FileName, Bins) -> + case file:pwrite(Fd, Bins) of + ok -> + ok; + {error, {_NoWrites, Reason}} -> + file_error(FileName, {error, Reason}) + end. + +pread_close(Fd, FileName, Pos, Size) -> + case file:pread(Fd, Pos, Size) of + {error, Error} -> + file_error_close(Fd, FileName, {error, Error}); + {ok, Bin} when byte_size(Bin) < Size -> + file:close(Fd), + throw({error, {tooshort, FileName}}); + eof -> + file:close(Fd), + throw({error, {tooshort, FileName}}); + OK -> OK + end. + +file_error(FileName, {error, Reason}) -> + throw({error, {file_error, FileName, Reason}}). + +file_error_close(Fd, FileName, {error, Reason}) -> + file:close(Fd), + throw({error, {file_error, FileName, Reason}}). + +debug_mode() -> + os:getenv("DETS_DEBUG") =:= "true". + +bad_object(Where, Extra) -> + case debug_mode() of + true -> + {bad_object, Where, Extra}; + false -> + %% Avoid showing possibly secret data on the error logger. + {bad_object, Where} + end. + +read_n(Fd, Max) -> + case file:read(Fd, Max) of + {ok, Bin} -> + Bin; + _Else -> + eof + end. + +pread_n(Fd, Position, Max) -> + case file:pread(Fd, Position, Max) of + {ok, Bin} -> + Bin; + _ -> + eof + end. + +read_4(Fd, Position) -> + {ok, _} = file:position(Fd, Position), + <> = dets_utils:read_n(Fd, 4), + Four. + +corrupt_file(Head, {error, Reason}) -> + Error = {error, {file_error, Head#head.filename, Reason}}, + throw(corrupt(Head, Error)). + +%% -> {NewHead, Error} +corrupt_reason(Head, Reason0) -> + Reason = case get_disk_map() of + no_disk_map -> + Reason0; + DM -> + ST = erlang:get_stacktrace(), + PD = get(), + {Reason0, ST, PD, DM} + end, + Error = {error, {Reason, Head#head.filename}}, + corrupt(Head, Error). + +corrupt(Head, Error) -> + case get(verbose) of + yes -> + error_logger:format("** dets: Corrupt table ~p: ~p\n", + [Head#head.name, Error]); + _ -> ok + end, + case Head#head.update_mode of + {error, _} -> + {Head, Error}; + _ -> + {Head#head{update_mode = Error}, Error} + end. + +vformat(F, As) -> + case get(verbose) of + yes -> error_logger:format(F, As); + _ -> ok + end. + +code_to_type(?SET) -> set; +code_to_type(?BAG) -> bag; +code_to_type(?DUPLICATE_BAG) -> duplicate_bag; +code_to_type(_Type) -> badtype. + +type_to_code(set) -> ?SET; +type_to_code(bag) -> ?BAG; +type_to_code(duplicate_bag) -> ?DUPLICATE_BAG. + +%%% +%%% Write Cache +%%% + +cache_size(C) -> + {C#cache.delay, C#cache.tsize}. + +%% -> [object()] | false +cache_lookup(Type, [Key | Keys], CL, LU) -> + %% mkeysearch returns the _first_ tuple with a matching key. + case mkeysearch(Key, 1, CL) of + {value, {Key,{_Seq,{insert,Object}}}} when Type =:= set -> + cache_lookup(Type, Keys, CL, [Object | LU]); + {value, {Key,{_Seq,delete_key}}} -> + cache_lookup(Type, Keys, CL, LU); + _ -> + false + end; +cache_lookup(_Type, [], _CL, LU) -> + LU. + +reset_cache(C) -> + WrTime = C#cache.wrtime, + NewWrTime = if + WrTime =:= undefined -> + WrTime; + true -> + now() + end, + PK = family(C#cache.cache), + NewC = C#cache{cache = [], csize = 0, inserts = 0, wrtime = NewWrTime}, + {NewC, C#cache.inserts, PK}. + +is_empty_cache(Cache) -> + Cache#cache.cache =:= []. + +new_cache({Delay, Size}) -> + #cache{cache = [], csize = 0, inserts = 0, + tsize = Size, wrtime = undefined, delay = Delay}. + +%%% +%%% Buddy System +%%% + +%% Definitions for the buddy allocator. +-define(MAXBUD, 32). % 2 GB is maximum file size +-define(MAXFREELISTS, 50000000). % Bytes reserved for the free lists (at end). + +%%-define(DEBUG(X, Y), io:format(X, Y)). +-define(DEBUG(X, Y), true). + +%%% Algorithm : We use a buddy system on each file. This is nicely described +%%% in i.e. the last chapter of the first-grade text book +%%% Data structures and algorithms by Aho, Hopcroft and +%%% Ullman. I think buddy systems were invented by Knuth, a long +%%% time ago. + +init_slots_from_old_file([{Slot,Addr} | T], Ftab) -> + init_slot(Slot+1,[{Slot,Addr} | T], Ftab); +init_slots_from_old_file([], Ftab) -> + Ftab. + +init_slot(_Slot,[], Ftab) -> + Ftab; % should never happen +init_slot(_Slot,[{_Addr,0}|T], Ftab) -> + init_slots_from_old_file(T, Ftab); +init_slot(Slot,[{_Slot1,Addr}|T], Ftab) -> + Stree = element(Slot, Ftab), + %% io:format("init_slot ~p:~p~n",[Slot, Addr]), + init_slot(Slot,T,setelement(Slot, Ftab, bplus_insert(Stree, Addr))). + +%%% The free lists are kept in RAM, and written to the end of the file +%%% from time to time. It is possible that a considerable amount of +%%% memory is used for a fragmented file. +%%% +%%% To make things (slightly) worse (from a memory usage point of +%%% view), each traversal of the file starts with making a "map" of +%%% the allocated areas; only the allocated areas will be +%%% traversed. Creating a map involves inspecting and sorting the free +%%% lists. Since the map is passed on between client and server, it +%%% has to be a binary (to avoid copying a possibly huge term). +%%% +%%% An active map should always be protected by fixing the table. This +%%% prevents insertion of objects into the mapped area (where some +%%% objects may have been deleted). The means for implementing this +%%% protection is a copy of the free lists (using even more memory, if +%%% objects are inserted). The position to write an inserted object is +%%% found by looking at the free lists from the time when the table +%%% was fixed; areas within the mapped area that have been freed are +%%% hidden from the allocator. + +%% -> free_table() +%% A free table is a tuple of ?MAXBUD elements, element i handling +%% buddies of size 2^(i-1). +init_alloc(Base) -> + Ftab = empty_free_lists(), + Empty = bplus_empty_tree(), + setelement(?MAXBUD, Ftab, bplus_insert(Empty, Base)). + +empty_free_lists() -> + Empty = bplus_empty_tree(), + %% initiate a tuple with ?MAXBUD "Empty" elements + erlang:make_tuple(?MAXBUD, Empty). + +%% Only used when repairing or initiating. +alloc_many(Head, _Sz, 0, _A0) -> + Head; +alloc_many(Head, Sz, N, A0) -> + Ftab = Head#head.freelists, + Head#head{freelists = alloc_many1(Ftab, 1, Sz * N, A0, Head)}. + +%% -> NewFtab | throw(Error) +alloc_many1(Ftab, Pos, Size, A0, H) -> + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), + true = Addr >= A0, % assertion + if + ?POW(FPos - 1) >= Size -> + alloc_many2(Ftab, sz2pos(Size), Size, A0, H); + true -> + NewFtab = reserve_buddy(Ftab, FPos, FPos, Addr), + NSize = Size - ?POW(FPos-1), + alloc_many1(NewFtab, FPos, NSize, Addr, H) + end. + +alloc_many2(Ftab, _Pos, 0, _A0, _H) -> + Ftab; +alloc_many2(Ftab, Pos, Size, A0, H) when Size band ?POW(Pos-1) > 0 -> + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), + true = Addr >= A0, % assertion + NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), + NSize = Size - ?POW(Pos - 1), + alloc_many2(NewFtab, Pos-1, NSize, Addr, H); +alloc_many2(Ftab, Pos, Size, A0, H) -> + alloc_many2(Ftab, Pos-1, Size, A0, H). + +%% -> {NewHead, Addr, Log2} | throw(Error) +alloc(Head, Sz) when Head#head.fixed =/= false -> % when Sz > 0 + ?DEBUG("alloc of size ~p (fixed)", [Sz]), + Pos = sz2pos(Sz), + {Frozen, Ftab} = Head#head.freelists, + {FPos, Addr} = find_first_free(Frozen, Pos, Pos, Head), + NewFrozen = reserve_buddy(Frozen, FPos, Pos, Addr), + Ftab1 = undo_free(Ftab, FPos, Addr, Head#head.base), + NewFtab = move_down(Ftab1, FPos, Pos, Addr), + NewFreelists = {NewFrozen, NewFtab}, + {Head#head{freelists = NewFreelists}, Addr, Pos}; +alloc(Head, Sz) when Head#head.fixed =:= false -> % when Sz > 0 + ?DEBUG("alloc of size ~p", [Sz]), + Pos = sz2pos(Sz), + Ftab = Head#head.freelists, + {FPos, Addr} = find_first_free(Ftab, Pos, Pos, Head), + NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), + {Head#head{freelists = NewFtab}, Addr, Pos}. + +find_first_free(_Ftab, Pos, _Pos0, Head) when Pos > ?MAXBUD -> + throw({error, {no_more_space_on_file, Head#head.filename}}); +find_first_free(Ftab, Pos, Pos0, Head) -> + PosTab = element(Pos, Ftab), + case bplus_lookup_first(PosTab) of + undefined -> + find_first_free(Ftab, Pos+1, Pos0, Head); + {ok, Addr} when Addr + ?POW(Pos0-1) > ?POW(?MAXBUD-1)-?MAXFREELISTS -> + %% We would occupy (some of) the area reserved for the free lists. + throw({error, {no_more_space_on_file, Head#head.filename}}); + {ok, Addr} -> + {Pos, Addr} + end. + +%% When the table is fixed, free/4 may have joined buddies so that the +%% requested block is now part of some larger block. We have to find +%% that block, and insert free buddies along the way. +undo_free(Ftab, Pos, Addr, Base) -> + PosTab = element(Pos, Ftab), + case bplus_lookup(PosTab, Addr) of + undefined -> + {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), + NewFtab = setelement(Pos, Ftab, bplus_insert(PosTab, BuddyAddr)), + undo_free(NewFtab, Pos+1, MoveUpAddr, Base); + {ok, Addr} -> + NewPosTab = bplus_delete(PosTab, Addr), + setelement(Pos, Ftab, NewPosTab) + end. + +reserve_buddy(Ftab, Pos, Pos0, Addr) -> + PosTab = element(Pos, Ftab), + NewPosTab = bplus_delete(PosTab, Addr), + NewFtab = setelement(Pos, Ftab, NewPosTab), + move_down(NewFtab, Pos, Pos0, Addr). + +move_down(Ftab, Pos, Pos, _Addr) -> + ?DEBUG(" to address ~p, table ~p (~p bytes)~n", + [_Addr, Pos, ?POW(Pos-1)]), + Ftab; +move_down(Ftab, Pos, Pos0, Addr) -> + Pos_1 = Pos - 1, + Size = ?POW(Pos_1), + HighBuddy = (Addr + (Size bsr 1)), + NewPosTab_1 = bplus_insert(element(Pos_1, Ftab), HighBuddy), + NewFtab = setelement(Pos_1, Ftab, NewPosTab_1), + move_down(NewFtab, Pos_1, Pos0, Addr). + +%% -> {Head, Log2} +free(Head, Addr, Sz) -> + ?DEBUG("free of size ~p at address ~p~n", [Sz, Addr]), + Ftab = get_freelists(Head), + Pos = sz2pos(Sz), + {set_freelists(Head, free_in_pos(Ftab, Addr, Pos, Head#head.base)), Pos}. + +free_in_pos(Ftab, _Addr, Pos, _Base) when Pos > ?MAXBUD -> + Ftab; +free_in_pos(Ftab, Addr, Pos, Base) -> + PosTab = element(Pos, Ftab), + {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), + case bplus_lookup(PosTab, BuddyAddr) of + undefined -> % no buddy found + ?DEBUG(" table ~p, no buddy~n", [Pos]), + setelement(Pos, Ftab, bplus_insert(PosTab, Addr)); + {ok, BuddyAddr} -> % buddy found + PosTab1 = bplus_delete(PosTab, Addr), + PosTab2 = bplus_delete(PosTab1, BuddyAddr), + ?DEBUG(" table ~p, with buddy ~p~n", [Pos, BuddyAddr]), + NewFtab = setelement(Pos, Ftab, PosTab2), + free_in_pos(NewFtab, MoveUpAddr, Pos+1, Base) + end. + +get_freelists(Head) when Head#head.fixed =:= false -> + Head#head.freelists; +get_freelists(Head) when Head#head.fixed =/= false -> + {_Frozen, Current} = Head#head.freelists, + Current. + +set_freelists(Head, Ftab) when Head#head.fixed =:= false -> + Head#head{freelists = Ftab}; +set_freelists(Head, Ftab) when Head#head.fixed =/= false -> + {Frozen, _} = Head#head.freelists, + Head#head{freelists = {Frozen,Ftab}}. + +%% Bug: If Sz0 is equal to 2^k for some k, then 2^(k+1) bytes are +%% allocated (wasting 2^k bytes). Inlined. +sz2pos(N) when N > 0 -> + 1 + log2(N+1). + +%% Returns the i such that 2^(i-1) < N =< 2^i. +log2(N) when is_integer(N), N >= 0 -> + if N > ?POW(8) -> + if N > ?POW(10) -> + if N > ?POW(11) -> + if N > ?POW(12) -> + 12 + if N band (?POW(12)-1) =:= 0 -> + log2(N bsr 12); + true -> log2(1 + (N bsr 12)) + end; + true -> 12 + end; + true -> 11 + end; + N > ?POW(9) -> 10; + true -> 9 + end; + N > ?POW(4) -> + if N > ?POW(6) -> + if N > ?POW(7) -> 8; + true -> 7 + end; + N > ?POW(5) -> 6; + true -> 5 + end; + N > ?POW(2) -> + if + N > ?POW(3) -> 4; + true -> 3 + end; + N > ?POW(1) -> 2; + N >= ?POW(0) -> 1; + true -> 0 + end. + +make_zeros(0) -> []; +make_zeros(N) when N rem 2 =:= 0 -> + P = make_zeros(N div 2), + [P|P]; +make_zeros(N) -> + P = make_zeros(N div 2), + [0,P|P]. + +%% Calculate the buddy of Addr +my_buddy(Addr, Sz, Base) -> + case (Addr - Base) band Sz of + 0 -> % even, buddy is higher addr + {Addr+Sz, Addr}; + _ -> % odd, buddy is lower addr + T = Addr-Sz, + {T, T} + end. + +all_free(Head) -> + Tab = get_freelists(Head), + Base = Head#head.base, + case all_free(all(Tab), Base, Base, []) of + [{Base,Base} | L] -> L; + L -> L + end. + +all_free([], X0, Y0, F) -> + lists:reverse([{X0,Y0} | F]); +all_free([{X,Y} | L], X0, Y0, F) when Y0 =:= X -> + all_free(L, X0, Y, F); +all_free([{X,Y} | L], X0, Y0, F) when Y0 < X -> + all_free(L, X, Y, [{X0,Y0} | F]). + +all_allocated(Head) -> + all_allocated(all(get_freelists(Head)), 0, Head#head.base, []). + +all_allocated([], _X0, _Y0, []) -> + <<>>; +all_allocated([], _X0, _Y0, A0) -> + [<> | A] = lists:reverse(A0), + {From, To, list_to_binary(A)}; +all_allocated([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> + all_allocated(L, X0, Y, A); +all_allocated([{X,Y} | L], _X0, Y0, A) when Y0 < X -> + all_allocated(L, X, Y, [<> | A]). + +all_allocated_as_list(Head) -> + all_allocated_as_list(all(get_freelists(Head)), 0, Head#head.base, []). + +all_allocated_as_list([], _X0, _Y0, []) -> + []; +all_allocated_as_list([], _X0, _Y0, A) -> + lists:reverse(A); +all_allocated_as_list([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> + all_allocated_as_list(L, X0, Y, A); +all_allocated_as_list([{X,Y} | L], _X0, Y0, A) when Y0 < X -> + all_allocated_as_list(L, X, Y, [[Y0 | X] | A]). + +all(Tab) -> + all(Tab, tuple_size(Tab), []). + +all(_Tab, 0, L) -> + %% This is not as bad as it looks. L contains less than 32 runs, + %% so there will be only a small number of merges. + lists:sort(L); +all(Tab, I, L) -> + LL = collect_tree(element(I, Tab), I, L), + all(Tab, I-1, LL). + +%% Finds allocated areas between Addr (approx.) and Addr+Length. +find_allocated(Ftab, Addr, Length, Base) -> + MaxAddr = Addr + Length, + Ints = collect_all_interval(Ftab, Addr, MaxAddr, Base), + allocated(Ints, Addr, MaxAddr, Ftab, Base). + +allocated(Some, Addr, Max, Ftab, Base) -> + case allocated1(Some, Addr, Max, []) of + [] -> + case find_next_allocated(Ftab, Addr, Base) of + {From,_} -> + find_allocated(Ftab, From, ?CHUNK_SIZE, Base); + none -> + <<>> + end; + L -> + list_to_binary(lists:reverse(L)) + end. + +allocated1([], Y0, Max, A) when Y0 < Max -> + [<> | A]; +allocated1([], _Y0, _Max, A) -> + A; +allocated1([{X,Y} | L], Y0, Max, A) when Y0 >= X -> + allocated1(L, Y, Max, A); +allocated1([{X,Y} | L], Y0, Max, A) -> % when Y0 < X + allocated1(L, Y, Max, [<> | A]). + +%% Finds the first allocated area starting at Addr or later. +find_next_allocated(Ftab, Addr, Base) -> + case find_next_free(Ftab, Addr, Base) of + none -> + none; + {Addr1, Pos} when Addr1 =< Addr -> + find_next_allocated(Ftab, Addr1 + ?POW(Pos-1), Base); + {Next, _Pos} -> + {Addr, Next} + end. + +%% Finds the first free address starting att Addr or later. +%% -> none | {FirstFreeAddress, FtabPosition} +find_next_free(Ftab, Addr, Base) -> + MaxBud = tuple_size(Ftab), + find_next_free(Ftab, Addr, 1, MaxBud, -1, -1, Base). + +find_next_free(Ftab, Addr0, Pos, MaxBud, Next, PosN, Base) + when Pos =< MaxBud -> + Addr = adjust_addr(Addr0, Pos, Base), + PosTab = element(Pos, Ftab), + case bplus_lookup_next(PosTab, Addr-1) of + undefined -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base); + {ok, Next1} when PosN =:= -1; Next1 < Next -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next1, Pos, Base); + {ok, _} -> + find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base) + end; +find_next_free(_Ftab, _Addr, _Pos, _MaxBud, -1, _PosN, _Base) -> + none; +find_next_free(_Ftab, _Addr, _Pos, _MaxBud, Next, PosN, _Base) -> + {Next, PosN}. + +collect_all_interval(Ftab, Addr, MaxAddr, Base) -> + MaxBud = tuple_size(Ftab), + collect_all_interval(Ftab, Addr, MaxAddr, 1, MaxBud, Base, []). + +collect_all_interval(Ftab, L0, U, Pos, MaxBud, Base, Acc0) when Pos =< MaxBud -> + PosTab = element(Pos, Ftab), + L = adjust_addr(L0, Pos, Base), + Acc = collect_interval(PosTab, Pos, L, U, Acc0), + collect_all_interval(Ftab, L0, U, Pos+1, MaxBud, Base, Acc); +collect_all_interval(_Ftab, _L, _U, _Pos, _MaxBud, _Base, Acc) -> + lists:sort(Acc). + +%% It could be that Addr is inside a free area. This function adjusts +%% the address so that is placed on a boundary in the Pos tree. Inlined. +adjust_addr(Addr, Pos, Base) -> + Pow = ?POW(Pos - 1), + Rem = (Addr - Base) rem Pow, + if + Rem =:= 0 -> + Addr; + Addr < Pow -> + Addr; + true -> + Addr - Rem + end. + +%%%----------------------------------------------------------------- +%%% The Disk Map is used for debugging only. +%%% Very tightly coupled to the way dets_v9 works. +%%%----------------------------------------------------------------- + +-define(DM, disk_map). + +get_disk_map() -> + case get(?DM) of + undefined -> no_disk_map; + T -> {disk_map, ets:tab2list(T)} + end. + +init_disk_map(Name) -> + error_logger:info_msg("** dets: (debug) using disk map for ~p~n", [Name]), + put(?DM, ets:new(any,[ordered_set])). + +stop_disk_map() -> + catch ets:delete(erase(?DM)). + +disk_map_segment_p(Fd, P) -> + case get(?DM) of + undefined -> + ok; + _T -> + disk_map_segment(P, pread_n(Fd, P, 8*256)) + end. + +disk_map_segment(P, Segment) -> + case get(?DM) of + undefined -> + ok; + T -> + Ps = segment_fragment_to_pointers(P, iolist_to_binary(Segment)), + Ss = [{X,<>} || + {_P1,<>} <- Ps, + X > 0], % optimization + dm(Ps ++ Ss, T) + end. + +disk_map_pread(P) -> + case get(?DM) of + undefined -> + ok; + T -> + case ets:lookup(T, P) of + [] -> + throw({pread, P, 8}); + [{P,{pointer,0,0}}] -> + ok; + [{P,{pointer,Pointer,Sz}}] -> + case ets:lookup(T, Pointer) of + %% _P =/= P after re-hash... + [{Pointer,{slot,_P,Sz}}] -> + ok; + Got -> + throw({pread, P, Pointer, Got}) + end; + Got -> + throw({pread, P, Got}) + end + end. + +-define(STATUS_POS, 4). +-define(BASE, 1336). +disk_map(Bins) -> + case get(?DM) of + undefined -> + ok; + T -> + Bs = [{P,iolist_to_binary(Io)} || {P,Io} <- Bins], + dm(Bs, T) + end. + +dm([{P,_Header} | Bs], T) when P < ?BASE -> + dm(Bs, T); +dm([{P0,<>} | Bs], T) -> + P = P0 - ?STATUS_POS, + case ets:lookup(T, P) of + [] -> + throw({free, P0}); + [{P,_OldSz}] -> + true = ets:delete(T, P) + end, + dm(Bs, T); +dm([{SlotP,<>} | Bs], T) -> + Ptr = case ets:lookup(T, {pointer,SlotP}) of + [{{pointer,SlotP}, Pointer}] -> + case ets:lookup(T, Pointer) of + [{Pointer,{pointer,SlotP,Sz2}}] -> + case log2(Sz) =:= log2(Sz2) of + true -> + Pointer; + false -> + throw({active, SlotP, Sz, Pointer, Sz2}) + end; + Got -> + throw({active, SlotP, Sz, Got}) + end; + [] -> + throw({active, SlotP, Sz}) + end, + true = ets:insert(T, {SlotP,{slot,Ptr,Sz}}), + dm(Bs, T); +dm([{P,<>} | Bs], T) -> + %% Look for slot object in Bs? + case prev(P, T) of + {Prev, PrevSz} -> + throw({prev, P, Sz, X, Prev, PrevSz}); + ok -> + ok + end, + case next(P, 8, T) of + {next, Next} -> + %% Can (should?) do more... + throw({next, P, Sz, X, Next}); + ok -> + ok + end, + true = ets:insert(T, {P,{pointer,X,Sz}}), + if + Sz =:= 0 -> + X = 0; + true -> + true = ets:insert(T, {{pointer,X}, P}) + end, + dm(Bs, T); +dm([{P,<>} | Bs], T) -> + case ets:lookup(T, X) of + [] -> throw({segment, P, X}); + [{X,{pointer,0,0}}] -> ok; + [{X,{pointer,P,X}}] -> ok + end, + dm(Bs, T); +dm([{P,<<_Sz:32,B0/binary>>=B} | Bs], T) -> + Overwrite = + case catch binary_to_term(B0) of % accepts garbage at end of binary + {'EXIT', _} -> + <<_Sz1:32,B1/binary>> = B0, + case catch binary_to_term(B1) of + {'EXIT', _} -> + false; + _ -> + true + end; + _ -> + true + end, + if + Overwrite -> + %% overwrite same + dm([{P-8,<<(byte_size(B) + 8):32,?ACTIVE:32,B/binary>>} | Bs], T); + true -> + dm(segment_fragment_to_pointers(P, B)++Bs, T) + end; +dm([], _T) -> + ok. + +segment_fragment_to_pointers(_P, <<>>) -> + []; +segment_fragment_to_pointers(P, <>) -> + [{P,SzP} | segment_fragment_to_pointers(P+8, B)]. + +prev(P, T) -> + case ets:prev(T, P) of + '$end_of_table' -> ok; + Prev -> + case ets:lookup(T, Prev) of + [{Prev,{pointer,_Ptr,_}}] when Prev + 8 > P -> + {Prev, 8}; + [{Prev,{slot,_,Sz}}] when Prev + Sz > P -> + {Prev, Sz}; + _ -> + ok + end + end. + +next(P, PSz, T) -> + case ets:next(T, P) of + '$end_of_table' -> ok; + Next when P + PSz > Next -> + {next, Next}; + _ -> + ok + end. + +%%%----------------------------------------------------------------- +%%% These functions implement a B+ tree. +%%%----------------------------------------------------------------- + +-define(max_size, 16). +-define(min_size, 8). +%%----------------------------------------------------------------- +%% Finds out the type of the node: 'l' or 'n'. +%%----------------------------------------------------------------- +-define(NODE_TYPE(Tree), element(1, Tree)). +%% Finds out if a node/leaf is full or not. +-define(FULL(Tree), (bplus_get_size(Tree) >= ?max_size)). +%% Finds out if a node/leaf is filled up over its limit. +-define(OVER_FULL(Tree), (bplus_get_size(Tree) > ?max_size)). +%% Finds out if a node/leaf has less items than allowed. +-define(UNDER_FILLED(Tree), (bplus_get_size(Tree) < ?min_size)). +%% Finds out if a node/leaf has as few items as minimum allowed. +-define(LOW_FILLED(Tree), (bplus_get_size(Tree) =< ?min_size)). +%%Returns a key in a leaf at position Pos. +-define(GET_LEAF_KEY(Leaf, Pos), element(Pos+1, Leaf)). + +%% Special for dets. +collect_tree(v, _TI, Acc) -> Acc; +collect_tree(T, TI, Acc) -> + Pow = ?POW(TI-1), + collect_tree2(T, Pow, Acc). + +collect_tree2(Tree, Pow, Acc) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + collect_leaf(Tree, S, Pow, Acc); + n -> + collect_node(Tree, S, Pow, Acc) + end. + +collect_leaf(_Leaf, 0, _Pow, Acc) -> + Acc; +collect_leaf(Leaf, I, Pow, Acc) -> + Key = ?GET_LEAF_KEY(Leaf, I), + V = {Key, Key+Pow}, + collect_leaf(Leaf, I-1, Pow, [V | Acc]). + +collect_node(_Node, 0, _Pow, Acc) -> + Acc; +collect_node(Node, I, Pow, Acc) -> + Acc1 = collect_tree2(bplus_get_tree(Node, I), Pow, Acc), + collect_node(Node, I-1, Pow, Acc1). + +%% Special for dets. +tree_to_bin(v, _F, _Max, Ws, WsSz) -> {Ws, WsSz}; +tree_to_bin(T, F, Max, Ws, WsSz) -> + {N, L1, Ws1, WsSz1} = tree_to_bin2(T, F, Max, 0, [], Ws, WsSz), + {N1, L2, Ws2, WsSz2} = F(N, lists:reverse(L1), Ws1, WsSz1), + {0, [], NWs, NWsSz} = F(N1, L2, Ws2, WsSz2), + {NWs, NWsSz}. + +tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) when N >= Max -> + {NN, NAcc, NWs, NWsSz} = F(N, lists:reverse(Acc), Ws, WsSz), + tree_to_bin2(Tree, F, Max, NN, lists:reverse(NAcc), NWs, NWsSz); +tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + {N+S, leaf_to_bin(bplus_leaf_to_list(Tree), Acc), Ws, WsSz}; + n -> + node_to_bin(Tree, F, Max, N, Acc, 1, S, Ws, WsSz) + end. + +node_to_bin(_Node, _F, _Max, N, Acc, I, S, Ws, WsSz) when I > S -> + {N, Acc, Ws, WsSz}; +node_to_bin(Node, F, Max, N, Acc, I, S, Ws, WsSz) -> + {N1,Acc1,Ws1,WsSz1} = + tree_to_bin2(bplus_get_tree(Node, I), F, Max, N, Acc, Ws, WsSz), + node_to_bin(Node, F, Max, N1, Acc1, I+1, S, Ws1, WsSz1). + +leaf_to_bin([N | L], Acc) -> + leaf_to_bin(L, [<> | Acc]); +leaf_to_bin([], Acc) -> + Acc. + +%% Special for dets. +list_to_tree(L) -> + leafs_to_nodes(L, length(L), fun bplus_mk_leaf/1, []). + +leafs_to_nodes([], 0, _F, [T]) -> + T; +leafs_to_nodes([], 0, _F, L) -> + leafs_to_nodes(lists:reverse(L), length(L), fun mk_node/1, []); +leafs_to_nodes(Ls, Sz, F, L) -> + I = if + Sz =< 16 -> Sz; + Sz =< 32 -> Sz div 2; + true -> 12 + end, + {L1, R} = split_list(Ls, I, []), + N = F(L1), + Sz1 = Sz - I, + leafs_to_nodes(R, Sz1, F, [N | L]). + +mk_node([E | Es]) -> + NL = [E | lists:foldr(fun(X, A) -> [get_first_key(X), X | A] end, [], Es)], + bplus_mk_node(NL). + +split_list(L, 0, SL) -> + {SL, L}; +split_list([E | Es], I, SL) -> + split_list(Es, I-1, [E | SL]). + +get_first_key(T) -> + case ?NODE_TYPE(T) of + l -> + ?GET_LEAF_KEY(T, 1); + n -> + get_first_key(bplus_get_tree(T, 1)) + end. + +%% Special for dets. +collect_interval(v, _TI, _L, _U, Acc) -> Acc; +collect_interval(T, TI, L, U, Acc) -> + Pow = ?POW(TI-1), + collect_interval2(T, Pow, L, U, Acc). + +collect_interval2(Tree, Pow, L, U, Acc) -> + S = bplus_get_size(Tree), + case ?NODE_TYPE(Tree) of + l -> + collect_leaf_interval(Tree, S, Pow, L, U, Acc); + n -> + {Max, _} = bplus_select_sub_tree(Tree, U), + {Min, _} = bplus_select_sub_tree_2(Tree, L, Max), + collect_node_interval(Tree, Min, Max, Pow, L, U, Acc) + end. + +collect_leaf_interval(_Leaf, 0, _Pow, _L, _U, Acc) -> + Acc; +collect_leaf_interval(Leaf, I, Pow, L, U, Acc) -> + Key = ?GET_LEAF_KEY(Leaf, I), + if + Key < L -> + Acc; + Key > U -> + collect_leaf_interval(Leaf, I-1, Pow, L, U, Acc); + true -> + collect_leaf_interval(Leaf, I-1, Pow, L, U, [{Key,Key+Pow} | Acc]) + end. + +collect_node_interval(_Node, I, UP, _Pow, _L, _U, Acc) when I > UP -> + Acc; +collect_node_interval(Node, I, UP, Pow, L, U, Acc) -> + Acc1 = collect_interval2(bplus_get_tree(Node, I), Pow, L, U, Acc), + collect_node_interval(Node, I+1, UP, Pow, L, U, Acc1). + +%%----------------------------------------------------------------- +%% Func: empty_tree/0 +%% Purpose: Creates a new empty tree. +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_empty_tree() -> v. + +%%----------------------------------------------------------------- +%% Func: lookup/2 +%% Purpose: Looks for Key in the Tree. +%% Returns: {ok, {Key, Val}} | 'undefined'. +%%----------------------------------------------------------------- +bplus_lookup(v, _Key) -> undefined; +bplus_lookup(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_lookup_leaf(Key, Tree); + n -> + {_, SubTree} = bplus_select_sub_tree(Tree, Key), + bplus_lookup(SubTree, Key) + end. + +%%----------------------------------------------------------------- +%% Searches through a leaf until the Key is ok or +%% when it is determined that it does not exist. +%%----------------------------------------------------------------- +bplus_lookup_leaf(Key, Leaf) -> + bplus_lookup_leaf_2(Key, Leaf, bplus_get_size(Leaf)). + +bplus_lookup_leaf_2(_, _, 0) -> undefined; +bplus_lookup_leaf_2(Key, Leaf, N) -> + case ?GET_LEAF_KEY(Leaf, N) of + Key -> {ok, Key}; + _ -> + bplus_lookup_leaf_2(Key, Leaf, N-1) + end. + +%%----------------------------------------------------------------- +%% Func: lookup_first/1 +%% Purpose: Finds the smallest key in the entire Tree. +%% Returns: {ok, {Key, Val}} | 'undefined'. +%%----------------------------------------------------------------- +bplus_lookup_first(v) -> undefined; +bplus_lookup_first(Tree) -> + case ?NODE_TYPE(Tree) of + l -> + % Then it is the leftmost key here. + {ok, ?GET_LEAF_KEY(Tree, 1)}; + n -> + % Look in the leftmost subtree. + bplus_lookup_first(bplus_get_tree(Tree, 1)) + end. + + +%%----------------------------------------------------------------- +%% Func: lookup_next/2 +%% Purpose: Finds the next key nearest after Key. +%% Returns: {ok, {Key, Val}} | 'undefined'. NIX!!! +%%----------------------------------------------------------------- +bplus_lookup_next(v, _) -> undefined; +bplus_lookup_next(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + lookup_next_leaf(Key, Tree); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + case bplus_lookup_next(SubTree, Key) of + undefined -> + S = bplus_get_size(Tree), + if + % There is a right brother. + S > Pos -> + bplus_lookup_first(bplus_get_tree(Tree, Pos+1)); + % No there is no right brother. + true -> + undefined + end; + % We ok a next item. + Result -> + Result + end + end. + +%%----------------------------------------------------------------- +%% Returns {ok, NextKey} if there is a key in the leaf which is greater. +%% If there is no such key we return 'undefined' instead. +%% Key does not have to be a key in the structure, just a search value. +%%----------------------------------------------------------------- +lookup_next_leaf(Key, Leaf) -> + lookup_next_leaf_2(Key, Leaf, bplus_get_size(Leaf), 1). + +lookup_next_leaf_2(Key, Leaf, Size, Size) -> + % This is the rightmost key. + K = ?GET_LEAF_KEY(Leaf, Size), + if + K > Key -> + {ok, ?GET_LEAF_KEY(Leaf, Size)}; + true -> + undefined + end; +lookup_next_leaf_2(Key, Leaf, Size, N) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + K < Key -> + % K is still smaller, try next in the leaf. + lookup_next_leaf_2(Key, Leaf, Size, N+1); + Key == K -> + % Since this is exact Key it must be the next. + {ok, ?GET_LEAF_KEY(Leaf, N+1)}; + true -> + % Key was not an exact specification. + % It must be K that is next greater. + {ok, ?GET_LEAF_KEY(Leaf, N)} + end. + +%%----------------------------------------------------------------- +%% Func: insert/3 +%% Purpose: Inserts a new {Key, Value} into the tree. +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_insert(v, Key) -> bplus_mk_leaf([Key]); +bplus_insert(Tree, Key) -> + NewTree = bplus_insert_in(Tree, Key), + case ?OVER_FULL(NewTree) of + false -> + NewTree; + % If the node is over-full the tree will grow. + true -> + {LTree, DKey, RTree} = + case ?NODE_TYPE(NewTree) of + l -> + bplus_split_leaf(NewTree); + n -> + bplus_split_node(NewTree) + end, + bplus_mk_node([LTree, DKey, RTree]) + end. + +%%----------------------------------------------------------------- +%% Func: delete/2 +%% Purpose: Deletes a key from the tree (if present). +%% Returns: tree() +%%----------------------------------------------------------------- +bplus_delete(v, _Key) -> v; +bplus_delete(Tree, Key) -> + NewTree = bplus_delete_in(Tree, Key), + S = bplus_get_size(NewTree), + case ?NODE_TYPE(NewTree) of + l -> + if + S =:= 0 -> + v; + true -> + NewTree + end; + n -> + if + S =:= 1 -> + bplus_get_tree(NewTree, 1); + true -> + NewTree + end + end. + + +%%% ----------------------- +%%% Help function to insert. +%%% ----------------------- + +bplus_insert_in(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_insert_in_leaf(Tree, Key); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + % Pos = "the position of the subtree". + NewSubTree = bplus_insert_in(SubTree, Key), + case ?OVER_FULL(NewSubTree) of + false -> + bplus_put_subtree(Tree, [NewSubTree, Pos]); + true -> + case bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) of + {left, {LeftT, DKey, MiddleT}} -> + bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), + [LeftT, Pos-1, MiddleT, Pos]); + {right, {MiddleT, DKey, RightT}} -> + bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), + [MiddleT, Pos, RightT, Pos+1]); + {split, {LeftT, DKey, RightT}} -> + bplus_extend_tree(Tree, {LeftT, DKey, RightT}, Pos) + end + end + end. + +%%----------------------------------------------------------------- +%% Inserts a key in correct position in a leaf. +%%----------------------------------------------------------------- +bplus_insert_in_leaf(Leaf, Key) -> + bplus_insert_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). + +bplus_insert_in_leaf_2(Leaf, Key, 0, Accum) -> + bplus_insert_in_leaf_3(Leaf, 0, [Key|Accum]); +bplus_insert_in_leaf_2(Leaf, Key, N, Accum) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + Key < K -> + % Not here! + bplus_insert_in_leaf_2(Leaf, Key, N-1, [K|Accum]); + K < Key -> + % Insert here. + bplus_insert_in_leaf_3(Leaf, N-1, [K, Key|Accum]); + K == Key -> + % Replace (?). + bplus_insert_in_leaf_3(Leaf, N-1, [ Key|Accum]) + end. + +bplus_insert_in_leaf_3(_Leaf, 0, LeafList) -> + bplus_mk_leaf(LeafList); +bplus_insert_in_leaf_3(Leaf, N, LeafList) -> + bplus_insert_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). + + +%%% ------------------------- +%%% Help functions for delete. +%%% ------------------------- + +bplus_delete_in(Tree, Key) -> + case ?NODE_TYPE(Tree) of + l -> + bplus_delete_in_leaf(Tree, Key); + n -> + {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), + % Pos = "the position of the subtree". + NewSubTree = bplus_delete_in(SubTree, Key), + % Check if it has become to small now + case ?UNDER_FILLED(NewSubTree) of + false -> + bplus_put_subtree(Tree, [NewSubTree, Pos]); + true -> + case bplus_reorganize_tree_del(Tree, NewSubTree, Pos) of + {left, {LeftT, DKey, MiddleT}} -> + bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), + [LeftT, Pos-1, MiddleT, Pos]); + {right, {MiddleT, DKey, RightT}} -> + bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), + [MiddleT, Pos, RightT, Pos+1]); + {join_left, JoinedTree} -> + bplus_joinleft_tree(Tree, JoinedTree, Pos); + {join_right, JoinedTree} -> + bplus_joinright_tree(Tree, JoinedTree, Pos) + end + end + end. + +%%----------------------------------------------------------------- +%% Deletes a key from the leaf returning a new (smaller) leaf. +%%----------------------------------------------------------------- +bplus_delete_in_leaf(Leaf, Key) -> + bplus_delete_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). + +bplus_delete_in_leaf_2(Leaf, _, 0, _) -> Leaf; +bplus_delete_in_leaf_2(Leaf, Key, N, Accum) -> + K = ?GET_LEAF_KEY(Leaf, N), + if + Key == K -> + % Remove this one! + bplus_delete_in_leaf_3(Leaf, N-1, Accum); + true -> + bplus_delete_in_leaf_2(Leaf, Key, N-1, [K|Accum]) + end. + +bplus_delete_in_leaf_3(_Leaf, 0, LeafList) -> + bplus_mk_leaf(LeafList); +bplus_delete_in_leaf_3(Leaf, N, LeafList) -> + bplus_delete_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). + + + +%%----------------------------------------------------------------- +%% Selects and returns which subtree the search should continue in. +%%----------------------------------------------------------------- +bplus_select_sub_tree(Tree, Key) -> + bplus_select_sub_tree_2(Tree, Key, bplus_get_size(Tree)). + +bplus_select_sub_tree_2(Tree, _Key, 1) -> {1, bplus_get_tree(Tree, 1)}; +bplus_select_sub_tree_2(Tree, Key, N) -> + K = bplus_get_lkey(Tree, N), + if + K > Key -> + bplus_select_sub_tree_2(Tree, Key, N-1); + K =< Key -> + % Here it is! + {N, bplus_get_tree(Tree, N)} + end. + +%%----------------------------------------------------------------- +%% Selects which brother that should take over some of our items. +%% Or if they are both full makes a split. +%%----------------------------------------------------------------- +bplus_reorganize_tree_ins(Tree, NewSubTree, 1) -> + RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. + case ?FULL(RTree) of + false -> + bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); + true -> + % It is full, we must split this one! + bplus_reorganize_tree_s(NewSubTree) + end; +bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) -> + Size = bplus_get_size(Tree), + if + Pos == Size -> + % Pos is the rightmost postion!. + % Our only chance is the left one. + LTree = bplus_get_tree(Tree, Pos-1), + case ?FULL(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + % It is full, we must split this one! + bplus_reorganize_tree_s(NewSubTree) + end; + true -> + % Pos is somewhere inside the node. + LTree = bplus_get_tree(Tree, Pos-1), + RTree = bplus_get_tree(Tree, Pos+1), + SL = bplus_get_size(LTree), + SR = bplus_get_size(RTree), + if + SL > SR -> + bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); + SL < SR -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + case ?FULL(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + bplus_reorganize_tree_s(NewSubTree) + end + end + end. + +%%----------------------------------------------------------------- +%% This function fills over items from brothers to maintain the minimum +%% number of items per node/leaf. +%%----------------------------------------------------------------- +bplus_reorganize_tree_del(Tree, NewSubTree, 1) -> + % The case when Pos is at leftmost position. + RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. + case ?LOW_FILLED(RTree) of + false -> + bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); + true -> + % It is to small, we must join them! + bplus_reorganize_tree_jr(Tree, NewSubTree, 1, RTree) + end; +bplus_reorganize_tree_del(Tree, NewSubTree, Pos) -> + Size = bplus_get_size(Tree), + if + Pos == Size -> + % Pos is the rightmost postion!. + % Our only chance is the left one. + LTree = bplus_get_tree(Tree, Pos-1), + case ?LOW_FILLED(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + % It is to small, we must join this one! + bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) + end; + true -> + % Pos is somewhere inside the node. + LTree = bplus_get_tree(Tree, Pos-1), + RTree = bplus_get_tree(Tree, Pos+1), + SL = bplus_get_size(LTree), + SR = bplus_get_size(RTree), + if + SL>SR -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + SL < SR -> + bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); + true -> + case ?LOW_FILLED(LTree) of + false -> + bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); + true -> + bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) + end + end + end. + + +bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {left, bplus_split_leaf( + bplus_mk_leaf( + lists:append(bplus_leaf_to_list(LTree), + bplus_leaf_to_list(NewSubTree))))}; + n -> + {left, bplus_split_node( + bplus_mk_node( + lists:append([bplus_node_to_list(LTree), + [bplus_get_lkey(Tree, Pos)], + bplus_node_to_list(NewSubTree)])))} + end. + +bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {right, + bplus_split_leaf( + bplus_mk_leaf( + lists:append([bplus_leaf_to_list(NewSubTree), + bplus_leaf_to_list(RTree)])))}; + n -> + {right, + bplus_split_node( + bplus_mk_node( + lists:append([bplus_node_to_list(NewSubTree), + [bplus_get_rkey(Tree, Pos)], + bplus_node_to_list(RTree)])))} + end. + +bplus_reorganize_tree_s(NewSubTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {split, bplus_split_leaf(NewSubTree)}; + n -> + {split, bplus_split_node(NewSubTree)} + end. + +bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {join_left, + bplus_mk_leaf(lists:append([bplus_leaf_to_list(LTree), + bplus_leaf_to_list(NewSubTree)]))}; + n -> + {join_left, + bplus_mk_node(lists:append([bplus_node_to_list(LTree), + [bplus_get_lkey(Tree, Pos)], + bplus_node_to_list(NewSubTree)]))} + end. + +bplus_reorganize_tree_jr(Tree, NewSubTree, Pos, RTree) -> + case ?NODE_TYPE(NewSubTree) of + l -> + {join_right, + bplus_mk_leaf(lists:append([bplus_leaf_to_list(NewSubTree), + bplus_leaf_to_list(RTree)]))}; + n -> + {join_right, + bplus_mk_node(lists:append([bplus_node_to_list(NewSubTree), + [bplus_get_rkey(Tree, Pos)], + bplus_node_to_list(RTree)]))} + end. + + +%%----------------------------------------------------------------- +%% Takes a leaf and divides it into two equal big leaves. +%% The result is returned in a tuple. The dividing key is also returned. +%%----------------------------------------------------------------- +bplus_split_leaf(Leaf) -> + S = bplus_get_size(Leaf), + bplus_split_leaf_2(Leaf, S, S div 2, []). + +bplus_split_leaf_2(Leaf, Pos, 1, Accum) -> + K = ?GET_LEAF_KEY(Leaf, Pos), + bplus_split_leaf_3(Leaf, Pos-1, [], K, [K|Accum]); +bplus_split_leaf_2(Leaf, Pos, N, Accum) -> + bplus_split_leaf_2(Leaf, Pos-1, N-1, [?GET_LEAF_KEY(Leaf, Pos)|Accum]). + +bplus_split_leaf_3(_, 0, LeftAcc, DKey, RightAcc) -> + {bplus_mk_leaf(LeftAcc), DKey, bplus_mk_leaf(RightAcc)}; +bplus_split_leaf_3(Leaf, Pos, LeftAcc, DKey, RightAcc) -> + bplus_split_leaf_3(Leaf, Pos-1, [?GET_LEAF_KEY(Leaf, Pos)|LeftAcc], + DKey, RightAcc). + +%%----------------------------------------------------------------- +%% Takes a node and divides it into two equal big nodes. +%% The result is returned in a tuple. The dividing key is also returned. +%%----------------------------------------------------------------- +bplus_split_node(Node) -> + S = bplus_get_size(Node), + bplus_split_node_2(Node, S, S div 2, []). + +bplus_split_node_2(Node, Pos, 1, Accum) -> + bplus_split_node_3(Node, Pos-1, [], bplus_get_lkey(Node, Pos), + [bplus_get_tree(Node, Pos)|Accum]); +bplus_split_node_2(Node, Pos, N, Accum) -> + bplus_split_node_2(Node, Pos-1, N-1, [bplus_get_lkey(Node, Pos), + bplus_get_tree(Node, Pos)|Accum]). + +bplus_split_node_3(Node, 1, LeftAcc, DKey, RightAcc) -> + {bplus_mk_node([bplus_get_tree(Node, 1)|LeftAcc]), DKey, + bplus_mk_node(RightAcc)}; +bplus_split_node_3(Node, Pos, LeftAcc, DKey, RightAcc) -> + bplus_split_node_3(Node, Pos-1, + [bplus_get_lkey(Node, Pos), + bplus_get_tree(Node, Pos)|LeftAcc], + DKey, RightAcc). + +%%----------------------------------------------------------------- +%% Inserts a joined tree insted of the old one at position Pos and +%% the one nearest left/right brother. +%%----------------------------------------------------------------- +bplus_joinleft_tree(Tree, JoinedTree, Pos) -> + bplus_join_tree_2(Tree, JoinedTree, Pos, bplus_get_size(Tree), []). +bplus_joinright_tree(Tree, JoinedTree, Pos) -> + bplus_join_tree_2(Tree, JoinedTree, Pos+1, bplus_get_size(Tree), []). + +bplus_join_tree_2(Tree, JoinedTree, Pos, Pos, Accum) -> + bplus_join_tree_3(Tree, Pos-2, [JoinedTree|Accum]); +bplus_join_tree_2(Tree, JoinedTree, Pos, N, Accum) -> + bplus_join_tree_2(Tree, JoinedTree, Pos, N-1, + [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). + +bplus_join_tree_3(_Tree, 0, Accum) -> bplus_mk_node(Accum); +bplus_join_tree_3(Tree, Pos, Accum) -> + bplus_join_tree_3(Tree, Pos-1, [bplus_get_tree(Tree, Pos), + bplus_get_rkey(Tree, Pos)|Accum]). + +%%% --------------------------------- +%%% Primitive datastructure functions. +%%% --------------------------------- + +%%----------------------------------------------------------------- +%% Constructs a node out of list format. +%%----------------------------------------------------------------- +bplus_mk_node(NodeList) -> list_to_tuple([ n |NodeList]). + +%%----------------------------------------------------------------- +%% Converts the node into list format. +%%----------------------------------------------------------------- +bplus_node_to_list(Node) -> + [_|NodeList] = tuple_to_list(Node), + NodeList. + +%%----------------------------------------------------------------- +%% Constructs a leaf out of list format. +%%----------------------------------------------------------------- +bplus_mk_leaf(KeyList) -> list_to_tuple([l|KeyList]). + +%%----------------------------------------------------------------- +%% Converts a leaf into list format. +%%----------------------------------------------------------------- +bplus_leaf_to_list(Leaf) -> + [_|LeafList] = tuple_to_list(Leaf), + LeafList. + +%%----------------------------------------------------------------- +%% Changes subtree "pointers" in a node. +%%----------------------------------------------------------------- +bplus_put_subtree(Tree, []) -> Tree; +bplus_put_subtree(Tree, [NewSubTree, Pos|Rest]) -> + bplus_put_subtree(setelement(Pos*2, Tree, NewSubTree), Rest). + +%%----------------------------------------------------------------- +%% Replaces the tree at position Pos with two new trees. +%%----------------------------------------------------------------- +bplus_extend_tree(Tree, Inserts, Pos) -> + bplus_extend_tree_2(Tree, Inserts, Pos, bplus_get_size(Tree), []). + +bplus_extend_tree_2(Tree, {T1, DKey, T2}, Pos, Pos, Accum) -> + bplus_extend_tree_3(Tree, Pos-1, [T1, DKey, T2|Accum]); +bplus_extend_tree_2(Tree, Inserts, Pos, N, Accum) -> + bplus_extend_tree_2(Tree, Inserts, Pos, N-1, + [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). + +bplus_extend_tree_3(_, 0, Accum) -> bplus_mk_node(Accum); +bplus_extend_tree_3(Tree, N, Accum) -> + bplus_extend_tree_3(Tree, N-1, [bplus_get_tree(Tree, N), + bplus_get_rkey(Tree, N)|Accum]). + +%%----------------------------------------------------------------- +%% Changes the dividing key between two trees. +%%----------------------------------------------------------------- +bplus_put_lkey(Tree, DKey, Pos) -> setelement(Pos*2-1, Tree, DKey). +bplus_put_rkey(Tree, DKey, Pos) -> setelement(Pos*2+1, Tree, DKey). + + +%%----------------------------------------------------------------- +%% Calculates the number of items in a node/leaf. +%%----------------------------------------------------------------- +bplus_get_size(Tree) -> + case ?NODE_TYPE(Tree) of + l -> + tuple_size(Tree)-1; + n -> + tuple_size(Tree) div 2 + end. + +%%----------------------------------------------------------------- +%% Returns a tree at position Pos from an internal node. +%%----------------------------------------------------------------- +bplus_get_tree(Tree, Pos) -> element(Pos*2, Tree). + +%%----------------------------------------------------------------- +%% Returns dividing keys, left of or right of a tree. +%%----------------------------------------------------------------- +bplus_get_lkey(Tree, Pos) -> element(Pos*2-1, Tree). +bplus_get_rkey(Tree, Pos) -> element(Pos*2+1, Tree). + -- cgit v1.2.3