%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2001-2009. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% -module(dets_utils). %% Utility functions common to several dets file formats. %% To be used from dets, dets_v8 and dets_v9 only. -export([cmp/2, msort/1, mkeysort/2, mkeysearch/3, family/1]). -export([rename/2, pread/2, pread/4, ipread/3, pwrite/2, write/2, truncate/2, position/2, sync/1, open/2, truncate/3, fwrite/3, write_file/2, position/3, position_close/3, pwrite/4, pwrite/3, pread_close/4, read_n/2, pread_n/3, read_4/2]). -export([code_to_type/1, type_to_code/1]). -export([corrupt_reason/2, corrupt/2, corrupt_file/2, vformat/2, file_error/2]). -export([debug_mode/0, bad_object/2]). -export([cache_lookup/4, cache_size/1, new_cache/1, reset_cache/1, is_empty_cache/1]). -export([empty_free_lists/0, init_alloc/1, alloc_many/4, alloc/2, free/3, get_freelists/1, all_free/1, all_allocated/1, all_allocated_as_list/1, find_allocated/4, find_next_allocated/3, log2/1, make_zeros/1]). -export([init_slots_from_old_file/2]). -export([list_to_tree/1, tree_to_bin/5]). -compile({inline, [{sz2pos,1}, {adjust_addr,3}]}). -compile({inline, [{bplus_mk_leaf,1}, {bplus_get_size,1}, {bplus_get_tree,2}, {bplus_get_lkey,2}, {bplus_get_rkey,2}]}). %% Debug -export([init_disk_map/1, stop_disk_map/0, disk_map_segment_p/2, disk_map_segment/2]). -include("dets.hrl"). %%% A total ordering of all Erlang terms. %% -> -1 | 0 | 1. T1 is (smaller than | equal | greater than) T2. %% If is_integer(I), is_float(F), I == F then I is deemed smaller than F. cmp(T, T) -> 0; cmp([E1 | T1], [E2 | T2]) -> case cmp(E1, E2) of 0 -> cmp(T1, T2); R -> R end; cmp(T1, T2) when tuple_size(T1) =:= tuple_size(T2) -> tcmp(T1, T2, 1, tuple_size(T1)); cmp(I, F) when is_integer(I), is_float(F) -> -1; cmp(F, I) when is_float(F), is_integer(I) -> 1; cmp(T1, T2) when T1 < T2 -> -1; cmp(_T1, _T2) -> % when _T1 > _T2 1. tcmp(T1, T2, I, I) -> cmp(element(I, T1), element(I, T2)); tcmp(T1, T2, I, N) -> case cmp(element(I, T1), element(I, T2)) of 0 -> tcmp(T1, T2, I + 1, N); R -> R end. msort(L) -> %% sort is very much faster than msort, let it do most of the work. F = fun(X, Y) -> cmp(X, Y) =< 0 end, lists:sort(F, lists:sort(L)). mkeysort(I, L) -> F = fun(X, Y) -> cmp(element(I, X), element(I, Y)) =< 0 end, %% keysort is much faster than mkeysort, let it do most of the work. lists:sort(F, lists:keysort(I, L)). mkeysearch(Key, I, L) -> case lists:keysearch(Key, I, L) of {value, Value}=Reply when element(I, Value) =:= Key -> Reply; false -> false; _ -> mkeysearch2(Key, I, L) end. mkeysearch2(_Key, _I, []) -> false; mkeysearch2(Key, I, [E | _L]) when element(I, E) =:= Key -> {value, E}; mkeysearch2(Key, I, [_ | L]) -> mkeysearch2(Key, I, L). %% Be careful never to compare keys, but use matching instead. %% Otherwise sofs could have been used: %% sofs:to_external(sofs:relation_to_family(sofs:relation(L, 2))). family([]) -> []; family(L) -> [{K,V}|KVL] = mkeysort(1, L), per_key(KVL, K, [V], []). per_key([], K, Vs, KVs) -> lists:reverse(KVs, [{K,msort(Vs)}]); per_key([{K,V}|L], K, Vs, KVs) -> % match per_key(L, K, [V|Vs], KVs); per_key([{K1,V}|L], K, Vs, KVs) -> per_key(L, K1, [V], [{K,msort(Vs)}|KVs]). rename(From, To) -> case file:rename(From, To) of ok -> ok; {error, Reason} -> {error, {file_error, {From, To}, Reason}} end. %% -> {ok, Bins} | throw({NewHead, Error}) pread(Positions, Head) -> R = case file:pread(Head#head.fptr, Positions) of {ok, Bins} -> %% file:pread/2 can return 'eof' as "data". case lists:member(eof, Bins) of true -> {error, {premature_eof, Head#head.filename}}; false -> {ok, Bins} end; {error, Reason} when enomem =:= Reason; einval =:= Reason -> {error, {bad_object_header, Head#head.filename}}; {error, Reason} -> {file_error, Head#head.filename, Reason} end, case R of {ok, _Bins} -> R; Error -> throw(corrupt(Head, Error)) end. %% -> {ok, binary()} | throw({NewHead, Error}) pread(Head, Pos, Min, Extra) -> R = case file:pread(Head#head.fptr, Pos, Min+Extra) of {error, Reason} when enomem =:= Reason; einval =:= Reason -> {error, {bad_object_header, Head#head.filename}}; {error, Reason} -> {file_error, Head#head.filename, Reason}; {ok, Bin} when byte_size(Bin) < Min -> {error, {premature_eof, Head#head.filename}}; OK -> OK end, case R of {ok, _Bin} -> R; Error -> throw(corrupt(Head, Error)) end. %% -> eof | [] | {ok, {Size, Pointer, binary()}} ipread(Head, Pos1, MaxSize) -> try disk_map_pread(Pos1) catch Bad -> throw(corrupt_reason(Head, {disk_map, Bad})) end, case file:ipread_s32bu_p32bu(Head#head.fptr, Pos1, MaxSize) of {ok, {0, 0, eof}} -> []; {ok, Reply} -> {ok, Reply}; _Else -> eof end. %% -> {Head, ok} | throw({Head, Error}) pwrite(Head, []) -> {Head, ok}; pwrite(Head, Bins) -> try disk_map(Bins) catch Bad -> throw(corrupt_reason(Head, {disk_map, Bad, Bins})) end, case file:pwrite(Head#head.fptr, Bins) of ok -> {Head, ok}; Error -> corrupt_file(Head, Error) end. %% -> ok | throw({Head, Error}) write(_Head, []) -> ok; write(Head, Bins) -> case file:write(Head#head.fptr, Bins) of ok -> ok; Error -> corrupt_file(Head, Error) end. %% -> ok | throw({Head, Error}) %% Same as file:write_file/2, but calls file:sync/1. write_file(Head, Bin) -> R = case file:open(Head#head.filename, [binary, raw, write]) of {ok, Fd} -> R1 = file:write(Fd, Bin), R2 = file:sync(Fd), file:close(Fd), if R1 =:= ok -> R2; true -> R1 end; Else -> Else end, case R of ok -> ok; Error -> corrupt_file(Head, Error) end. %% -> ok | throw({Head, Error}) truncate(Head, Pos) -> case catch truncate(Head#head.fptr, Head#head.filename, Pos) of ok -> ok; Error -> throw(corrupt(Head, Error)) end. %% -> {ok, Pos} | throw({Head, Error}) position(Head, Pos) -> case file:position(Head#head.fptr, Pos) of {error, _Reason} = Error -> corrupt_file(Head, Error); OK -> OK end. %% -> ok | throw({Head, Error}) sync(Head) -> case file:sync(Head#head.fptr) of ok -> ok; Error -> corrupt_file(Head, Error) end. open(FileSpec, Args) -> case file:open(FileSpec, Args) of {ok, Fd} -> {ok, Fd}; Error -> file_error(FileSpec, Error) end. truncate(Fd, FileName, Pos) -> if Pos =:= cur -> ok; true -> position(Fd, FileName, Pos) end, case file:truncate(Fd) of ok -> ok; Error -> file_error(FileName, {error, Error}) end. fwrite(Fd, FileName, B) -> case file:write(Fd, B) of ok -> ok; Error -> file_error_close(Fd, FileName, Error) end. position(Fd, FileName, Pos) -> case file:position(Fd, Pos) of {error, Error} -> file_error(FileName, {error, Error}); OK -> OK end. position_close(Fd, FileName, Pos) -> case file:position(Fd, Pos) of {error, Error} -> file_error_close(Fd, FileName, {error, Error}); OK -> OK end. pwrite(Fd, FileName, Position, B) -> case file:pwrite(Fd, Position, B) of ok -> ok; Error -> file_error(FileName, {error, Error}) end. pwrite(Fd, FileName, Bins) -> case file:pwrite(Fd, Bins) of ok -> ok; {error, {_NoWrites, Reason}} -> file_error(FileName, {error, Reason}) end. pread_close(Fd, FileName, Pos, Size) -> case file:pread(Fd, Pos, Size) of {error, Error} -> file_error_close(Fd, FileName, {error, Error}); {ok, Bin} when byte_size(Bin) < Size -> file:close(Fd), throw({error, {tooshort, FileName}}); eof -> file:close(Fd), throw({error, {tooshort, FileName}}); OK -> OK end. file_error(FileName, {error, Reason}) -> throw({error, {file_error, FileName, Reason}}). file_error_close(Fd, FileName, {error, Reason}) -> file:close(Fd), throw({error, {file_error, FileName, Reason}}). debug_mode() -> os:getenv("DETS_DEBUG") =:= "true". bad_object(Where, Extra) -> case debug_mode() of true -> {bad_object, Where, Extra}; false -> %% Avoid showing possibly secret data on the error logger. {bad_object, Where} end. read_n(Fd, Max) -> case file:read(Fd, Max) of {ok, Bin} -> Bin; _Else -> eof end. pread_n(Fd, Position, Max) -> case file:pread(Fd, Position, Max) of {ok, Bin} -> Bin; _ -> eof end. read_4(Fd, Position) -> {ok, _} = file:position(Fd, Position), <<Four:32>> = dets_utils:read_n(Fd, 4), Four. corrupt_file(Head, {error, Reason}) -> Error = {error, {file_error, Head#head.filename, Reason}}, throw(corrupt(Head, Error)). %% -> {NewHead, Error} corrupt_reason(Head, Reason0) -> Reason = case get_disk_map() of no_disk_map -> Reason0; DM -> ST = erlang:get_stacktrace(), PD = get(), {Reason0, ST, PD, DM} end, Error = {error, {Reason, Head#head.filename}}, corrupt(Head, Error). corrupt(Head, Error) -> case get(verbose) of yes -> error_logger:format("** dets: Corrupt table ~p: ~p\n", [Head#head.name, Error]); _ -> ok end, case Head#head.update_mode of {error, _} -> {Head, Error}; _ -> {Head#head{update_mode = Error}, Error} end. vformat(F, As) -> case get(verbose) of yes -> error_logger:format(F, As); _ -> ok end. code_to_type(?SET) -> set; code_to_type(?BAG) -> bag; code_to_type(?DUPLICATE_BAG) -> duplicate_bag; code_to_type(_Type) -> badtype. type_to_code(set) -> ?SET; type_to_code(bag) -> ?BAG; type_to_code(duplicate_bag) -> ?DUPLICATE_BAG. %%% %%% Write Cache %%% cache_size(C) -> {C#cache.delay, C#cache.tsize}. %% -> [object()] | false cache_lookup(Type, [Key | Keys], CL, LU) -> %% mkeysearch returns the _first_ tuple with a matching key. case mkeysearch(Key, 1, CL) of {value, {Key,{_Seq,{insert,Object}}}} when Type =:= set -> cache_lookup(Type, Keys, CL, [Object | LU]); {value, {Key,{_Seq,delete_key}}} -> cache_lookup(Type, Keys, CL, LU); _ -> false end; cache_lookup(_Type, [], _CL, LU) -> LU. reset_cache(C) -> WrTime = C#cache.wrtime, NewWrTime = if WrTime =:= undefined -> WrTime; true -> now() end, PK = family(C#cache.cache), NewC = C#cache{cache = [], csize = 0, inserts = 0, wrtime = NewWrTime}, {NewC, C#cache.inserts, PK}. is_empty_cache(Cache) -> Cache#cache.cache =:= []. new_cache({Delay, Size}) -> #cache{cache = [], csize = 0, inserts = 0, tsize = Size, wrtime = undefined, delay = Delay}. %%% %%% Buddy System %%% %% Definitions for the buddy allocator. -define(MAXBUD, 32). % 2 GB is maximum file size -define(MAXFREELISTS, 50000000). % Bytes reserved for the free lists (at end). %%-define(DEBUG(X, Y), io:format(X, Y)). -define(DEBUG(X, Y), true). %%% Algorithm : We use a buddy system on each file. This is nicely described %%% in i.e. the last chapter of the first-grade text book %%% Data structures and algorithms by Aho, Hopcroft and %%% Ullman. I think buddy systems were invented by Knuth, a long %%% time ago. init_slots_from_old_file([{Slot,Addr} | T], Ftab) -> init_slot(Slot+1,[{Slot,Addr} | T], Ftab); init_slots_from_old_file([], Ftab) -> Ftab. init_slot(_Slot,[], Ftab) -> Ftab; % should never happen init_slot(_Slot,[{_Addr,0}|T], Ftab) -> init_slots_from_old_file(T, Ftab); init_slot(Slot,[{_Slot1,Addr}|T], Ftab) -> Stree = element(Slot, Ftab), %% io:format("init_slot ~p:~p~n",[Slot, Addr]), init_slot(Slot,T,setelement(Slot, Ftab, bplus_insert(Stree, Addr))). %%% The free lists are kept in RAM, and written to the end of the file %%% from time to time. It is possible that a considerable amount of %%% memory is used for a fragmented file. %%% %%% To make things (slightly) worse (from a memory usage point of %%% view), each traversal of the file starts with making a "map" of %%% the allocated areas; only the allocated areas will be %%% traversed. Creating a map involves inspecting and sorting the free %%% lists. Since the map is passed on between client and server, it %%% has to be a binary (to avoid copying a possibly huge term). %%% %%% An active map should always be protected by fixing the table. This %%% prevents insertion of objects into the mapped area (where some %%% objects may have been deleted). The means for implementing this %%% protection is a copy of the free lists (using even more memory, if %%% objects are inserted). The position to write an inserted object is %%% found by looking at the free lists from the time when the table %%% was fixed; areas within the mapped area that have been freed are %%% hidden from the allocator. %% -> free_table() %% A free table is a tuple of ?MAXBUD elements, element i handling %% buddies of size 2^(i-1). init_alloc(Base) -> Ftab = empty_free_lists(), Empty = bplus_empty_tree(), setelement(?MAXBUD, Ftab, bplus_insert(Empty, Base)). empty_free_lists() -> Empty = bplus_empty_tree(), %% initiate a tuple with ?MAXBUD "Empty" elements erlang:make_tuple(?MAXBUD, Empty). %% Only used when repairing or initiating. alloc_many(Head, _Sz, 0, _A0) -> Head; alloc_many(Head, Sz, N, A0) -> Ftab = Head#head.freelists, Head#head{freelists = alloc_many1(Ftab, 1, Sz * N, A0, Head)}. %% -> NewFtab | throw(Error) alloc_many1(Ftab, Pos, Size, A0, H) -> {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), true = Addr >= A0, % assertion if ?POW(FPos - 1) >= Size -> alloc_many2(Ftab, sz2pos(Size), Size, A0, H); true -> NewFtab = reserve_buddy(Ftab, FPos, FPos, Addr), NSize = Size - ?POW(FPos-1), alloc_many1(NewFtab, FPos, NSize, Addr, H) end. alloc_many2(Ftab, _Pos, 0, _A0, _H) -> Ftab; alloc_many2(Ftab, Pos, Size, A0, H) when Size band ?POW(Pos-1) > 0 -> {FPos, Addr} = find_first_free(Ftab, Pos, Pos, H), true = Addr >= A0, % assertion NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), NSize = Size - ?POW(Pos - 1), alloc_many2(NewFtab, Pos-1, NSize, Addr, H); alloc_many2(Ftab, Pos, Size, A0, H) -> alloc_many2(Ftab, Pos-1, Size, A0, H). %% -> {NewHead, Addr, Log2} | throw(Error) alloc(Head, Sz) when Head#head.fixed =/= false -> % when Sz > 0 ?DEBUG("alloc of size ~p (fixed)", [Sz]), Pos = sz2pos(Sz), {Frozen, Ftab} = Head#head.freelists, {FPos, Addr} = find_first_free(Frozen, Pos, Pos, Head), NewFrozen = reserve_buddy(Frozen, FPos, Pos, Addr), Ftab1 = undo_free(Ftab, FPos, Addr, Head#head.base), NewFtab = move_down(Ftab1, FPos, Pos, Addr), NewFreelists = {NewFrozen, NewFtab}, {Head#head{freelists = NewFreelists}, Addr, Pos}; alloc(Head, Sz) when Head#head.fixed =:= false -> % when Sz > 0 ?DEBUG("alloc of size ~p", [Sz]), Pos = sz2pos(Sz), Ftab = Head#head.freelists, {FPos, Addr} = find_first_free(Ftab, Pos, Pos, Head), NewFtab = reserve_buddy(Ftab, FPos, Pos, Addr), {Head#head{freelists = NewFtab}, Addr, Pos}. find_first_free(_Ftab, Pos, _Pos0, Head) when Pos > ?MAXBUD -> throw({error, {no_more_space_on_file, Head#head.filename}}); find_first_free(Ftab, Pos, Pos0, Head) -> PosTab = element(Pos, Ftab), case bplus_lookup_first(PosTab) of undefined -> find_first_free(Ftab, Pos+1, Pos0, Head); {ok, Addr} when Addr + ?POW(Pos0-1) > ?POW(?MAXBUD-1)-?MAXFREELISTS -> %% We would occupy (some of) the area reserved for the free lists. throw({error, {no_more_space_on_file, Head#head.filename}}); {ok, Addr} -> {Pos, Addr} end. %% When the table is fixed, free/4 may have joined buddies so that the %% requested block is now part of some larger block. We have to find %% that block, and insert free buddies along the way. undo_free(Ftab, Pos, Addr, Base) -> PosTab = element(Pos, Ftab), case bplus_lookup(PosTab, Addr) of undefined -> {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), NewFtab = setelement(Pos, Ftab, bplus_insert(PosTab, BuddyAddr)), undo_free(NewFtab, Pos+1, MoveUpAddr, Base); {ok, Addr} -> NewPosTab = bplus_delete(PosTab, Addr), setelement(Pos, Ftab, NewPosTab) end. reserve_buddy(Ftab, Pos, Pos0, Addr) -> PosTab = element(Pos, Ftab), NewPosTab = bplus_delete(PosTab, Addr), NewFtab = setelement(Pos, Ftab, NewPosTab), move_down(NewFtab, Pos, Pos0, Addr). move_down(Ftab, Pos, Pos, _Addr) -> ?DEBUG(" to address ~p, table ~p (~p bytes)~n", [_Addr, Pos, ?POW(Pos-1)]), Ftab; move_down(Ftab, Pos, Pos0, Addr) -> Pos_1 = Pos - 1, Size = ?POW(Pos_1), HighBuddy = (Addr + (Size bsr 1)), NewPosTab_1 = bplus_insert(element(Pos_1, Ftab), HighBuddy), NewFtab = setelement(Pos_1, Ftab, NewPosTab_1), move_down(NewFtab, Pos_1, Pos0, Addr). %% -> {Head, Log2} free(Head, Addr, Sz) -> ?DEBUG("free of size ~p at address ~p~n", [Sz, Addr]), Ftab = get_freelists(Head), Pos = sz2pos(Sz), {set_freelists(Head, free_in_pos(Ftab, Addr, Pos, Head#head.base)), Pos}. free_in_pos(Ftab, _Addr, Pos, _Base) when Pos > ?MAXBUD -> Ftab; free_in_pos(Ftab, Addr, Pos, Base) -> PosTab = element(Pos, Ftab), {BuddyAddr, MoveUpAddr} = my_buddy(Addr, ?POW(Pos-1), Base), case bplus_lookup(PosTab, BuddyAddr) of undefined -> % no buddy found ?DEBUG(" table ~p, no buddy~n", [Pos]), setelement(Pos, Ftab, bplus_insert(PosTab, Addr)); {ok, BuddyAddr} -> % buddy found PosTab1 = bplus_delete(PosTab, Addr), PosTab2 = bplus_delete(PosTab1, BuddyAddr), ?DEBUG(" table ~p, with buddy ~p~n", [Pos, BuddyAddr]), NewFtab = setelement(Pos, Ftab, PosTab2), free_in_pos(NewFtab, MoveUpAddr, Pos+1, Base) end. get_freelists(Head) when Head#head.fixed =:= false -> Head#head.freelists; get_freelists(Head) when Head#head.fixed =/= false -> {_Frozen, Current} = Head#head.freelists, Current. set_freelists(Head, Ftab) when Head#head.fixed =:= false -> Head#head{freelists = Ftab}; set_freelists(Head, Ftab) when Head#head.fixed =/= false -> {Frozen, _} = Head#head.freelists, Head#head{freelists = {Frozen,Ftab}}. %% Bug: If Sz0 is equal to 2^k for some k, then 2^(k+1) bytes are %% allocated (wasting 2^k bytes). Inlined. sz2pos(N) when N > 0 -> 1 + log2(N+1). %% Returns the i such that 2^(i-1) < N =< 2^i. log2(N) when is_integer(N), N >= 0 -> if N > ?POW(8) -> if N > ?POW(10) -> if N > ?POW(11) -> if N > ?POW(12) -> 12 + if N band (?POW(12)-1) =:= 0 -> log2(N bsr 12); true -> log2(1 + (N bsr 12)) end; true -> 12 end; true -> 11 end; N > ?POW(9) -> 10; true -> 9 end; N > ?POW(4) -> if N > ?POW(6) -> if N > ?POW(7) -> 8; true -> 7 end; N > ?POW(5) -> 6; true -> 5 end; N > ?POW(2) -> if N > ?POW(3) -> 4; true -> 3 end; N > ?POW(1) -> 2; N >= ?POW(0) -> 1; true -> 0 end. make_zeros(0) -> []; make_zeros(N) when N rem 2 =:= 0 -> P = make_zeros(N div 2), [P|P]; make_zeros(N) -> P = make_zeros(N div 2), [0,P|P]. %% Calculate the buddy of Addr my_buddy(Addr, Sz, Base) -> case (Addr - Base) band Sz of 0 -> % even, buddy is higher addr {Addr+Sz, Addr}; _ -> % odd, buddy is lower addr T = Addr-Sz, {T, T} end. all_free(Head) -> Tab = get_freelists(Head), Base = Head#head.base, case all_free(all(Tab), Base, Base, []) of [{Base,Base} | L] -> L; L -> L end. all_free([], X0, Y0, F) -> lists:reverse([{X0,Y0} | F]); all_free([{X,Y} | L], X0, Y0, F) when Y0 =:= X -> all_free(L, X0, Y, F); all_free([{X,Y} | L], X0, Y0, F) when Y0 < X -> all_free(L, X, Y, [{X0,Y0} | F]). all_allocated(Head) -> all_allocated(all(get_freelists(Head)), 0, Head#head.base, []). all_allocated([], _X0, _Y0, []) -> <<>>; all_allocated([], _X0, _Y0, A0) -> [<<From:32, To:32>> | A] = lists:reverse(A0), {From, To, list_to_binary(A)}; all_allocated([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> all_allocated(L, X0, Y, A); all_allocated([{X,Y} | L], _X0, Y0, A) when Y0 < X -> all_allocated(L, X, Y, [<<Y0:32,X:32>> | A]). all_allocated_as_list(Head) -> all_allocated_as_list(all(get_freelists(Head)), 0, Head#head.base, []). all_allocated_as_list([], _X0, _Y0, []) -> []; all_allocated_as_list([], _X0, _Y0, A) -> lists:reverse(A); all_allocated_as_list([{X,Y} | L], X0, Y0, A) when Y0 =:= X -> all_allocated_as_list(L, X0, Y, A); all_allocated_as_list([{X,Y} | L], _X0, Y0, A) when Y0 < X -> all_allocated_as_list(L, X, Y, [[Y0 | X] | A]). all(Tab) -> all(Tab, tuple_size(Tab), []). all(_Tab, 0, L) -> %% This is not as bad as it looks. L contains less than 32 runs, %% so there will be only a small number of merges. lists:sort(L); all(Tab, I, L) -> LL = collect_tree(element(I, Tab), I, L), all(Tab, I-1, LL). %% Finds allocated areas between Addr (approx.) and Addr+Length. find_allocated(Ftab, Addr, Length, Base) -> MaxAddr = Addr + Length, Ints = collect_all_interval(Ftab, Addr, MaxAddr, Base), allocated(Ints, Addr, MaxAddr, Ftab, Base). allocated(Some, Addr, Max, Ftab, Base) -> case allocated1(Some, Addr, Max, []) of [] -> case find_next_allocated(Ftab, Addr, Base) of {From,_} -> find_allocated(Ftab, From, ?CHUNK_SIZE, Base); none -> <<>> end; L -> list_to_binary(lists:reverse(L)) end. allocated1([], Y0, Max, A) when Y0 < Max -> [<<Y0:32,Max:32>> | A]; allocated1([], _Y0, _Max, A) -> A; allocated1([{X,Y} | L], Y0, Max, A) when Y0 >= X -> allocated1(L, Y, Max, A); allocated1([{X,Y} | L], Y0, Max, A) -> % when Y0 < X allocated1(L, Y, Max, [<<Y0:32,X:32>> | A]). %% Finds the first allocated area starting at Addr or later. find_next_allocated(Ftab, Addr, Base) -> case find_next_free(Ftab, Addr, Base) of none -> none; {Addr1, Pos} when Addr1 =< Addr -> find_next_allocated(Ftab, Addr1 + ?POW(Pos-1), Base); {Next, _Pos} -> {Addr, Next} end. %% Finds the first free address starting att Addr or later. %% -> none | {FirstFreeAddress, FtabPosition} find_next_free(Ftab, Addr, Base) -> MaxBud = tuple_size(Ftab), find_next_free(Ftab, Addr, 1, MaxBud, -1, -1, Base). find_next_free(Ftab, Addr0, Pos, MaxBud, Next, PosN, Base) when Pos =< MaxBud -> Addr = adjust_addr(Addr0, Pos, Base), PosTab = element(Pos, Ftab), case bplus_lookup_next(PosTab, Addr-1) of undefined -> find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base); {ok, Next1} when PosN =:= -1; Next1 < Next -> find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next1, Pos, Base); {ok, _} -> find_next_free(Ftab, Addr0, Pos+1, MaxBud, Next, PosN, Base) end; find_next_free(_Ftab, _Addr, _Pos, _MaxBud, -1, _PosN, _Base) -> none; find_next_free(_Ftab, _Addr, _Pos, _MaxBud, Next, PosN, _Base) -> {Next, PosN}. collect_all_interval(Ftab, Addr, MaxAddr, Base) -> MaxBud = tuple_size(Ftab), collect_all_interval(Ftab, Addr, MaxAddr, 1, MaxBud, Base, []). collect_all_interval(Ftab, L0, U, Pos, MaxBud, Base, Acc0) when Pos =< MaxBud -> PosTab = element(Pos, Ftab), L = adjust_addr(L0, Pos, Base), Acc = collect_interval(PosTab, Pos, L, U, Acc0), collect_all_interval(Ftab, L0, U, Pos+1, MaxBud, Base, Acc); collect_all_interval(_Ftab, _L, _U, _Pos, _MaxBud, _Base, Acc) -> lists:sort(Acc). %% It could be that Addr is inside a free area. This function adjusts %% the address so that is placed on a boundary in the Pos tree. Inlined. adjust_addr(Addr, Pos, Base) -> Pow = ?POW(Pos - 1), Rem = (Addr - Base) rem Pow, if Rem =:= 0 -> Addr; Addr < Pow -> Addr; true -> Addr - Rem end. %%%----------------------------------------------------------------- %%% The Disk Map is used for debugging only. %%% Very tightly coupled to the way dets_v9 works. %%%----------------------------------------------------------------- -define(DM, disk_map). get_disk_map() -> case get(?DM) of undefined -> no_disk_map; T -> {disk_map, ets:tab2list(T)} end. init_disk_map(Name) -> error_logger:info_msg("** dets: (debug) using disk map for ~p~n", [Name]), put(?DM, ets:new(any,[ordered_set])). stop_disk_map() -> catch ets:delete(erase(?DM)). disk_map_segment_p(Fd, P) -> case get(?DM) of undefined -> ok; _T -> disk_map_segment(P, pread_n(Fd, P, 8*256)) end. disk_map_segment(P, Segment) -> case get(?DM) of undefined -> ok; T -> Ps = segment_fragment_to_pointers(P, iolist_to_binary(Segment)), Ss = [{X,<<Sz:32,?ACTIVE:32>>} || {_P1,<<Sz:32,X:32>>} <- Ps, X > 0], % optimization dm(Ps ++ Ss, T) end. disk_map_pread(P) -> case get(?DM) of undefined -> ok; T -> case ets:lookup(T, P) of [] -> throw({pread, P, 8}); [{P,{pointer,0,0}}] -> ok; [{P,{pointer,Pointer,Sz}}] -> case ets:lookup(T, Pointer) of %% _P =/= P after re-hash... [{Pointer,{slot,_P,Sz}}] -> ok; Got -> throw({pread, P, Pointer, Got}) end; Got -> throw({pread, P, Got}) end end. -define(STATUS_POS, 4). -define(BASE, 1336). disk_map(Bins) -> case get(?DM) of undefined -> ok; T -> Bs = [{P,iolist_to_binary(Io)} || {P,Io} <- Bins], dm(Bs, T) end. dm([{P,_Header} | Bs], T) when P < ?BASE -> dm(Bs, T); dm([{P0,<<?FREE:32>>} | Bs], T) -> P = P0 - ?STATUS_POS, case ets:lookup(T, P) of [] -> throw({free, P0}); [{P,_OldSz}] -> true = ets:delete(T, P) end, dm(Bs, T); dm([{SlotP,<<Sz:32,?ACTIVE:32,_/binary>>} | Bs], T) -> Ptr = case ets:lookup(T, {pointer,SlotP}) of [{{pointer,SlotP}, Pointer}] -> case ets:lookup(T, Pointer) of [{Pointer,{pointer,SlotP,Sz2}}] -> case log2(Sz) =:= log2(Sz2) of true -> Pointer; false -> throw({active, SlotP, Sz, Pointer, Sz2}) end; Got -> throw({active, SlotP, Sz, Got}) end; [] -> throw({active, SlotP, Sz}) end, true = ets:insert(T, {SlotP,{slot,Ptr,Sz}}), dm(Bs, T); dm([{P,<<Sz:32,X:32>>} | Bs], T) -> %% Look for slot object in Bs? case prev(P, T) of {Prev, PrevSz} -> throw({prev, P, Sz, X, Prev, PrevSz}); ok -> ok end, case next(P, 8, T) of {next, Next} -> %% Can (should?) do more... throw({next, P, Sz, X, Next}); ok -> ok end, true = ets:insert(T, {P,{pointer,X,Sz}}), if Sz =:= 0 -> X = 0; true -> true = ets:insert(T, {{pointer,X}, P}) end, dm(Bs, T); dm([{P,<<X:32>>} | Bs], T) -> case ets:lookup(T, X) of [] -> throw({segment, P, X}); [{X,{pointer,0,0}}] -> ok; [{X,{pointer,P,X}}] -> ok end, dm(Bs, T); dm([{P,<<_Sz:32,B0/binary>>=B} | Bs], T) -> Overwrite = case catch binary_to_term(B0) of % accepts garbage at end of binary {'EXIT', _} -> <<_Sz1:32,B1/binary>> = B0, case catch binary_to_term(B1) of {'EXIT', _} -> false; _ -> true end; _ -> true end, if Overwrite -> %% overwrite same dm([{P-8,<<(byte_size(B) + 8):32,?ACTIVE:32,B/binary>>} | Bs], T); true -> dm(segment_fragment_to_pointers(P, B)++Bs, T) end; dm([], _T) -> ok. segment_fragment_to_pointers(_P, <<>>) -> []; segment_fragment_to_pointers(P, <<SzP:8/binary,B/binary>>) -> [{P,SzP} | segment_fragment_to_pointers(P+8, B)]. prev(P, T) -> case ets:prev(T, P) of '$end_of_table' -> ok; Prev -> case ets:lookup(T, Prev) of [{Prev,{pointer,_Ptr,_}}] when Prev + 8 > P -> {Prev, 8}; [{Prev,{slot,_,Sz}}] when Prev + Sz > P -> {Prev, Sz}; _ -> ok end end. next(P, PSz, T) -> case ets:next(T, P) of '$end_of_table' -> ok; Next when P + PSz > Next -> {next, Next}; _ -> ok end. %%%----------------------------------------------------------------- %%% These functions implement a B+ tree. %%%----------------------------------------------------------------- -define(max_size, 16). -define(min_size, 8). %%----------------------------------------------------------------- %% Finds out the type of the node: 'l' or 'n'. %%----------------------------------------------------------------- -define(NODE_TYPE(Tree), element(1, Tree)). %% Finds out if a node/leaf is full or not. -define(FULL(Tree), (bplus_get_size(Tree) >= ?max_size)). %% Finds out if a node/leaf is filled up over its limit. -define(OVER_FULL(Tree), (bplus_get_size(Tree) > ?max_size)). %% Finds out if a node/leaf has less items than allowed. -define(UNDER_FILLED(Tree), (bplus_get_size(Tree) < ?min_size)). %% Finds out if a node/leaf has as few items as minimum allowed. -define(LOW_FILLED(Tree), (bplus_get_size(Tree) =< ?min_size)). %%Returns a key in a leaf at position Pos. -define(GET_LEAF_KEY(Leaf, Pos), element(Pos+1, Leaf)). %% Special for dets. collect_tree(v, _TI, Acc) -> Acc; collect_tree(T, TI, Acc) -> Pow = ?POW(TI-1), collect_tree2(T, Pow, Acc). collect_tree2(Tree, Pow, Acc) -> S = bplus_get_size(Tree), case ?NODE_TYPE(Tree) of l -> collect_leaf(Tree, S, Pow, Acc); n -> collect_node(Tree, S, Pow, Acc) end. collect_leaf(_Leaf, 0, _Pow, Acc) -> Acc; collect_leaf(Leaf, I, Pow, Acc) -> Key = ?GET_LEAF_KEY(Leaf, I), V = {Key, Key+Pow}, collect_leaf(Leaf, I-1, Pow, [V | Acc]). collect_node(_Node, 0, _Pow, Acc) -> Acc; collect_node(Node, I, Pow, Acc) -> Acc1 = collect_tree2(bplus_get_tree(Node, I), Pow, Acc), collect_node(Node, I-1, Pow, Acc1). %% Special for dets. tree_to_bin(v, _F, _Max, Ws, WsSz) -> {Ws, WsSz}; tree_to_bin(T, F, Max, Ws, WsSz) -> {N, L1, Ws1, WsSz1} = tree_to_bin2(T, F, Max, 0, [], Ws, WsSz), {N1, L2, Ws2, WsSz2} = F(N, lists:reverse(L1), Ws1, WsSz1), {0, [], NWs, NWsSz} = F(N1, L2, Ws2, WsSz2), {NWs, NWsSz}. tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) when N >= Max -> {NN, NAcc, NWs, NWsSz} = F(N, lists:reverse(Acc), Ws, WsSz), tree_to_bin2(Tree, F, Max, NN, lists:reverse(NAcc), NWs, NWsSz); tree_to_bin2(Tree, F, Max, N, Acc, Ws, WsSz) -> S = bplus_get_size(Tree), case ?NODE_TYPE(Tree) of l -> {N+S, leaf_to_bin(bplus_leaf_to_list(Tree), Acc), Ws, WsSz}; n -> node_to_bin(Tree, F, Max, N, Acc, 1, S, Ws, WsSz) end. node_to_bin(_Node, _F, _Max, N, Acc, I, S, Ws, WsSz) when I > S -> {N, Acc, Ws, WsSz}; node_to_bin(Node, F, Max, N, Acc, I, S, Ws, WsSz) -> {N1,Acc1,Ws1,WsSz1} = tree_to_bin2(bplus_get_tree(Node, I), F, Max, N, Acc, Ws, WsSz), node_to_bin(Node, F, Max, N1, Acc1, I+1, S, Ws1, WsSz1). leaf_to_bin([N | L], Acc) -> leaf_to_bin(L, [<<N:32>> | Acc]); leaf_to_bin([], Acc) -> Acc. %% Special for dets. list_to_tree(L) -> leafs_to_nodes(L, length(L), fun bplus_mk_leaf/1, []). leafs_to_nodes([], 0, _F, [T]) -> T; leafs_to_nodes([], 0, _F, L) -> leafs_to_nodes(lists:reverse(L), length(L), fun mk_node/1, []); leafs_to_nodes(Ls, Sz, F, L) -> I = if Sz =< 16 -> Sz; Sz =< 32 -> Sz div 2; true -> 12 end, {L1, R} = split_list(Ls, I, []), N = F(L1), Sz1 = Sz - I, leafs_to_nodes(R, Sz1, F, [N | L]). mk_node([E | Es]) -> NL = [E | lists:foldr(fun(X, A) -> [get_first_key(X), X | A] end, [], Es)], bplus_mk_node(NL). split_list(L, 0, SL) -> {SL, L}; split_list([E | Es], I, SL) -> split_list(Es, I-1, [E | SL]). get_first_key(T) -> case ?NODE_TYPE(T) of l -> ?GET_LEAF_KEY(T, 1); n -> get_first_key(bplus_get_tree(T, 1)) end. %% Special for dets. collect_interval(v, _TI, _L, _U, Acc) -> Acc; collect_interval(T, TI, L, U, Acc) -> Pow = ?POW(TI-1), collect_interval2(T, Pow, L, U, Acc). collect_interval2(Tree, Pow, L, U, Acc) -> S = bplus_get_size(Tree), case ?NODE_TYPE(Tree) of l -> collect_leaf_interval(Tree, S, Pow, L, U, Acc); n -> {Max, _} = bplus_select_sub_tree(Tree, U), {Min, _} = bplus_select_sub_tree_2(Tree, L, Max), collect_node_interval(Tree, Min, Max, Pow, L, U, Acc) end. collect_leaf_interval(_Leaf, 0, _Pow, _L, _U, Acc) -> Acc; collect_leaf_interval(Leaf, I, Pow, L, U, Acc) -> Key = ?GET_LEAF_KEY(Leaf, I), if Key < L -> Acc; Key > U -> collect_leaf_interval(Leaf, I-1, Pow, L, U, Acc); true -> collect_leaf_interval(Leaf, I-1, Pow, L, U, [{Key,Key+Pow} | Acc]) end. collect_node_interval(_Node, I, UP, _Pow, _L, _U, Acc) when I > UP -> Acc; collect_node_interval(Node, I, UP, Pow, L, U, Acc) -> Acc1 = collect_interval2(bplus_get_tree(Node, I), Pow, L, U, Acc), collect_node_interval(Node, I+1, UP, Pow, L, U, Acc1). %%----------------------------------------------------------------- %% Func: empty_tree/0 %% Purpose: Creates a new empty tree. %% Returns: tree() %%----------------------------------------------------------------- bplus_empty_tree() -> v. %%----------------------------------------------------------------- %% Func: lookup/2 %% Purpose: Looks for Key in the Tree. %% Returns: {ok, {Key, Val}} | 'undefined'. %%----------------------------------------------------------------- bplus_lookup(v, _Key) -> undefined; bplus_lookup(Tree, Key) -> case ?NODE_TYPE(Tree) of l -> bplus_lookup_leaf(Key, Tree); n -> {_, SubTree} = bplus_select_sub_tree(Tree, Key), bplus_lookup(SubTree, Key) end. %%----------------------------------------------------------------- %% Searches through a leaf until the Key is ok or %% when it is determined that it does not exist. %%----------------------------------------------------------------- bplus_lookup_leaf(Key, Leaf) -> bplus_lookup_leaf_2(Key, Leaf, bplus_get_size(Leaf)). bplus_lookup_leaf_2(_, _, 0) -> undefined; bplus_lookup_leaf_2(Key, Leaf, N) -> case ?GET_LEAF_KEY(Leaf, N) of Key -> {ok, Key}; _ -> bplus_lookup_leaf_2(Key, Leaf, N-1) end. %%----------------------------------------------------------------- %% Func: lookup_first/1 %% Purpose: Finds the smallest key in the entire Tree. %% Returns: {ok, {Key, Val}} | 'undefined'. %%----------------------------------------------------------------- bplus_lookup_first(v) -> undefined; bplus_lookup_first(Tree) -> case ?NODE_TYPE(Tree) of l -> % Then it is the leftmost key here. {ok, ?GET_LEAF_KEY(Tree, 1)}; n -> % Look in the leftmost subtree. bplus_lookup_first(bplus_get_tree(Tree, 1)) end. %%----------------------------------------------------------------- %% Func: lookup_next/2 %% Purpose: Finds the next key nearest after Key. %% Returns: {ok, {Key, Val}} | 'undefined'. NIX!!! %%----------------------------------------------------------------- bplus_lookup_next(v, _) -> undefined; bplus_lookup_next(Tree, Key) -> case ?NODE_TYPE(Tree) of l -> lookup_next_leaf(Key, Tree); n -> {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), case bplus_lookup_next(SubTree, Key) of undefined -> S = bplus_get_size(Tree), if % There is a right brother. S > Pos -> bplus_lookup_first(bplus_get_tree(Tree, Pos+1)); % No there is no right brother. true -> undefined end; % We ok a next item. Result -> Result end end. %%----------------------------------------------------------------- %% Returns {ok, NextKey} if there is a key in the leaf which is greater. %% If there is no such key we return 'undefined' instead. %% Key does not have to be a key in the structure, just a search value. %%----------------------------------------------------------------- lookup_next_leaf(Key, Leaf) -> lookup_next_leaf_2(Key, Leaf, bplus_get_size(Leaf), 1). lookup_next_leaf_2(Key, Leaf, Size, Size) -> % This is the rightmost key. K = ?GET_LEAF_KEY(Leaf, Size), if K > Key -> {ok, ?GET_LEAF_KEY(Leaf, Size)}; true -> undefined end; lookup_next_leaf_2(Key, Leaf, Size, N) -> K = ?GET_LEAF_KEY(Leaf, N), if K < Key -> % K is still smaller, try next in the leaf. lookup_next_leaf_2(Key, Leaf, Size, N+1); Key == K -> % Since this is exact Key it must be the next. {ok, ?GET_LEAF_KEY(Leaf, N+1)}; true -> % Key was not an exact specification. % It must be K that is next greater. {ok, ?GET_LEAF_KEY(Leaf, N)} end. %%----------------------------------------------------------------- %% Func: insert/3 %% Purpose: Inserts a new {Key, Value} into the tree. %% Returns: tree() %%----------------------------------------------------------------- bplus_insert(v, Key) -> bplus_mk_leaf([Key]); bplus_insert(Tree, Key) -> NewTree = bplus_insert_in(Tree, Key), case ?OVER_FULL(NewTree) of false -> NewTree; % If the node is over-full the tree will grow. true -> {LTree, DKey, RTree} = case ?NODE_TYPE(NewTree) of l -> bplus_split_leaf(NewTree); n -> bplus_split_node(NewTree) end, bplus_mk_node([LTree, DKey, RTree]) end. %%----------------------------------------------------------------- %% Func: delete/2 %% Purpose: Deletes a key from the tree (if present). %% Returns: tree() %%----------------------------------------------------------------- bplus_delete(v, _Key) -> v; bplus_delete(Tree, Key) -> NewTree = bplus_delete_in(Tree, Key), S = bplus_get_size(NewTree), case ?NODE_TYPE(NewTree) of l -> if S =:= 0 -> v; true -> NewTree end; n -> if S =:= 1 -> bplus_get_tree(NewTree, 1); true -> NewTree end end. %%% ----------------------- %%% Help function to insert. %%% ----------------------- bplus_insert_in(Tree, Key) -> case ?NODE_TYPE(Tree) of l -> bplus_insert_in_leaf(Tree, Key); n -> {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), % Pos = "the position of the subtree". NewSubTree = bplus_insert_in(SubTree, Key), case ?OVER_FULL(NewSubTree) of false -> bplus_put_subtree(Tree, [NewSubTree, Pos]); true -> case bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) of {left, {LeftT, DKey, MiddleT}} -> bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), [LeftT, Pos-1, MiddleT, Pos]); {right, {MiddleT, DKey, RightT}} -> bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), [MiddleT, Pos, RightT, Pos+1]); {split, {LeftT, DKey, RightT}} -> bplus_extend_tree(Tree, {LeftT, DKey, RightT}, Pos) end end end. %%----------------------------------------------------------------- %% Inserts a key in correct position in a leaf. %%----------------------------------------------------------------- bplus_insert_in_leaf(Leaf, Key) -> bplus_insert_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). bplus_insert_in_leaf_2(Leaf, Key, 0, Accum) -> bplus_insert_in_leaf_3(Leaf, 0, [Key|Accum]); bplus_insert_in_leaf_2(Leaf, Key, N, Accum) -> K = ?GET_LEAF_KEY(Leaf, N), if Key < K -> % Not here! bplus_insert_in_leaf_2(Leaf, Key, N-1, [K|Accum]); K < Key -> % Insert here. bplus_insert_in_leaf_3(Leaf, N-1, [K, Key|Accum]); K == Key -> % Replace (?). bplus_insert_in_leaf_3(Leaf, N-1, [ Key|Accum]) end. bplus_insert_in_leaf_3(_Leaf, 0, LeafList) -> bplus_mk_leaf(LeafList); bplus_insert_in_leaf_3(Leaf, N, LeafList) -> bplus_insert_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). %%% ------------------------- %%% Help functions for delete. %%% ------------------------- bplus_delete_in(Tree, Key) -> case ?NODE_TYPE(Tree) of l -> bplus_delete_in_leaf(Tree, Key); n -> {Pos, SubTree} = bplus_select_sub_tree(Tree, Key), % Pos = "the position of the subtree". NewSubTree = bplus_delete_in(SubTree, Key), % Check if it has become to small now case ?UNDER_FILLED(NewSubTree) of false -> bplus_put_subtree(Tree, [NewSubTree, Pos]); true -> case bplus_reorganize_tree_del(Tree, NewSubTree, Pos) of {left, {LeftT, DKey, MiddleT}} -> bplus_put_subtree(bplus_put_lkey(Tree, DKey, Pos), [LeftT, Pos-1, MiddleT, Pos]); {right, {MiddleT, DKey, RightT}} -> bplus_put_subtree(bplus_put_rkey(Tree, DKey, Pos), [MiddleT, Pos, RightT, Pos+1]); {join_left, JoinedTree} -> bplus_joinleft_tree(Tree, JoinedTree, Pos); {join_right, JoinedTree} -> bplus_joinright_tree(Tree, JoinedTree, Pos) end end end. %%----------------------------------------------------------------- %% Deletes a key from the leaf returning a new (smaller) leaf. %%----------------------------------------------------------------- bplus_delete_in_leaf(Leaf, Key) -> bplus_delete_in_leaf_2(Leaf, Key, bplus_get_size(Leaf), []). bplus_delete_in_leaf_2(Leaf, _, 0, _) -> Leaf; bplus_delete_in_leaf_2(Leaf, Key, N, Accum) -> K = ?GET_LEAF_KEY(Leaf, N), if Key == K -> % Remove this one! bplus_delete_in_leaf_3(Leaf, N-1, Accum); true -> bplus_delete_in_leaf_2(Leaf, Key, N-1, [K|Accum]) end. bplus_delete_in_leaf_3(_Leaf, 0, LeafList) -> bplus_mk_leaf(LeafList); bplus_delete_in_leaf_3(Leaf, N, LeafList) -> bplus_delete_in_leaf_3(Leaf, N-1, [?GET_LEAF_KEY(Leaf, N)|LeafList]). %%----------------------------------------------------------------- %% Selects and returns which subtree the search should continue in. %%----------------------------------------------------------------- bplus_select_sub_tree(Tree, Key) -> bplus_select_sub_tree_2(Tree, Key, bplus_get_size(Tree)). bplus_select_sub_tree_2(Tree, _Key, 1) -> {1, bplus_get_tree(Tree, 1)}; bplus_select_sub_tree_2(Tree, Key, N) -> K = bplus_get_lkey(Tree, N), if K > Key -> bplus_select_sub_tree_2(Tree, Key, N-1); K =< Key -> % Here it is! {N, bplus_get_tree(Tree, N)} end. %%----------------------------------------------------------------- %% Selects which brother that should take over some of our items. %% Or if they are both full makes a split. %%----------------------------------------------------------------- bplus_reorganize_tree_ins(Tree, NewSubTree, 1) -> RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. case ?FULL(RTree) of false -> bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); true -> % It is full, we must split this one! bplus_reorganize_tree_s(NewSubTree) end; bplus_reorganize_tree_ins(Tree, NewSubTree, Pos) -> Size = bplus_get_size(Tree), if Pos == Size -> % Pos is the rightmost postion!. % Our only chance is the left one. LTree = bplus_get_tree(Tree, Pos-1), case ?FULL(LTree) of false -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); true -> % It is full, we must split this one! bplus_reorganize_tree_s(NewSubTree) end; true -> % Pos is somewhere inside the node. LTree = bplus_get_tree(Tree, Pos-1), RTree = bplus_get_tree(Tree, Pos+1), SL = bplus_get_size(LTree), SR = bplus_get_size(RTree), if SL > SR -> bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); SL < SR -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); true -> case ?FULL(LTree) of false -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); true -> bplus_reorganize_tree_s(NewSubTree) end end end. %%----------------------------------------------------------------- %% This function fills over items from brothers to maintain the minimum %% number of items per node/leaf. %%----------------------------------------------------------------- bplus_reorganize_tree_del(Tree, NewSubTree, 1) -> % The case when Pos is at leftmost position. RTree = bplus_get_tree(Tree, 2), % 2 = Pos+1 = 1+1. case ?LOW_FILLED(RTree) of false -> bplus_reorganize_tree_r(Tree, NewSubTree, 1, RTree); true -> % It is to small, we must join them! bplus_reorganize_tree_jr(Tree, NewSubTree, 1, RTree) end; bplus_reorganize_tree_del(Tree, NewSubTree, Pos) -> Size = bplus_get_size(Tree), if Pos == Size -> % Pos is the rightmost postion!. % Our only chance is the left one. LTree = bplus_get_tree(Tree, Pos-1), case ?LOW_FILLED(LTree) of false -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); true -> % It is to small, we must join this one! bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) end; true -> % Pos is somewhere inside the node. LTree = bplus_get_tree(Tree, Pos-1), RTree = bplus_get_tree(Tree, Pos+1), SL = bplus_get_size(LTree), SR = bplus_get_size(RTree), if SL>SR -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); SL < SR -> bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree); true -> case ?LOW_FILLED(LTree) of false -> bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree); true -> bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) end end end. bplus_reorganize_tree_l(Tree, NewSubTree, Pos, LTree) -> case ?NODE_TYPE(NewSubTree) of l -> {left, bplus_split_leaf( bplus_mk_leaf( lists:append(bplus_leaf_to_list(LTree), bplus_leaf_to_list(NewSubTree))))}; n -> {left, bplus_split_node( bplus_mk_node( lists:append([bplus_node_to_list(LTree), [bplus_get_lkey(Tree, Pos)], bplus_node_to_list(NewSubTree)])))} end. bplus_reorganize_tree_r(Tree, NewSubTree, Pos, RTree) -> case ?NODE_TYPE(NewSubTree) of l -> {right, bplus_split_leaf( bplus_mk_leaf( lists:append([bplus_leaf_to_list(NewSubTree), bplus_leaf_to_list(RTree)])))}; n -> {right, bplus_split_node( bplus_mk_node( lists:append([bplus_node_to_list(NewSubTree), [bplus_get_rkey(Tree, Pos)], bplus_node_to_list(RTree)])))} end. bplus_reorganize_tree_s(NewSubTree) -> case ?NODE_TYPE(NewSubTree) of l -> {split, bplus_split_leaf(NewSubTree)}; n -> {split, bplus_split_node(NewSubTree)} end. bplus_reorganize_tree_jl(Tree, NewSubTree, Pos, LTree) -> case ?NODE_TYPE(NewSubTree) of l -> {join_left, bplus_mk_leaf(lists:append([bplus_leaf_to_list(LTree), bplus_leaf_to_list(NewSubTree)]))}; n -> {join_left, bplus_mk_node(lists:append([bplus_node_to_list(LTree), [bplus_get_lkey(Tree, Pos)], bplus_node_to_list(NewSubTree)]))} end. bplus_reorganize_tree_jr(Tree, NewSubTree, Pos, RTree) -> case ?NODE_TYPE(NewSubTree) of l -> {join_right, bplus_mk_leaf(lists:append([bplus_leaf_to_list(NewSubTree), bplus_leaf_to_list(RTree)]))}; n -> {join_right, bplus_mk_node(lists:append([bplus_node_to_list(NewSubTree), [bplus_get_rkey(Tree, Pos)], bplus_node_to_list(RTree)]))} end. %%----------------------------------------------------------------- %% Takes a leaf and divides it into two equal big leaves. %% The result is returned in a tuple. The dividing key is also returned. %%----------------------------------------------------------------- bplus_split_leaf(Leaf) -> S = bplus_get_size(Leaf), bplus_split_leaf_2(Leaf, S, S div 2, []). bplus_split_leaf_2(Leaf, Pos, 1, Accum) -> K = ?GET_LEAF_KEY(Leaf, Pos), bplus_split_leaf_3(Leaf, Pos-1, [], K, [K|Accum]); bplus_split_leaf_2(Leaf, Pos, N, Accum) -> bplus_split_leaf_2(Leaf, Pos-1, N-1, [?GET_LEAF_KEY(Leaf, Pos)|Accum]). bplus_split_leaf_3(_, 0, LeftAcc, DKey, RightAcc) -> {bplus_mk_leaf(LeftAcc), DKey, bplus_mk_leaf(RightAcc)}; bplus_split_leaf_3(Leaf, Pos, LeftAcc, DKey, RightAcc) -> bplus_split_leaf_3(Leaf, Pos-1, [?GET_LEAF_KEY(Leaf, Pos)|LeftAcc], DKey, RightAcc). %%----------------------------------------------------------------- %% Takes a node and divides it into two equal big nodes. %% The result is returned in a tuple. The dividing key is also returned. %%----------------------------------------------------------------- bplus_split_node(Node) -> S = bplus_get_size(Node), bplus_split_node_2(Node, S, S div 2, []). bplus_split_node_2(Node, Pos, 1, Accum) -> bplus_split_node_3(Node, Pos-1, [], bplus_get_lkey(Node, Pos), [bplus_get_tree(Node, Pos)|Accum]); bplus_split_node_2(Node, Pos, N, Accum) -> bplus_split_node_2(Node, Pos-1, N-1, [bplus_get_lkey(Node, Pos), bplus_get_tree(Node, Pos)|Accum]). bplus_split_node_3(Node, 1, LeftAcc, DKey, RightAcc) -> {bplus_mk_node([bplus_get_tree(Node, 1)|LeftAcc]), DKey, bplus_mk_node(RightAcc)}; bplus_split_node_3(Node, Pos, LeftAcc, DKey, RightAcc) -> bplus_split_node_3(Node, Pos-1, [bplus_get_lkey(Node, Pos), bplus_get_tree(Node, Pos)|LeftAcc], DKey, RightAcc). %%----------------------------------------------------------------- %% Inserts a joined tree insted of the old one at position Pos and %% the one nearest left/right brother. %%----------------------------------------------------------------- bplus_joinleft_tree(Tree, JoinedTree, Pos) -> bplus_join_tree_2(Tree, JoinedTree, Pos, bplus_get_size(Tree), []). bplus_joinright_tree(Tree, JoinedTree, Pos) -> bplus_join_tree_2(Tree, JoinedTree, Pos+1, bplus_get_size(Tree), []). bplus_join_tree_2(Tree, JoinedTree, Pos, Pos, Accum) -> bplus_join_tree_3(Tree, Pos-2, [JoinedTree|Accum]); bplus_join_tree_2(Tree, JoinedTree, Pos, N, Accum) -> bplus_join_tree_2(Tree, JoinedTree, Pos, N-1, [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). bplus_join_tree_3(_Tree, 0, Accum) -> bplus_mk_node(Accum); bplus_join_tree_3(Tree, Pos, Accum) -> bplus_join_tree_3(Tree, Pos-1, [bplus_get_tree(Tree, Pos), bplus_get_rkey(Tree, Pos)|Accum]). %%% --------------------------------- %%% Primitive datastructure functions. %%% --------------------------------- %%----------------------------------------------------------------- %% Constructs a node out of list format. %%----------------------------------------------------------------- bplus_mk_node(NodeList) -> list_to_tuple([ n |NodeList]). %%----------------------------------------------------------------- %% Converts the node into list format. %%----------------------------------------------------------------- bplus_node_to_list(Node) -> [_|NodeList] = tuple_to_list(Node), NodeList. %%----------------------------------------------------------------- %% Constructs a leaf out of list format. %%----------------------------------------------------------------- bplus_mk_leaf(KeyList) -> list_to_tuple([l|KeyList]). %%----------------------------------------------------------------- %% Converts a leaf into list format. %%----------------------------------------------------------------- bplus_leaf_to_list(Leaf) -> [_|LeafList] = tuple_to_list(Leaf), LeafList. %%----------------------------------------------------------------- %% Changes subtree "pointers" in a node. %%----------------------------------------------------------------- bplus_put_subtree(Tree, []) -> Tree; bplus_put_subtree(Tree, [NewSubTree, Pos|Rest]) -> bplus_put_subtree(setelement(Pos*2, Tree, NewSubTree), Rest). %%----------------------------------------------------------------- %% Replaces the tree at position Pos with two new trees. %%----------------------------------------------------------------- bplus_extend_tree(Tree, Inserts, Pos) -> bplus_extend_tree_2(Tree, Inserts, Pos, bplus_get_size(Tree), []). bplus_extend_tree_2(Tree, {T1, DKey, T2}, Pos, Pos, Accum) -> bplus_extend_tree_3(Tree, Pos-1, [T1, DKey, T2|Accum]); bplus_extend_tree_2(Tree, Inserts, Pos, N, Accum) -> bplus_extend_tree_2(Tree, Inserts, Pos, N-1, [bplus_get_lkey(Tree, N), bplus_get_tree(Tree, N)|Accum]). bplus_extend_tree_3(_, 0, Accum) -> bplus_mk_node(Accum); bplus_extend_tree_3(Tree, N, Accum) -> bplus_extend_tree_3(Tree, N-1, [bplus_get_tree(Tree, N), bplus_get_rkey(Tree, N)|Accum]). %%----------------------------------------------------------------- %% Changes the dividing key between two trees. %%----------------------------------------------------------------- bplus_put_lkey(Tree, DKey, Pos) -> setelement(Pos*2-1, Tree, DKey). bplus_put_rkey(Tree, DKey, Pos) -> setelement(Pos*2+1, Tree, DKey). %%----------------------------------------------------------------- %% Calculates the number of items in a node/leaf. %%----------------------------------------------------------------- bplus_get_size(Tree) -> case ?NODE_TYPE(Tree) of l -> tuple_size(Tree)-1; n -> tuple_size(Tree) div 2 end. %%----------------------------------------------------------------- %% Returns a tree at position Pos from an internal node. %%----------------------------------------------------------------- bplus_get_tree(Tree, Pos) -> element(Pos*2, Tree). %%----------------------------------------------------------------- %% Returns dividing keys, left of or right of a tree. %%----------------------------------------------------------------- bplus_get_lkey(Tree, Pos) -> element(Pos*2-1, Tree). bplus_get_rkey(Tree, Pos) -> element(Pos*2+1, Tree).