aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src/dets_v8.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src/dets_v8.erl')
-rw-r--r--lib/stdlib/src/dets_v8.erl1594
1 files changed, 0 insertions, 1594 deletions
diff --git a/lib/stdlib/src/dets_v8.erl b/lib/stdlib/src/dets_v8.erl
deleted file mode 100644
index 1bf53d91b1..0000000000
--- a/lib/stdlib/src/dets_v8.erl
+++ /dev/null
@@ -1,1594 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2001-2016. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%% http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
--module(dets_v8).
-
-%% Dets files, implementation part. This module handles versions up to
-%% and including 8(c). To be called from dets.erl only.
-
--export([mark_dirty/1, read_file_header/2,
- check_file_header/2, do_perform_save/1, initiate_file/11,
- init_freelist/2, fsck_input/4,
- bulk_input/3, output_objs/4, write_cache/1, may_grow/3,
- find_object/2, re_hash/2, slot_objs/2, scan_objs/8,
- db_hash/2, no_slots/1, table_parameters/1]).
-
--export([file_info/1, v_segments/1]).
-
--export([cache_segps/3]).
-
-%% For backward compatibility.
--export([sz2pos/1]).
-
--dialyzer(no_improper_lists).
-
--compile({inline, [{sz2pos,1},{scan_skip,7}]}).
--compile({inline, [{skip_bytes,5}, {get_segp,1}]}).
--compile({inline, [{wl_lookup,5}]}).
--compile({inline, [{actual_seg_size,0}]}).
-
--include("dets.hrl").
-
-%% The layout of the file is :
-%%
-%% bytes decsription
-%% ---------------------- File header
-%% 4 FreelistsPointer
-%% 4 Cookie
-%% 4 ClosedProperly (pos=8)
-%% 4 Type (pos=12)
-%% 4 Version (pos=16)
-%% 4 M
-%% 4 Next
-%% 4 KeyPos
-%% 4 NoObjects
-%% 4 N
-%% ------------------ end of file header
-%% 4*8192 SegmentArray
-%% ------------------
-%% 4*256 First segment
-%% ----------------------------- This is BASE.
-%% ??? Objects (free and alive)
-%% 4*256 Second segment (2 kB now, due to a bug)
-%% ??? Objects (free and alive)
-%% ... more objects and segments ...
-%% -----------------------------
-%% ??? Free lists
-%% -----------------------------
-%% 4 File size, in bytes.
-
-%% The first slot (0) in the segment array always points to the
-%% pre-allocated first segment.
-%% Before we can find an object we must find the slot where the
-%% object resides. Each slot is a (possibly empty) list (or chain) of
-%% objects that hash to the same slot. If the value stored in the
-%% slot is zero, the slot chain is empty. If the slot value is
-%% non-zero, the value points to a position in the file where the
-%% chain starts. Each object in a chain has the following layout:
-%%
-%% bytes decsription
-%% --------------------
-%% 4 Pointer to the next object of the chain.
-%% 4 Size of the object in bytes (Sz).
-%% 4 Status (FREE or ACTIVE)
-%% Sz Binary representing the object
-%%
-%% The status field is used while repairing a file (but not next or size).
-%%
-%%|---------------|
-%%| head |
-%%| |
-%%| |
-%%|_______________|
-%%| |------|
-%%|___seg ptr1____| |
-%%| | |
-%%|__ seg ptr 2___| |
-%%| | | segment 1
-%%| .... | V _____________
-%% | |
-%% | |
-%% |___slot 0 ____|
-%% | |
-%% |___slot 1 ____|-----|
-%% | | |
-%% | ..... | | 1:st obj in slot 1
-%% V segment 1
-%% |-----------|
-%% | next |
-%% |___________|
-%% | size |
-%% |___________|
-%% | status |
-%% |___________|
-%% | |
-%% | |
-%% | obj |
-%% | |
-
-%%%
-%%% File header
-%%%
-
--define(HEADSZ, 40). % The size of the file header, in bytes.
--define(SEGSZ, 256). % Size of a segment, in words.
--define(SEGSZ_LOG2, 8).
--define(SEGARRSZ, 8192). % Maximal number of segments.
--define(SEGADDR(SegN), (?HEADSZ + (4 * (SegN)))).
--define(BASE, ?SEGADDR((?SEGSZ + ?SEGARRSZ))).
--define(MAXOBJS, (?SEGSZ * ?SEGARRSZ)). % 2 M objects
-
--define(SLOT2SEG(S), ((S) bsr ?SEGSZ_LOG2)).
-
-%% BIG is used for hashing. BIG must be greater than the maximum
-%% number of slots, currently MAXOBJS.
--define(BIG, 16#ffffff).
-
-%% Hard coded positions into the file header:
--define(FREELIST_POS, 0).
--define(CLOSED_PROPERLY_POS, 8).
--define(D_POS, 20).
--define(NO_OBJECTS_POS, (?D_POS + 12)).
-
-%% The version of a dets file is indicated by the ClosedProperly
-%% field. Version 6 was used in the R1A release, and version 7 in the
-%% R1B release up to and including the R3B01 release. Both version 6
-%% and version 7 indicate properly closed files by the value
-%% CLOSED_PROPERLY.
-%%
-%% The current version, 8, has three sub-versions:
-%%
-%% - 8(a), indicated by the value CLOSED_PROPERLY (same as in versions 6
-%% and 7), introduced in R3B02;
-%% - 8(b), indicated by the value CLOSED_PROPERLY2(_NEED_COMPACTING),
-%% introduced in R5A and used up to and including R6A;
-%% - 8(c), indicated by the value CLOSED_PROPERLY_NEW_HASH(_NEED_COMPACTING),
-%% in use since R6B.
-%%
-%% The difference between the 8(a) and the 8(b) versions is the format
-%% used for free lists saved on dets files.
-%% The 8(c) version uses a different hashing algorithm, erlang:phash
-%% (former versions use erlang:hash).
-%% Version 8(b) files are only converted to version 8(c) if repair is
-%% done, so we need compatibility with 8(b) for a _long_ time.
-%%
-%% There are known bugs due to the fact that keys and objects are
-%% sometimes compared (==) and sometimes matched (=:=). The version
-%% used by default (9, see dets_v9.erl) does not have this problem.
-
--define(NOT_PROPERLY_CLOSED,0).
--define(CLOSED_PROPERLY,1).
--define(CLOSED_PROPERLY2,2).
--define(CLOSED_PROPERLY2_NEED_COMPACTING,3).
--define(CLOSED_PROPERLY_NEW_HASH,4).
--define(CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING,5).
-
--define(FILE_FORMAT_VERSION, 8).
--define(CAN_BUMP_BY_REPAIR, [6, 7]).
--define(CAN_CONVERT_FREELIST, [8]).
-
-%%%
-%%% Object header (next, size, status).
-%%%
-
--define(OHDSZ, 12). % The size of the object header, in bytes.
--define(STATUS_POS, 8). % Position of the status field.
-
-%% The size of each object is a multiple of 16.
-%% BUMP is used when repairing files.
--define(BUMP, 16).
-
--define(ReadAhead, 512).
-
-%%-define(DEBUGF(X,Y), io:format(X, Y)).
--define(DEBUGF(X,Y), void).
-
-%% -> ok | throw({NewHead,Error})
-mark_dirty(Head) ->
- Dirty = [{?CLOSED_PROPERLY_POS, <<?NOT_PROPERLY_CLOSED:32>>}],
- {_NewHead, ok} = dets_utils:pwrite(Head, Dirty),
- ok = dets_utils:sync(Head),
- {ok, _Pos} = dets_utils:position(Head, Head#head.freelists_p),
- ok = dets_utils:truncate(Head, cur).
-
-%% -> {ok, head()} | throw(Error)
-initiate_file(Fd, Tab, Fname, Type, Kp, MinSlots, MaxSlots,
- Ram, CacheSz, Auto, _DoInitSegments) ->
- Freelist = 0,
- Cookie = ?MAGIC,
- ClosedProperly = ?NOT_PROPERLY_CLOSED, % immediately overwritten
- Version = ?FILE_FORMAT_VERSION,
- Factor = est_no_segments(MinSlots),
- N = 0,
- M = Next = ?SEGSZ * Factor,
- NoObjects = 0,
- dets_utils:pwrite(Fd, Fname, 0,
- <<Freelist:32,
- Cookie:32,
- ClosedProperly:32,
- (dets_utils:type_to_code(Type)):32,
- Version:32,
- M:32,
- Next:32,
- Kp:32,
- NoObjects:32,
- N:32,
- 0:(?SEGARRSZ*4)/unit:8, % Initialize SegmentArray
- 0:(?SEGSZ*4)/unit:8>>), % Initialize first segment
- %% We must set the first slot of the segment pointer array to
- %% point to the first segment
- Pos = ?SEGADDR(0),
- SegP = (?HEADSZ + (4 * ?SEGARRSZ)),
- dets_utils:pwrite(Fd, Fname, Pos, <<SegP:32>>),
- segp_cache(Pos, SegP),
-
- Ftab = dets_utils:init_alloc(?BASE),
- H0 = #head{freelists=Ftab, fptr = Fd, base = ?BASE},
- {H1, Ws} = init_more_segments(H0, 1, Factor, undefined, []),
-
- %% This is not optimal but simple: always initiate the segments.
- dets_utils:pwrite(Fd, Fname, Ws),
-
- %% Return a new nice head structure
- Head = #head{
- m = M,
- m2 = M * 2,
- next = Next,
- fptr = Fd,
- no_objects = NoObjects,
- n = N,
- type = Type,
- update_mode = dirty,
- freelists = H1#head.freelists,
- auto_save = Auto,
- hash_bif = phash,
- keypos = Kp,
- min_no_slots = Factor * ?SEGSZ,
- max_no_slots = no_segs(MaxSlots) * ?SEGSZ,
-
- ram_file = Ram,
- filename = Fname,
- name = Tab,
- cache = dets_utils:new_cache(CacheSz),
- version = Version,
- bump = ?BUMP,
- base = ?BASE,
- mod = ?MODULE
- },
- {ok, Head}.
-
-est_no_segments(MinSlots) when 1 + ?SLOT2SEG(MinSlots) > ?SEGARRSZ ->
- ?SEGARRSZ;
-est_no_segments(MinSlots) ->
- 1 + ?SLOT2SEG(MinSlots).
-
-init_more_segments(Head, SegNo, Factor, undefined, Ws) when SegNo < Factor ->
- init_more_segments(Head, SegNo, Factor, seg_zero(), Ws);
-init_more_segments(Head, SegNo, Factor, SegZero, Ws) when SegNo < Factor ->
- {NewHead, W} = allocate_segment(Head, SegZero, SegNo),
- init_more_segments(NewHead, SegNo+1, Factor, SegZero, W++Ws);
-init_more_segments(Head, _SegNo, _Factor, _SegZero, Ws) ->
- {Head, Ws}.
-
-allocate_segment(Head, SegZero, SegNo) ->
- %% may throw error:
- {NewHead, Segment, _} = dets_utils:alloc(Head, 4 * ?SEGSZ),
- InitSegment = {Segment, SegZero},
- Pos = ?SEGADDR(SegNo),
- segp_cache(Pos, Segment),
- SegPointer = {Pos, <<Segment:32>>},
- {NewHead, [InitSegment, SegPointer]}.
-
-%% Read free lists (using a Buddy System) from file.
-init_freelist(Head, {convert_freelist,_Version}) ->
- %% This function converts the saved freelist of the form
- %% [{Slot1,Addr1},{Addr1,Addr2},...,{AddrN,0},{Slot2,Addr},...]
- %% i.e each slot is a linked list which ends with a 0.
- %% This is stored in a bplus_tree per Slot.
- %% Each Slot is a position in a tuple.
-
- Ftab = dets_utils:empty_free_lists(),
- Pos = Head#head.freelists_p,
- case catch prterm(Head, Pos, ?OHDSZ) of
- {0, _Sz, Term} ->
- FreeList1 = lists:reverse(Term),
- FreeList = dets_utils:init_slots_from_old_file(FreeList1, Ftab),
- Head#head{freelists = FreeList, base = ?BASE};
- _ ->
- throw({error, {bad_freelists, Head#head.filename}})
- end;
-init_freelist(Head, _) ->
- %% bplus_tree stored as is
- Pos = Head#head.freelists_p,
- case catch prterm(Head, Pos, ?OHDSZ) of
- {0, _Sz, Term} ->
- Head#head{freelists = Term, base = ?BASE};
- _ ->
- throw({error, {bad_freelists, Head#head.filename}})
- end.
-
-%% -> {ok, Fd, fileheader()} | throw(Error)
-read_file_header(Fd, FileName) ->
- {ok, Bin} = dets_utils:pread_close(Fd, FileName, 0, ?HEADSZ),
- [Freelist, Cookie, CP, Type2, Version, M, Next, Kp, NoObjects, N] =
- bin2ints(Bin),
- {ok, EOF} = dets_utils:position_close(Fd, FileName, eof),
- {ok, <<FileSize:32>>} = dets_utils:pread_close(Fd, FileName, EOF-4, 4),
- FH = #fileheader{freelist = Freelist,
- fl_base = ?BASE,
- cookie = Cookie,
- closed_properly = CP,
- type = dets_utils:code_to_type(Type2),
- version = Version,
- m = M,
- next = Next,
- keypos = Kp,
- no_objects = NoObjects,
- min_no_slots = ?DEFAULT_MIN_NO_SLOTS,
- max_no_slots = ?DEFAULT_MAX_NO_SLOTS,
- trailer = FileSize,
- eof = EOF,
- n = N,
- mod = ?MODULE},
- {ok, Fd, FH}.
-
-%% -> {ok, head(), ExtraInfo} | {error, Reason} (Reason lacking file name)
-%% ExtraInfo = {convert_freelist, Version} | true | need_compacting
-check_file_header(FH, Fd) ->
- Test =
- if
- FH#fileheader.cookie =/= ?MAGIC ->
- {error, not_a_dets_file};
- FH#fileheader.type =:= badtype ->
- {error, invalid_type_code};
- FH#fileheader.version =/= ?FILE_FORMAT_VERSION ->
- case lists:member(FH#fileheader.version,
- ?CAN_BUMP_BY_REPAIR) of
- true ->
- {error, version_bump};
- false ->
- {error, bad_version}
- end;
- FH#fileheader.trailer =/= FH#fileheader.eof ->
- {error, not_closed};
- FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY ->
- case lists:member(FH#fileheader.version,
- ?CAN_CONVERT_FREELIST) of
- true ->
- {ok, {convert_freelist, FH#fileheader.version}, hash};
- false ->
- {error, not_closed} % should not happen
- end;
- FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY2 ->
- {ok, true, hash};
- FH#fileheader.closed_properly =:=
- ?CLOSED_PROPERLY2_NEED_COMPACTING ->
- {ok, need_compacting, hash};
- FH#fileheader.closed_properly =:= ?CLOSED_PROPERLY_NEW_HASH ->
- {ok, true, phash};
- FH#fileheader.closed_properly =:=
- ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING ->
- {ok, need_compacting, phash};
- FH#fileheader.closed_properly =:= ?NOT_PROPERLY_CLOSED ->
- {error, not_closed};
- FH#fileheader.closed_properly >
- ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING ->
- {error, not_closed};
- true ->
- {error, not_a_dets_file}
- end,
- case Test of
- {ok, ExtraInfo, HashAlg} ->
- H = #head{
- m = FH#fileheader.m,
- m2 = FH#fileheader.m * 2,
- next = FH#fileheader.next,
- fptr = Fd,
- no_objects= FH#fileheader.no_objects,
- n = FH#fileheader.n,
- type = FH#fileheader.type,
- update_mode = saved,
- auto_save = infinity, % not saved on file
- fixed = false, % not saved on file
- freelists_p = FH#fileheader.freelist,
- hash_bif = HashAlg,
- keypos = FH#fileheader.keypos,
- min_no_slots = FH#fileheader.min_no_slots,
- max_no_slots = FH#fileheader.max_no_slots,
- version = ?FILE_FORMAT_VERSION,
- mod = ?MODULE,
- bump = ?BUMP,
- base = FH#fileheader.fl_base},
- {ok, H, ExtraInfo};
- Error ->
- Error
- end.
-
-cache_segps(Fd, FileName, M) ->
- NSegs = no_segs(M),
- {ok, Bin} = dets_utils:pread_close(Fd, FileName, ?HEADSZ, 4 * NSegs),
- Fun = fun(S, P) -> segp_cache(P, S), P+4 end,
- lists:foldl(Fun, ?HEADSZ, bin2ints(Bin)).
-
-no_segs(NoSlots) ->
- ?SLOT2SEG(NoSlots - 1) + 1.
-
-bin2ints(<<Int:32, B/binary>>) ->
- [Int | bin2ints(B)];
-bin2ints(<<>>) ->
- [].
-
-%%%
-%%% Repair, conversion and initialization of a dets file.
-%%%
-
-bulk_input(Head, InitFun, Cntrs) ->
- bulk_input(Head, InitFun, Cntrs, make_ref()).
-
-bulk_input(Head, InitFun, Cntrs, Ref) ->
- fun(close) ->
- ok;
- (read) ->
- case catch {Ref, InitFun(read)} of
- {Ref, end_of_input} ->
- end_of_input;
- {Ref, {L0, NewInitFun}} when is_list(L0),
- is_function(NewInitFun) ->
- Kp = Head#head.keypos,
- case catch bulk_objects(L0, Head, Cntrs, Kp, []) of
- {'EXIT', _Error} ->
- _ = (catch NewInitFun(close)),
- {error, invalid_objects_list};
- L ->
- {L, bulk_input(Head, NewInitFun, Cntrs, Ref)}
- end;
- {Ref, Value} ->
- {error, {init_fun, Value}};
- Error ->
- throw({thrown, Error})
- end
- end.
-
-bulk_objects([T | Ts], Head, Cntrs, Kp, L) ->
- BT = term_to_binary(T),
- Sz = byte_size(BT),
- LogSz = sz2pos(Sz+?OHDSZ),
- count_object(Cntrs, LogSz),
- Key = element(Kp, T),
- bulk_objects(Ts, Head, Cntrs, Kp, [make_object(Head, Key, LogSz, BT) | L]);
-bulk_objects([], _Head, _Cntrs, _Kp, L) ->
- L.
-
--define(FSCK_SEGMENT, 10000).
-
--define(DCT(D, CT), [D | CT]).
-
--define(VNEW(N, E), erlang:make_tuple(N, E)).
--define(VSET(I, V, E), setelement(I, V, E)).
--define(VGET(I, V), element(I, V)).
-
-%% OldVersion not used, assuming later versions have been converted already.
-output_objs(OldVersion, Head, SlotNumbers, Cntrs) ->
- fun(close) ->
- {ok, 0, Head};
- ([]) ->
- output_objs(OldVersion, Head, SlotNumbers, Cntrs);
- (L) ->
- %% Descending sizes.
- Count = lists:sort(ets:tab2list(Cntrs)),
- RCount = lists:reverse(Count),
- NoObjects = lists:foldl(fun({_Sz,No}, A) -> A + No end, 0, Count),
- {_, MinSlots, _} = SlotNumbers,
- if
- %% Using number of objects for bags and duplicate bags
- %% is not ideal; number of (unique) keys should be
- %% used instead. The effect is that there will be more
- %% segments than "necessary".
- MinSlots =/= bulk_init,
- abs(?SLOT2SEG(NoObjects) - ?SLOT2SEG(MinSlots)) > 5,
- (NoObjects < ?MAXOBJS) ->
- {try_again, NoObjects};
- true ->
- Head1 = Head#head{no_objects = NoObjects},
- SegSz = actual_seg_size(),
- {_, End, _} = dets_utils:alloc(Head, SegSz-1),
- %% Now {LogSize,NoObjects} in Cntrs is replaced by
- %% {LogSize,Position,{FileName,FileDescriptor},NoObjects}.
- {Head2, CT} = allocate_all_objects(Head1, RCount, Cntrs),
- [E | Es] = bin2term(L, []),
- {NE, Acc, DCT1} =
- output_slots(E, Es, [E], Head2, ?DCT(0, CT)),
- NDCT = write_all_sizes(DCT1, Cntrs),
- Max = ets:info(Cntrs, size),
- output_objs2(NE, Acc, Head2, Cntrs, NDCT, End, Max,Max)
- end
- end.
-
-output_objs2(E, Acc, Head, Cntrs, DCT, End, 0, MaxNoChunks) ->
- NDCT = write_all_sizes(DCT, Cntrs),
- output_objs2(E, Acc, Head, Cntrs, NDCT, End, MaxNoChunks, MaxNoChunks);
-output_objs2(E, Acc, Head, Cntrs, DCT, End, ChunkI, MaxNoChunks) ->
- fun(close) ->
- DCT1 = output_slot(Acc, Head, DCT),
- NDCT = write_all_sizes(DCT1, Cntrs),
- ?DCT(NoDups, CT) = NDCT,
- [SegAddr | []] = ?VGET(tuple_size(CT), CT),
- FinalZ = End - SegAddr,
- [{?FSCK_SEGMENT, _, {FileName, Fd}, _}] =
- ets:lookup(Cntrs, ?FSCK_SEGMENT),
- ok = dets_utils:fwrite(Fd, FileName,
- dets_utils:make_zeros(FinalZ)),
- NewHead = Head#head{no_objects = Head#head.no_objects - NoDups},
- {ok, NoDups, NewHead};
- (L) ->
- Es = bin2term(L, []),
- {NE, NAcc, NDCT} = output_slots(E, Es, Acc, Head, DCT),
- output_objs2(NE, NAcc, Head, Cntrs, NDCT, End,
- ChunkI-1, MaxNoChunks)
- end.
-
-%% By allocating bigger objects before smaller ones, holes in the
-%% buddy system memory map are avoided. Unfortunately, the segments
-%% are always allocated first, so if there are objects bigger than a
-%% segment, there is a hole to handle. (Haven't considered placing the
-%% segments among other objects of the same size.)
-allocate_all_objects(Head, Count, Cntrs) ->
- SegSize = actual_seg_size(),
- {Head1, HSz, HN, HA} = alloc_hole(Count, Head, SegSize),
- {Max, _} = hd(Count),
- CT = ?VNEW(Max+1, not_used),
- {Head2, NCT} = allocate_all(Head1, Count, Cntrs, CT),
- Head3 = free_hole(Head2, HSz, HN, HA),
- {Head3, NCT}.
-
-alloc_hole([{LSize,_} | _], Head, SegSz) when ?POW(LSize-1) > SegSz ->
- {_, SegAddr, _} = dets_utils:alloc(Head, SegSz-1),
- Size = ?POW(LSize-1)-1,
- {_, Addr, _} = dets_utils:alloc(Head, Size),
- N = (Addr - SegAddr) div SegSz,
- Head1 = dets_utils:alloc_many(Head, SegSz, N, SegAddr),
- {Head1, SegSz-1, N, SegAddr};
-alloc_hole(_Count, Head, _SegSz) ->
- {Head, 0, 0, 0}.
-
-free_hole(Head, _Size, 0, _Addr) ->
- Head;
-free_hole(Head, Size, N, Addr) ->
- {Head1, _} = dets_utils:free(Head, Addr, Size),
- free_hole(Head1, Size, N-1, Addr+Size+1).
-
-%% One (temporary) file for each buddy size, write all objects of that
-%% size to the file.
-allocate_all(Head, [{LSize,NoObjects} | Count], Cntrs, CT) ->
- Size = ?POW(LSize-1)-1,
- {_Head, Addr, _} = dets_utils:alloc(Head, Size),
- NewHead = dets_utils:alloc_many(Head, Size+1, NoObjects, Addr),
- {FileName, Fd} = temp_file(Head, LSize),
- true = ets:insert(Cntrs, {LSize, Addr, {FileName, Fd}, NoObjects}),
- NCT = ?VSET(LSize, CT, [Addr | []]),
- allocate_all(NewHead, Count, Cntrs, NCT);
-allocate_all(Head, [], Cntrs, CT) ->
- %% Note that space for the segments has been allocated already.
- %% And one file for the segments...
- {FileName, Fd} = temp_file(Head, ?FSCK_SEGMENT),
- Addr = ?SEGADDR(?SEGARRSZ),
- true = ets:insert(Cntrs, {?FSCK_SEGMENT, Addr, {FileName, Fd}, 0}),
- NCT = ?VSET(tuple_size(CT), CT, [Addr | []]),
- {Head, NCT}.
-
-temp_file(Head, N) ->
- TmpName = lists:concat([Head#head.filename, '.', N]),
- {ok, Fd} = dets_utils:open(TmpName, [raw, binary, write]),
- {TmpName, Fd}.
-
-bin2term([<<Slot:32, LogSize:8, BinTerm/binary>> | BTs], L) ->
- bin2term(BTs, [{Slot, LogSize, BinTerm} | L]);
-bin2term([], L) ->
- lists:reverse(L).
-
-write_all_sizes(?DCT(D, CT), Cntrs) ->
- ?DCT(D, write_sizes(1, tuple_size(CT), CT, Cntrs)).
-
-write_sizes(Sz, Sz, CT, Cntrs) ->
- write_size(Sz, ?FSCK_SEGMENT, CT, Cntrs);
-write_sizes(Sz, MaxSz, CT, Cntrs) ->
- NCT = write_size(Sz, Sz, CT, Cntrs),
- write_sizes(Sz+1, MaxSz, NCT, Cntrs).
-
-write_size(Sz, I, CT, Cntrs) ->
- case ?VGET(Sz, CT) of
- not_used ->
- CT;
- [Addr | L] ->
- {FileName, Fd} = ets:lookup_element(Cntrs, I, 3),
- case file:write(Fd, lists:reverse(L)) of
- ok ->
- ?VSET(Sz, CT, [Addr | []]);
- Error ->
- dets_utils:file_error(FileName, Error)
- end
- end.
-
-output_slots(E, [E1 | Es], Acc, Head, DCT)
- when element(1, E) =:= element(1, E1) ->
- output_slots(E1, Es, [E1 | Acc], Head, DCT);
-output_slots(_E, [E | L], Acc, Head, DCT) ->
- NDCT = output_slot(Acc, Head, DCT),
- output_slots(E, L, [E], Head, NDCT);
-output_slots(E, [], Acc, _Head, DCT) ->
- {E, Acc, DCT}.
-
-output_slot([E], _Head, ?DCT(D, CT)) ->
- ?DCT(D, output_slot([{foo, E}], 0, foo, CT));
-output_slot(Es0, Head, ?DCT(D, CT)) ->
- Kp = Head#head.keypos,
- Fun = fun({_Slot, _LSize, BinTerm} = E) ->
- Key = element(Kp, binary_to_term(BinTerm)),
- {Key, E}
- end,
- Es = lists:map(Fun, Es0),
- NEs = case Head#head.type of
- set ->
- [{Key0,_} = E | L0] = lists:sort(Es),
- choose_one(lists:sort(L0), Key0, [E]);
- bag ->
- lists:usort(Es);
- duplicate_bag ->
- lists:sort(Es)
- end,
- Dups = D + length(Es) - length(NEs),
- ?DCT(Dups, output_slot(NEs, 0, foo, CT)).
-
-choose_one([{Key,_} | Es], Key, L) ->
- choose_one(Es, Key, L);
-choose_one([{Key,_} = E | Es], _Key, L) ->
- choose_one(Es, Key, [E | L]);
-choose_one([], _Key, L) ->
- L.
-
-output_slot([E | Es], Next, _Slot, CT) ->
- {_Key, {Slot, LSize, BinTerm}} = E,
- Size = byte_size(BinTerm),
- Size2 = ?POW(LSize-1),
- Pad = <<0:(Size2-Size-?OHDSZ)/unit:8>>,
- BinObject = [<<Next:32, Size:32, ?ACTIVE:32>>, BinTerm | Pad],
- [Addr | L] = ?VGET(LSize, CT),
- NCT = ?VSET(LSize, CT, [Addr+Size2 | [BinObject | L]]),
- output_slot(Es, Addr, Slot, NCT);
-output_slot([], Next, Slot, CT) ->
- I = tuple_size(CT),
- [Addr | L] = ?VGET(I, CT),
- {Pos, _} = slot_position(Slot),
- NoZeros = Pos - Addr,
- BinObject = if
- NoZeros > 100 ->
- [dets_utils:make_zeros(NoZeros) | <<Next:32>>];
- true ->
- <<0:NoZeros/unit:8,Next:32>>
- end,
- Size = NoZeros+4,
- ?VSET(I, CT, [Addr+Size | [BinObject | L]]).
-
-%% Does not close Fd.
-fsck_input(Head, Fd, Cntrs, _FileHeader) ->
- %% The file is not compressed, so the object size cannot exceed
- %% the filesize, for all objects.
- MaxSz = case file:position(Fd, eof) of
- {ok, Pos} ->
- Pos;
- _ ->
- (1 bsl 32) - 1
- end,
- State0 = fsck_read(?BASE, Fd, []),
- fsck_input1(Head, State0, Fd, MaxSz, Cntrs).
-
-fsck_input1(Head, State, Fd, MaxSz, Cntrs) ->
- fun(close) ->
- ok;
- (read) ->
- case State of
- done ->
- end_of_input;
- {done, L} ->
- R = count_input(Cntrs, L, []),
- {R, fsck_input1(Head, done, Fd, MaxSz, Cntrs)};
- {cont, L, Bin, Pos} ->
- R = count_input(Cntrs, L, []),
- FR = fsck_objs(Bin, Head#head.keypos, Head, []),
- NewState = fsck_read(FR, Pos, Fd, MaxSz, Head),
- {R, fsck_input1(Head, NewState, Fd, MaxSz, Cntrs)}
- end
- end.
-
-%% The ets table Cntrs is used for counting objects per size.
-count_input(Cntrs, [[LogSz | B] | Ts], L) ->
- count_object(Cntrs, LogSz),
- count_input(Cntrs, Ts, [B | L]);
-count_input(_Cntrs, [], L) ->
- L.
-
-count_object(Cntrs, LogSz) ->
- case catch ets:update_counter(Cntrs, LogSz, 1) of
- N when is_integer(N) -> ok;
- _Badarg -> true = ets:insert(Cntrs, {LogSz, 1})
- end.
-
-fsck_read(Pos, F, L) ->
- case file:position(F, Pos) of
- {ok, _} ->
- read_more_bytes(<<>>, 0, Pos, F, L);
- _Error ->
- {done, L}
- end.
-
-fsck_read({more, Bin, Sz, L}, Pos, F, MaxSz, Head) when Sz > MaxSz ->
- FR = skip_bytes(Bin, ?BUMP, Head#head.keypos, Head, L),
- fsck_read(FR, Pos, F, MaxSz, Head);
-fsck_read({more, Bin, Sz, L}, Pos, F, _MaxSz, _Head) ->
- read_more_bytes(Bin, Sz, Pos, F, L);
-fsck_read({new, Skip, L}, Pos, F, _MaxSz, _Head) ->
- NewPos = Pos + Skip,
- fsck_read(NewPos, F, L).
-
-read_more_bytes(B, Min, Pos, F, L) ->
- Max = if
- Min < ?CHUNK_SIZE -> ?CHUNK_SIZE;
- true -> Min
- end,
- case dets_utils:read_n(F, Max) of
- eof ->
- {done, L};
- Bin ->
- NewPos = Pos + byte_size(Bin),
- {cont, L, list_to_binary([B, Bin]), NewPos}
- end.
-
-fsck_objs(Bin = <<_N:32, Sz:32, Status:32, Tail/binary>>, Kp, Head, L) ->
- if
- Status =:= ?ACTIVE ->
- case Tail of
- <<BinTerm:Sz/binary, Tail2/binary>> ->
- case catch element(Kp, binary_to_term(BinTerm)) of
- {'EXIT', _} ->
- skip_bytes(Bin, ?BUMP, Kp, Head, L);
- Key ->
- LogSz = sz2pos(Sz+?OHDSZ),
- Obj = make_object(Head, Key, LogSz, BinTerm),
- NL = [[LogSz | Obj] | L],
- Skip = ?POW(LogSz-1) - Sz - ?OHDSZ,
- skip_bytes(Tail2, Skip, Kp, Head, NL)
- end;
- _ ->
- {more, Bin, Sz, L}
- end;
- true ->
- skip_bytes(Bin, ?BUMP, Kp, Head, L)
- end;
-fsck_objs(Bin, _Kp, _Head, L) ->
- {more, Bin, 0, L}.
-
-%% Version 8 has to know about version 9.
-make_object(Head, Key, _LogSz, BT) when Head#head.version =:= 9 ->
- Slot = dets_v9:db_hash(Key, Head),
- <<Slot:32, BT/binary>>;
-make_object(Head, Key, LogSz, BT) ->
- Slot = db_hash(Key, Head),
- <<Slot:32, LogSz:8, BT/binary>>.
-
-%% Inlined.
-skip_bytes(Bin, Skip, Kp, Head, L) ->
- case Bin of
- <<_:Skip/binary, Tail/binary>> ->
- fsck_objs(Tail, Kp, Head, L);
- _ ->
- {new, Skip - byte_size(Bin), L}
- end.
-
-%% -> {NewHead, ok} | throw({Head, Error})
-do_perform_save(H) ->
- FL = dets_utils:get_freelists(H),
- B = term_to_binary(FL),
- Size = byte_size(B),
- ?DEBUGF("size of freelist = ~p~n", [Size]),
- ?DEBUGF("head.m = ~p~n", [H#head.m]),
- ?DEBUGF("head.no_objects = ~p~n", [H#head.no_objects]),
-
- {ok, Pos} = dets_utils:position(H, eof),
- H1 = H#head{freelists_p = Pos},
- W1 = {?FREELIST_POS, <<Pos:32>>},
- W2 = {Pos, [<<0:32, Size:32, ?FREE:32>>, B]},
-
- W3 = {?D_POS, <<(H1#head.m):32,
- (H1#head.next):32,
- (H1#head.keypos):32,
- (H1#head.no_objects):32,
- (H1#head.n):32>>},
- {ClosedProperly, ClosedProperlyNeedCompacitng} =
- case H1#head.hash_bif of
- hash ->
- {?CLOSED_PROPERLY2, ?CLOSED_PROPERLY2_NEED_COMPACTING};
- phash ->
- {?CLOSED_PROPERLY_NEW_HASH,
- ?CLOSED_PROPERLY_NEW_HASH_NEED_COMPACTING}
- end,
- W4 =
- if
- Size > 1000, Size > H1#head.no_objects ->
- {?CLOSED_PROPERLY_POS,
- <<ClosedProperlyNeedCompacitng:32>>};
- true ->
- {?CLOSED_PROPERLY_POS, <<ClosedProperly:32>>}
- end,
- W5 = {?FILE_FORMAT_VERSION_POS, <<?FILE_FORMAT_VERSION:32>>},
- {H2, ok} = dets_utils:pwrite(H1, [W1,W2,W3,W4,W5]),
- {ok, Pos2} = dets_utils:position(H2, eof),
- ?DEBUGF("Writing file size ~p, eof at ~p~n", [Pos2+4, Pos2]),
- dets_utils:pwrite(H2, [{Pos2, <<(Pos2 + 4):32>>}]).
-
-%% -> [term()] | throw({Head, Error})
-slot_objs(H, Slot) when Slot >= H#head.next ->
- '$end_of_table';
-slot_objs(H, Slot) ->
- {_Pos, Chain} = chain(H, Slot),
- collect_chain(H, Chain).
-
-collect_chain(_H, 0) -> [];
-collect_chain(H, Pos) ->
- {Next, _Sz, Term} = prterm(H, Pos, ?ReadAhead),
- [Term | collect_chain(H, Next)].
-
-db_hash(Key, Head) ->
- H = h(Key, Head#head.hash_bif),
- Hash = H rem Head#head.m,
- if
- Hash < Head#head.n ->
- H rem (Head#head.m2); % H rem (2 * m)
- true ->
- Hash
- end.
-
-h(I, phash) -> erlang:phash(I, ?BIG) - 1;
-h(I, HF) -> erlang:HF(I, ?BIG) - 1. %% stupid BIF has 1 counts.
-
-no_slots(_Head) ->
- undefined.
-
-table_parameters(_Head) ->
- undefined.
-
-%% Re-hashing a segment, starting with SlotStart.
-%%
-%% On the average, half of the objects of the chain are put into a new
-%% chain. If the slot of the old chain is i, then the slot of the new
-%% chain is i+m.
-%% Note that the insertion of objects into the new chain is simplified
-%% by the fact that the chains are not sorted on key, which means that
-%% each moved object can be inserted first in the new chain.
-%% (It is also a fact that the objects with the same key are not sorted.)
-%%
-%% -> {ok, Writes} | throw({Head, Error})
-re_hash(Head, SlotStart) ->
- {SlotPos, _4} = slot_position(SlotStart),
- {ok, Bin} = dets_utils:pread(Head, SlotPos, 4*?SEGSZ, 0),
- {Read, Cs} = split_bin(SlotPos, Bin, [], []),
- re_hash_read(Head, [], Read, Cs).
-
-split_bin(Pos, <<P:32, B/binary>>, R, Cs) ->
- if
- P =:= 0 ->
- split_bin(Pos+4, B, R, Cs);
- true ->
- split_bin(Pos+4, B, [{P,?ReadAhead} | R], [[Pos] | Cs])
- end;
-split_bin(_Pos, <<>>, R, Cs) ->
- {R, Cs}.
-
-re_hash_read(Head, Cs, R, RCs) ->
- {ok, Bins} = dets_utils:pread(R, Head),
- re_hash_read(Head, R, RCs, Bins, Cs, [], []).
-
-re_hash_read(Head, [{Pos, Size} | Ps], [C | Cs],
- [<<Next:32, Sz:32, _Status:32, Bin0/binary>> | Bins],
- DoneCs, R, RCs) ->
- case byte_size(Bin0) of
- BinSz when BinSz >= Sz ->
- case catch binary_to_term(Bin0) of
- {'EXIT', _Error} ->
- throw(dets_utils:corrupt_reason(Head, bad_object));
- Term ->
- Key = element(Head#head.keypos, Term),
- New = h(Key, Head#head.hash_bif) rem Head#head.m2,
- NC = case New >= Head#head.m of
- true -> [{Pos,New} | C];
- false -> [Pos | C]
- end,
- if
- Next =:= 0 ->
- NDoneCs = [NC | DoneCs],
- re_hash_read(Head, Ps, Cs, Bins, NDoneCs, R, RCs);
- true ->
- NR = [{Next,?ReadAhead} | R],
- NRCs = [NC | RCs],
- re_hash_read(Head, Ps, Cs, Bins, DoneCs, NR, NRCs)
- end
- end;
- BinSz when Size =:= BinSz+?OHDSZ ->
- NR = [{Pos, Sz+?OHDSZ} | R],
- re_hash_read(Head, Ps, Cs, Bins, DoneCs, NR, [C | RCs]);
- _BinSz ->
- throw({Head, {error, {premature_eof, Head#head.filename}}})
- end;
-re_hash_read(Head, [], [], [], Cs, [], []) ->
- re_hash_traverse_chains(Cs, Head, [], [], []);
-re_hash_read(Head, [], [], [], Cs, R, RCs) ->
- re_hash_read(Head, Cs, R, RCs).
-
-re_hash_traverse_chains([C | Cs], Head, Rs, Ns, Ws) ->
- case re_hash_find_new(C, Rs, start, start) of
- false ->
- re_hash_traverse_chains(Cs, Head, Rs, Ns, Ws);
- {NRs, FirstNew, LastNew} ->
- LastInNew = case C of
- [{_,_} | _] -> true;
- _ -> false
- end,
- N = {FirstNew, LastNew, LastInNew},
- NWs = re_hash_link(C, start, start, start, Ws),
- re_hash_traverse_chains(Cs, Head, NRs, [N | Ns], NWs)
- end;
-re_hash_traverse_chains([], Head, Rs, Ns, Ws) ->
- {ok, Bins} = dets_utils:pread(Rs, Head),
- {ok, insert_new(Rs, Bins, Ns, Ws)}.
-
-re_hash_find_new([{Pos,NewSlot} | C], R, start, start) ->
- {SPos, _4} = slot_position(NewSlot),
- re_hash_find_new(C, [{SPos,4} | R], Pos, Pos);
-re_hash_find_new([{Pos,_SPos} | C], R, _FirstNew, LastNew) ->
- re_hash_find_new(C, R, Pos, LastNew);
-re_hash_find_new([_Pos | C], R, FirstNew, LastNew) ->
- re_hash_find_new(C, R, FirstNew, LastNew);
-re_hash_find_new([], _R, start, start) ->
- false;
-re_hash_find_new([], R, FirstNew, LastNew) ->
- {R, FirstNew, LastNew}.
-
-re_hash_link([{Pos,_SPos} | C], LastOld, start, _LastInNew, Ws) ->
- re_hash_link(C, LastOld, Pos, true, Ws);
-re_hash_link([{Pos,_SPos} | C], LastOld, LastNew, false, Ws) ->
- re_hash_link(C, LastOld, Pos, true, [{Pos,<<LastNew:32>>} | Ws]);
-re_hash_link([{Pos,_SPos} | C], LastOld, _LastNew, LastInNew, Ws) ->
- re_hash_link(C, LastOld, Pos, LastInNew, Ws);
-re_hash_link([Pos | C], start, LastNew, true, Ws) ->
- re_hash_link(C, Pos, LastNew, false, [{Pos,<<0:32>>} | Ws]);
-re_hash_link([Pos | C], LastOld, LastNew, true, Ws) ->
- re_hash_link(C, Pos, LastNew, false, [{Pos,<<LastOld:32>>} | Ws]);
-re_hash_link([Pos | C], _LastOld, LastNew, LastInNew, Ws) ->
- re_hash_link(C, Pos, LastNew, LastInNew, Ws);
-re_hash_link([], _LastOld, _LastNew, _LastInNew, Ws) ->
- Ws.
-
-insert_new([{NewSlotPos,_4} | Rs], [<<P:32>> = PB | Bins], [N | Ns], Ws) ->
- {FirstNew, LastNew, LastInNew} = N,
- Ws1 = case P of
- 0 when LastInNew ->
- Ws;
- 0 ->
- [{LastNew, <<0:32>>} | Ws];
- _ ->
- [{LastNew, PB} | Ws]
- end,
- NWs = [{NewSlotPos, <<FirstNew:32>>} | Ws1],
- insert_new(Rs, Bins, Ns, NWs);
-insert_new([], [], [], Ws) ->
- Ws.
-
-%% When writing the cache, a 'work list' is first created:
-%% WorkList = [{Key, {Delete,Lookup,[Inserted]}}]
-%% Delete = keep | delete
-%% Lookup = skip | lookup
-%% Inserted = {object(), No}
-%% No = integer()
-%% If No =< 0 then there will be -No instances of object() on the file
-%% when the cache has been written. If No > 0 then No instances of
-%% object() will be added to the file.
-%% If Delete has the value 'delete', then all objects with the key Key
-%% have been deleted. (This could be viewed as a shorthand for {Object,0}
-%% for each object Object on the file not mentioned in some Inserted.)
-%% If Lookup has the value 'lookup', all objects with the key Key will
-%% be returned.
-%%
-
-%% -> {NewHead, [LookedUpObject], pwrite_list()} | throw({NewHead, Error})
-write_cache(Head) ->
- #head{cache = C, type = Type} = Head,
- case dets_utils:is_empty_cache(C) of
- true -> {Head, [], []};
- false ->
- {NewC, _MaxInserts, PerKey} = dets_utils:reset_cache(C),
- %% NoInsertedKeys is an upper limit on the number of new keys.
- {WL, NoInsertedKeys} = make_wl(PerKey, Type),
- Head1 = Head#head{cache = NewC},
- case may_grow(Head1, NoInsertedKeys, once) of
- {Head2, ok} ->
- eval_work_list(Head2, WL);
- HeadError ->
- throw(HeadError)
- end
- end.
-
-make_wl(PerKey, Type) ->
- make_wl(PerKey, Type, [], 0).
-
-make_wl([{Key,L} | PerKey], Type, WL, Ins) ->
- [Cs | I] = wl(L, Type),
- make_wl(PerKey, Type, [{Key,Cs} | WL], Ins+I);
-make_wl([], _Type, WL, Ins) ->
- {WL, Ins}.
-
-wl(L, Type) ->
- wl(L, Type, keep, skip, 0, []).
-
-wl([{_Seq, delete_key} | Cs], Type, _Del, Lookup, _I, _Objs) ->
- wl(Cs, Type, delete, Lookup, 0, []);
-wl([{_Seq, {delete_object, Object}} | Cs], Type, Del, Lookup, I, Objs) ->
- NObjs = lists:keydelete(Object, 1, Objs),
- wl(Cs, Type, Del, Lookup, I, [{Object,0} | NObjs]);
-wl([{_Seq, {insert, Object}} | Cs], Type, _Del, Lookup, _I, _Objs)
- when Type =:= set ->
- wl(Cs, Type, delete, Lookup, 1, [{Object,-1}]);
-wl([{_Seq, {insert, Object}} | Cs], Type, Del, Lookup, _I, Objs) ->
- NObjs =
- case lists:keyfind(Object, 1, Objs) of
- {_, 0} ->
- lists:keyreplace(Object, 1, Objs, {Object,-1});
- {_, _C} when Type =:= bag -> % C =:= 1; C =:= -1
- Objs;
- {_, C} when C < 0 -> % when Type =:= duplicate_bag
- lists:keyreplace(Object, 1, Objs, {Object,C-1});
- {_, C} -> % when C > 0, Type =:= duplicate_bag
- lists:keyreplace(Object, 1, Objs, {Object,C+1});
- false when Del =:= delete ->
- [{Object, -1} | Objs];
- false ->
- [{Object, 1} | Objs]
- end,
- wl(Cs, Type, Del, Lookup, 1, NObjs);
-wl([{_Seq, {lookup,_Pid}=Lookup} | Cs], Type, Del, _Lookup, I, Objs) ->
- wl(Cs, Type, Del, Lookup, I, Objs);
-wl([], _Type, Del, Lookup, I, Objs) ->
- [{Del, Lookup, Objs} | I].
-
-%% -> {NewHead, ok} | {NewHead, Error}
-may_grow(Head, 0, once) ->
- {Head, ok};
-may_grow(Head, _N, _How) when Head#head.fixed =/= false ->
- {Head, ok};
-may_grow(#head{access = read}=Head, _N, _How) ->
- {Head, ok};
-may_grow(Head, _N, _How) when Head#head.next >= ?MAXOBJS ->
- {Head, ok};
-may_grow(Head, N, How) ->
- Extra = erlang:min(2*?SEGSZ, Head#head.no_objects + N - Head#head.next),
- case catch may_grow1(Head, Extra, How) of
- {error, Reason} -> % alloc may throw error
- {Head, {error, Reason}};
- Reply ->
- Reply
- end.
-
-may_grow1(Head, Extra, many_times) when Extra > ?SEGSZ ->
- Reply = grow(Head, 1, undefined),
- self() ! ?DETS_CALL(self(), may_grow),
- Reply;
-may_grow1(Head, Extra, _How) ->
- grow(Head, Extra, undefined).
-
-%% -> {Head, ok} | throw({Head, Error})
-grow(Head, Extra, _SegZero) when Extra =< 0 ->
- {Head, ok};
-grow(Head, Extra, undefined) ->
- grow(Head, Extra, seg_zero());
-grow(Head, Extra, SegZero) ->
- #head{n = N, next = Next, m = M} = Head,
- SegNum = ?SLOT2SEG(Next),
- {Head0, Ws1} = allocate_segment(Head, SegZero, SegNum),
- {Head1, ok} = dets_utils:pwrite(Head0, Ws1),
- %% If re_hash fails, segp_cache has been called, but it does not matter.
- {ok, Ws2} = re_hash(Head1, N),
- {Head2, ok} = dets_utils:pwrite(Head1, Ws2),
- NewHead =
- if
- N + ?SEGSZ =:= M ->
- Head2#head{n = 0, next = Next + ?SEGSZ, m = 2 * M, m2 = 4 * M};
- true ->
- Head2#head{n = N + ?SEGSZ, next = Next + ?SEGSZ}
- end,
- grow(NewHead, Extra - ?SEGSZ, SegZero).
-
-seg_zero() ->
- <<0:(4*?SEGSZ)/unit:8>>.
-
-find_object(Head, Object) ->
- Key = element(Head#head.keypos, Object),
- Slot = db_hash(Key, Head),
- find_object(Head, Object, Slot).
-
-find_object(H, _Obj, Slot) when Slot >= H#head.next ->
- false;
-find_object(H, Obj, Slot) ->
- {_Pos, Chain} = chain(H, Slot),
- case catch find_obj(H, Obj, Chain) of
- {ok, Pos} ->
- {ok, Pos};
- _Else ->
- false
- end.
-
-find_obj(H, Obj, Pos) when Pos > 0 ->
- {Next, _Sz, Term} = prterm(H, Pos, ?ReadAhead),
- if
- Term == Obj ->
- {ok, Pos};
- true ->
- find_obj(H, Obj, Next)
- end.
-
-%% Given, a slot, return the {Pos, Chain} in the file where the
-%% objects hashed to this slot reside. Pos is the position in the
-%% file where the chain pointer is written and Chain is the position
-%% in the file where the first object resides.
-chain(Head, Slot) ->
- Pos = ?SEGADDR(?SLOT2SEG(Slot)),
- Segment = get_segp(Pos),
- FinalPos = Segment + (4 * ?REM2(Slot, ?SEGSZ)),
- {ok, <<Chain:32>>} = dets_utils:pread(Head, FinalPos, 4, 0),
- {FinalPos, Chain}.
-
-%%%
-%%% Cache routines depending on the dets file format.
-%%%
-
-%% -> {Head, [LookedUpObject], pwrite_list()} | throw({Head, Error})
-eval_work_list(Head, WorkLists) ->
- SWLs = tag_with_slot(WorkLists, Head, []),
- P1 = dets_utils:family(SWLs),
- {PerSlot, SlotPositions} = remove_slot_tag(P1, [], []),
- {ok, Bins} = dets_utils:pread(SlotPositions, Head),
- first_object(PerSlot, SlotPositions, Bins, Head, [], [], [], []).
-
-tag_with_slot([{K,_} = WL | WLs], Head, L) ->
- tag_with_slot(WLs, Head, [{db_hash(K, Head), WL} | L]);
-tag_with_slot([], _Head, L) ->
- L.
-
-remove_slot_tag([{S,SWLs} | SSWLs], Ls, SPs) ->
- remove_slot_tag(SSWLs, [SWLs | Ls], [slot_position(S) | SPs]);
-remove_slot_tag([], Ls, SPs) ->
- {Ls, SPs}.
-
-%% The initial chain pointers and the first object in each chain are
-%% read "in parallel", that is, with one call to file:pread/2 (two
-%% calls altogether). The following chain objects are read one by
-%% one. This is a compromise: if the chains are long and threads are
-%% active, it would be faster to keep a state for each chain and read
-%% the objects of the chains in parallel, but the overhead would be
-%% quite substantial.
-
-first_object([WorkLists | SPs], [{P1,_4} | Ss], [<<P2:32>> | Bs], Head,
- ObjsToRead, ToRead, Ls, LU) when P2 =:= 0 ->
- L0 = [{old,P1}],
- {L, NLU} = eval_slot(Head, ?ReadAhead, P2, WorkLists, L0, LU),
- first_object(SPs, Ss, Bs, Head, ObjsToRead, ToRead, [L | Ls], NLU);
-first_object([WorkLists | SPs], [{P1,_4} | Ss], [<<P2:32>> | Bs], Head,
- ObjsToRead, ToRead, Ls, LU) ->
- E = {P1,P2,WorkLists},
- first_object(SPs, Ss, Bs, Head,
- [E | ObjsToRead], [{P2, ?ReadAhead} | ToRead], Ls, LU);
-first_object([], [], [], Head, ObjsToRead, ToRead, Ls, LU) ->
- {ok, Bins} = dets_utils:pread(ToRead, Head),
- case catch eval_first(Bins, ObjsToRead, Head, Ls, LU) of
- {ok, NLs, NLU} ->
- case create_writes(NLs, Head, [], 0) of
- {Head1, [], 0} ->
- {Head1, NLU, []};
- {Head1, Ws, No} ->
- {NewHead, Ws2} = update_no_objects(Head1, Ws, No),
- {NewHead, NLU, Ws2}
- end;
- _Error ->
- throw(dets_utils:corrupt_reason(Head, bad_object))
- end.
-
-%% Update no_objects on the file too, if the number of segments that
-%% dets:fsck/6 use for estimate has changed.
-update_no_objects(Head, Ws, 0) -> {Head, Ws};
-update_no_objects(Head, Ws, Delta) ->
- No = Head#head.no_objects,
- NewNo = No + Delta,
- NWs =
- if
- NewNo > ?MAXOBJS ->
- Ws;
- ?SLOT2SEG(No) =:= ?SLOT2SEG(NewNo) ->
- Ws;
- true ->
- [{?NO_OBJECTS_POS, <<NewNo:32>>} | Ws]
- end,
- {Head#head{no_objects = NewNo}, NWs}.
-
-eval_first([<<Next:32, Sz:32, _Status:32, Bin/binary>> | Bins],
- [SP | SPs], Head, Ls, LU) ->
- {P1, P2, WLs} = SP,
- L0 = [{old,P1}],
- case byte_size(Bin) of
- BinSz when BinSz >= Sz ->
- Term = binary_to_term(Bin),
- Key = element(Head#head.keypos, Term),
- {L, NLU} = find_key(Head, P2, Next, Sz, Term, Key, WLs, L0, LU),
- eval_first(Bins, SPs, Head, [L | Ls], NLU);
- _BinSz ->
- {L, NLU} = eval_slot(Head, Sz+?OHDSZ, P2, WLs, L0, LU),
- eval_first(Bins, SPs, Head, [L | Ls], NLU)
- end;
-eval_first([], [], _Head, Ls, LU) ->
- {ok, Ls, LU}.
-
-eval_slot(_Head, _TrySize, _Pos=0, [], L, LU) ->
- {L, LU};
-eval_slot(Head, _TrySize, Pos=0, [WL | WLs], L, LU) ->
- {_Key, {_Delete, LookUp, Objects}} = WL,
- {NL, NLU} = end_of_key(Objects, LookUp, L, []),
- eval_slot(Head, ?ReadAhead, Pos, WLs, NL, NLU++LU);
-eval_slot(Head, TrySize, Pos, WLs, L, LU) ->
- {NextPos, Size, Term} = prterm(Head, Pos, TrySize),
- Key = element(Head#head.keypos, Term),
- find_key(Head, Pos, NextPos, Size, Term, Key, WLs, L, LU).
-
-find_key(Head, Pos, NextPos, Size, Term, Key, WLs, L, LU) ->
- case lists:keyfind(Key, 1, WLs) of
- {_, {Delete, LookUp, Objects}} = WL ->
- NWLs = lists:delete(WL, WLs),
- {NewObjects, NL, LUK} = eval_object(Size, Term, Delete, LookUp,
- Objects, Head, Pos, L, []),
- eval_key(Key, Delete, LookUp, NewObjects, Head, NextPos,
- NWLs, NL, LU, LUK);
- false ->
- L0 = [{old,Pos} | L],
- eval_slot(Head, ?ReadAhead, NextPos, WLs, L0, LU)
- end.
-
-eval_key(_Key, _Delete, Lookup, _Objects, Head, Pos, WLs, L, LU, LUK)
- when Head#head.type =:= set ->
- NLU = case Lookup of
- {lookup, Pid} -> [{Pid,LUK} | LU];
- skip -> LU
- end,
- eval_slot(Head, ?ReadAhead, Pos, WLs, L, NLU);
-eval_key(_Key, _Delete, LookUp, Objects, Head, Pos, WLs, L, LU, LUK)
- when Pos =:= 0 ->
- {NL, NLU} = end_of_key(Objects, LookUp, L, LUK),
- eval_slot(Head, ?ReadAhead, Pos, WLs, NL, NLU++LU);
-eval_key(Key, Delete, LookUp, Objects, Head, Pos, WLs, L, LU, LUK) ->
- {NextPos, Size, Term} = prterm(Head, Pos, ?ReadAhead),
- case element(Head#head.keypos, Term) of
- Key ->
- {NewObjects, NL, LUK1} =
- eval_object(Size, Term, Delete, LookUp,Objects,Head,Pos,L,LUK),
- eval_key(Key, Delete, LookUp, NewObjects, Head, NextPos, WLs,
- NL, LU, LUK1);
- Key2 ->
- {L1, NLU} = end_of_key(Objects, LookUp, L, LUK),
- find_key(Head, Pos, NextPos, Size, Term, Key2, WLs, L1, NLU++LU)
- end.
-
-%% All objects in Objects have the key Key.
-eval_object(Size, Term, Delete, LookUp, Objects, Head, Pos, L, LU) ->
- Type = Head#head.type,
- case lists:keyfind(Term, 1, Objects) of
- {_Object, N} when N =:= 0 ->
- L1 = [{delete,Pos,Size} | L],
- {Objects, L1, LU};
- {_Object, N} when N < 0, Type =:= set ->
- L1 = [{old,Pos} | L],
- wl_lookup(LookUp, Objects, Term, L1, LU);
- {Object, _N} when Type =:= bag -> % when N =:= 1; N =:= -1
- L1 = [{old,Pos} | L],
- Objects1 = lists:keydelete(Object, 1, Objects),
- wl_lookup(LookUp, Objects1, Term, L1, LU);
- {Object, N} when N < 0, Type =:= duplicate_bag ->
- L1 = [{old,Pos} | L],
- Objects1 = lists:keyreplace(Object, 1, Objects, {Object,N+1}),
- wl_lookup(LookUp, Objects1, Term, L1, LU);
- {_Object, N} when N > 0, Type =:= duplicate_bag ->
- L1 = [{old,Pos} | L],
- wl_lookup(LookUp, Objects, Term, L1, LU);
- false when Type =:= set, Delete =:= delete ->
- case lists:keyfind(-1, 2, Objects) of
- false -> % no inserted object, perhaps deleted objects
- L1 = [{delete,Pos,Size} | L],
- {[], L1, LU};
- {Term2, -1} ->
- Bin2 = term_to_binary(Term2),
- NSize = byte_size(Bin2),
- Overwrite =
- if
- NSize =:= Size ->
- true;
- true ->
- SizePos = sz2pos(Size+?OHDSZ),
- NSizePos = sz2pos(NSize+?OHDSZ),
- SizePos =:= NSizePos
- end,
- E = if
- Overwrite ->
- {overwrite,Bin2,Pos};
- true ->
- {replace,Bin2,Pos,Size}
- end,
- wl_lookup(LookUp, [], Term2, [E | L], LU)
- end;
- false when Delete =:= delete ->
- L1 = [{delete,Pos,Size} | L],
- {Objects, L1, LU};
- false ->
- L1 = [{old,Pos} | L],
- wl_lookup(LookUp, Objects, Term, L1, LU)
- end.
-
-%% Inlined.
-wl_lookup({lookup,_}, Objects, Term, L, LU) ->
- {Objects, L, [Term | LU]};
-wl_lookup(skip, Objects, _Term, L, LU) ->
- {Objects, L, LU}.
-
-end_of_key([{Object,N0} | Objs], LookUp, L, LU) when N0 =/= 0 ->
- N = abs(N0),
- NL = [{insert,N,term_to_binary(Object)} | L],
- NLU = case LookUp of
- {lookup, _} ->
- lists:duplicate(N, Object) ++ LU;
- skip ->
- LU
- end,
- end_of_key(Objs, LookUp, NL, NLU);
-end_of_key([_ | Objects], LookUp, L, LU) ->
- end_of_key(Objects, LookUp, L, LU);
-end_of_key([], {lookup,Pid}, L, LU) ->
- {L, [{Pid,LU}]};
-end_of_key([], skip, L, LU) ->
- {L, LU}.
-
-create_writes([L | Ls], H, Ws, No) ->
- {NH, NWs, NNo} = create_writes(L, H, Ws, No, 0, true),
- create_writes(Ls, NH, NWs, NNo);
-create_writes([], H, Ws, No) ->
- {H, lists:reverse(Ws), No}.
-
-create_writes([{old,Pos} | L], H, Ws, No, _Next, true) ->
- create_writes(L, H, Ws, No, Pos, true);
-create_writes([{old,Pos} | L], H, Ws, No, Next, false) ->
- W = {Pos, <<Next:32>>},
- create_writes(L, H, [W | Ws], No, Pos, true);
-create_writes([{insert,N,Bin} | L], H, Ws, No, Next, _NextIsOld) ->
- {NH, NWs, Pos} = create_inserts(N, H, Ws, Next, byte_size(Bin), Bin),
- create_writes(L, NH, NWs, No+N, Pos, false);
-create_writes([{overwrite,Bin,Pos} | L], H, Ws, No, Next, _) ->
- Size = byte_size(Bin),
- W = {Pos, [<<Next:32, Size:32, ?ACTIVE:32>>, Bin]},
- create_writes(L, H, [W | Ws], No, Pos, true);
-create_writes([{replace,Bin,Pos,OSize} | L], H, Ws, No, Next, _) ->
- Size = byte_size(Bin),
- {H1, _} = dets_utils:free(H, Pos, OSize+?OHDSZ),
- {NH, NewPos, _} = dets_utils:alloc(H1, ?OHDSZ + Size),
- W1 = {NewPos, [<<Next:32, Size:32, ?ACTIVE:32>>, Bin]},
- NWs = if
- Pos =:= NewPos ->
- [W1 | Ws];
- true ->
- W2 = {Pos+?STATUS_POS, <<?FREE:32>>},
- [W1,W2 | Ws]
- end,
- create_writes(L, NH, NWs, No, NewPos, false);
-create_writes([{delete,Pos,Size} | L], H, Ws, No, Next, _) ->
- {NH, _} = dets_utils:free(H, Pos, Size+?OHDSZ),
- NWs = [{Pos+?STATUS_POS,<<?FREE:32>>} | Ws],
- create_writes(L, NH, NWs, No-1, Next, false);
-create_writes([], H, Ws, No, _Next, _NextIsOld) ->
- {H, Ws, No}.
-
-create_inserts(0, H, Ws, Next, _Size, _Bin) ->
- {H, Ws, Next};
-create_inserts(N, H, Ws, Next, Size, Bin) ->
- {NH, Pos, _} = dets_utils:alloc(H, ?OHDSZ + Size),
- W = {Pos, [<<Next:32, Size:32, ?ACTIVE:32>>, Bin]},
- create_inserts(N-1, NH, [W | Ws], Pos, Size, Bin).
-
-slot_position(S) ->
- Pos = ?SEGADDR(?SLOT2SEG(S)),
- Segment = get_segp(Pos),
- FinalPos = Segment + (4 * ?REM2(S, ?SEGSZ)),
- {FinalPos, 4}.
-
-%% Twice the size of a segment due to the bug in sz2pos/1. Inlined.
-actual_seg_size() ->
- ?POW(sz2pos(?SEGSZ*4)-1).
-
-segp_cache(Pos, Segment) ->
- put(Pos, Segment).
-
-%% Inlined.
-get_segp(Pos) ->
- get(Pos).
-
-%% Bug: If Sz0 is equal to 2**k for some k, then 2**(k+1) bytes are
-%% allocated (wasting 2**k bytes).
-sz2pos(N) ->
- 1 + dets_utils:log2(N+1).
-
-scan_objs(_Head, Bin, From, To, L, Ts, R, _Type) ->
- scan_objs(Bin, From, To, L, Ts, R).
-
-scan_objs(Bin, From, To, L, Ts, -1) ->
- {stop, Bin, From, To, L, Ts};
-scan_objs(B = <<_N:32, Sz:32, St:32, T/binary>>, From, To, L, Ts, R) ->
- if
- St =:= ?ACTIVE;
- St =:= ?FREE -> % deleted after scanning started
- case T of
- <<BinTerm:Sz/binary, T2/binary>> ->
- NTs = [BinTerm | Ts],
- OSz = Sz + ?OHDSZ,
- Skip = ?POW(sz2pos(OSz)-1) - OSz,
- F2 = From + OSz,
- NR = if
- R < 0 ->
- R + 1;
- true ->
- R + OSz + Skip
- end,
- scan_skip(T2, F2, To, Skip, L, NTs, NR);
- _ ->
- {more, From, To, L, Ts, R, Sz+?OHDSZ}
- end;
- true -> % a segment
- scan_skip(B, From, To, actual_seg_size(), L, Ts, R)
- end;
-scan_objs(_B, From, To, L, Ts, R) ->
- {more, From, To, L, Ts, R, 0}.
-
-scan_skip(Bin, From, To, Skip, L, Ts, R) when From + Skip < To ->
- SkipPos = From + Skip,
- case Bin of
- <<_:Skip/binary, Tail/binary>> ->
- scan_objs(Tail, SkipPos, To, L, Ts, R);
- _ ->
- {more, SkipPos, To, L, Ts, R, 0}
- end;
-scan_skip(Bin, From, To, Skip, L, Ts, R) when From + Skip =:= To ->
- scan_next_allocated(Bin, From, To, L, Ts, R);
-scan_skip(_Bin, From, _To, Skip, L, Ts, R) -> % when From + Skip > _To
- From1 = From + Skip,
- {more, From1, From1, L, Ts, R, 0}.
-
-scan_next_allocated(_Bin, _From, To, <<>>=L, Ts, R) ->
- {more, To, To, L, Ts, R, 0};
-scan_next_allocated(Bin, From0, _To, <<From:32, To:32, L/binary>>, Ts, R) ->
- Skip = From - From0,
- scan_skip(Bin, From0, To, Skip, L, Ts, R).
-
-%% Read term from file at position Pos
-prterm(Head, Pos, ReadAhead) ->
- Res = dets_utils:pread(Head, Pos, ?OHDSZ, ReadAhead),
- ?DEBUGF("file:pread(~tp, ~p, ?) -> ~p~n", [Head#head.filename, Pos, Res]),
- {ok, <<Next:32, Sz:32, _Status:32, Bin0/binary>>} = Res,
- ?DEBUGF("{Next, Sz} = ~p~n", [{Next, Sz}]),
- Bin = case byte_size(Bin0) of
- Actual when Actual >= Sz ->
- Bin0;
- _ ->
- {ok, Bin1} = dets_utils:pread(Head, Pos + ?OHDSZ, Sz, 0),
- Bin1
- end,
- Term = binary_to_term(Bin),
- {Next, Sz, Term}.
-
-%%%%%%%%%%%%%%%%% DEBUG functions %%%%%%%%%%%%%%%%
-
-file_info(FH) ->
- #fileheader{closed_properly = CP, keypos = Kp,
- m = M, next = Next, n = N, version = Version,
- type = Type, no_objects = NoObjects}
- = FH,
- if
- CP =:= 0 ->
- {error, not_closed};
- FH#fileheader.cookie =/= ?MAGIC ->
- {error, not_a_dets_file};
- FH#fileheader.version =/= ?FILE_FORMAT_VERSION ->
- {error, bad_version};
- true ->
- {ok, [{closed_properly,CP},{keypos,Kp},{m, M},
- {n,N},{next,Next},{no_objects,NoObjects},
- {type,Type},{version,Version}]}
- end.
-
-v_segments(H) ->
- v_segments(H, 0).
-
-v_segments(_H, ?SEGARRSZ) ->
- done;
-v_segments(H, SegNo) ->
- Seg = dets_utils:read_4(H#head.fptr, ?SEGADDR(SegNo)),
- if
- Seg =:= 0 ->
- done;
- true ->
- io:format("SEGMENT ~w ", [SegNo]),
- io:format("At position ~w~n", [Seg]),
- v_segment(H, SegNo, Seg, 0),
- v_segments(H, SegNo+1)
- end.
-
-v_segment(_H, _, _SegPos, ?SEGSZ) ->
- done;
-v_segment(H, SegNo, SegPos, SegSlot) ->
- Slot = SegSlot + (SegNo * ?SEGSZ),
- Chain = dets_utils:read_4(H#head.fptr, SegPos + (4 * SegSlot)),
- if
- Chain =:= 0 -> %% don't print empty chains
- true;
- true ->
- io:format(" <~p>~p: [",[SegPos + (4 * SegSlot), Slot]),
- print_chain(H, Chain)
- end,
- v_segment(H, SegNo, SegPos, SegSlot+1).
-
-print_chain(_H, 0) ->
- io:format("] \n", []);
-print_chain(H, Pos) ->
- {ok, _} = file:position(H#head.fptr, Pos),
- case rterm(H#head.fptr) of
- {ok, 0, _Sz, Term} ->
- io:format("<~p>~p] \n",[Pos, Term]);
- {ok, Next, _Sz, Term} ->
- io:format("<~p>~p, ", [Pos, Term]),
- print_chain(H, Next);
- Other ->
- io:format("~nERROR ~p~n", [Other])
- end.
-
-%% Can't be used at the bucket level!!!!
-%% Only when we go down a chain
-rterm(F) ->
- case catch rterm2(F) of
- {'EXIT', Reason} -> %% truncated DAT file
- dets_utils:vformat("** dets: Corrupt or truncated dets file~n",
- []),
- {error, Reason};
- Other ->
- Other
- end.
-
-rterm2(F) ->
- {ok, <<Next:32, Sz:32, _:32>>} = file:read(F, ?OHDSZ),
- {ok, Bin} = file:read(F, Sz),
- Term = binary_to_term(Bin),
- {ok, Next, Sz, Term}.
-
-