%%
%% %CopyrightBegin%
%% 
%% Copyright Ericsson AB 1996-2009. All Rights Reserved.
%% 
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%% 
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% 
%% %CopyrightEnd%
%%

%%
%% The mnesia_init process loads tables from local disc or from
%% another nodes. It also coordinates updates of the info about
%% where we can read and write tables.
%%
%% Tables may need to be loaded initially at startup of the local
%% node or when other nodes announces that they already have loaded
%% tables that we also want.
%%
%% Initially we set the load request queue to those tables that we
%% safely can load locally, i.e. tables where we have the last
%% consistent replica and we have received mnesia_down from all
%% other nodes holding the table. Then we let the mnesia_init
%% process enter its normal working state.
%%
%% When we need to load a table we append a request to the load
%% request queue. All other requests are regarded as high priority
%% and are processed immediately (e.g. update table whereabouts).
%% We processes the load request queue as a "background" job..

-module(mnesia_controller).

-behaviour(gen_server).

%% Mnesia internal stuff
-export([
	 start/0,
	 i_have_tab/1,
	 info/0,
	 get_info/1,
	 get_workers/1,
	 force_load_table/1,
	 async_dump_log/1,
	 sync_dump_log/1,
	 connect_nodes/1,
	 wait_for_schema_commit_lock/0,
	 release_schema_commit_lock/0,
	 create_table/1,
	 get_disc_copy/1,
	 get_cstructs/0,
	 sync_and_block_table_whereabouts/4,
	 sync_del_table_copy_whereabouts/2,
	 block_table/1,
	 unblock_table/1,
	 block_controller/0,
	 unblock_controller/0,
	 unannounce_add_table_copy/2,
	 master_nodes_updated/2,
	 mnesia_down/1,
	 add_active_replica/2,
	 add_active_replica/3,
	 add_active_replica/4,
	 update/1,
	 change_table_access_mode/1,
	 del_active_replica/2,
	 wait_for_tables/2,
	 get_network_copy/2,
	 merge_schema/0,
	 start_remote_sender/4,
	 schedule_late_disc_load/2
	]).

%% gen_server callbacks
-export([init/1,
	 handle_call/3,
	 handle_cast/2,
	 handle_info/2,
	 terminate/2,
	 code_change/3]).

%% Module internal stuff
-export([call/1,
	 cast/1,
	 dump_and_reply/2,
	 load_and_reply/2,
	 send_and_reply/2,
	 wait_for_tables_init/2,
	 connect_nodes2/2
	]).

-import(mnesia_lib, [set/2, add/2]).
-import(mnesia_lib, [fatal/2, error/2, verbose/2, dbg_out/2]).

-include("mnesia.hrl").

-define(SERVER_NAME, ?MODULE).  

-record(state, {supervisor,
		schema_is_merged = false,
		early_msgs = [],
		loader_pid = [],     %% Was Pid is now [{Pid,Work}|..] 
		loader_queue,        %% Was list is now gb_tree
		sender_pid = [],     %% Was a pid or undef is now [{Pid,Work}|..] 
		sender_queue =  [],
		late_loader_queue,   %% Was list is now gb_tree
		dumper_pid,          %% Dumper or schema commit pid
		dumper_queue = [],   %% Dumper or schema commit queue
		others = [],         %% Processes that needs the copier_done msg
		dump_log_timer_ref,
		is_stopping = false
	       }).
%% Backwards Comp. Sender_pid is now a list of senders..
get_senders(#state{sender_pid = Pids}) when is_list(Pids) -> Pids.    
%% Backwards Comp. loader_pid is now a list of loaders..
get_loaders(#state{loader_pid = Pids}) when is_list(Pids) -> Pids.    
max_loaders() ->
    case ?catch_val(no_table_loaders) of
	{'EXIT', _} -> 
	    mnesia_lib:set(no_table_loaders,1),
	    1;
	Val -> Val
    end.

-record(schema_commit_lock, {owner}).
-record(block_controller, {owner}).

-record(dump_log, {initiated_by,
		   opt_reply_to
		  }).

-record(net_load, {table,
		   reason,
		   opt_reply_to,
		   cstruct = unknown
		  }).

-record(send_table, {table,
		     receiver_pid,
		     remote_storage
		    }).

-record(disc_load, {table, 
		    reason,
		    opt_reply_to
		   }).

-record(late_load, {table,
		    reason,
		    opt_reply_to,
		    loaders
		   }).

-record(loader_done, {worker_pid,
		      is_loaded,
		      table_name,
		      needs_announce,
		      needs_sync,
		      needs_reply,
		      reply_to,
		      reply}).

-record(sender_done, {worker_pid,
		      worker_res,
		      table_name
		     }).

-record(dumper_done, {worker_pid,
		      worker_res
		     }).

val(Var) ->
    case ?catch_val(Var) of
	{'EXIT', Reason} -> mnesia_lib:other_val(Var, Reason); 
	Value -> Value
    end.

start() ->
    gen_server:start_link({local, ?SERVER_NAME}, ?MODULE, [self()],
			  [{timeout, infinity}
			   %% ,{debug, [trace]}
			  ]).

sync_dump_log(InitBy) ->
    call({sync_dump_log, InitBy}).

async_dump_log(InitBy) ->
    ?SERVER_NAME ! {async_dump_log, InitBy}.
    
%% Wait for tables to be active
%% If needed, we will wait for Mnesia to start
%% If Mnesia stops, we will wait for Mnesia to restart
%% We will wait even if the list of tables is empty
%%
wait_for_tables(Tabs, Timeout) when is_list(Tabs), Timeout == infinity ->
    do_wait_for_tables(Tabs, Timeout);
wait_for_tables(Tabs, Timeout) when is_list(Tabs),
                                    is_integer(Timeout), Timeout >= 0 ->
    do_wait_for_tables(Tabs, Timeout);
wait_for_tables(Tabs, Timeout) ->
    {error, {badarg, Tabs, Timeout}}.

do_wait_for_tables(Tabs, 0) ->
    reply_wait(Tabs);
do_wait_for_tables(Tabs, Timeout) ->
    Pid = spawn_link(?MODULE, wait_for_tables_init, [self(), Tabs]),
    receive
	{?SERVER_NAME, Pid, Res} ->
	    Res;
	{'EXIT', Pid, _} ->
	    reply_wait(Tabs)
    after Timeout ->
	    unlink(Pid),
	    exit(Pid, timeout),
	    reply_wait(Tabs)
    end.
	
reply_wait(Tabs) ->
    case catch mnesia_lib:active_tables() of
	{'EXIT', _} ->
	    {error, {node_not_running, node()}};
	Active when is_list(Active) ->
	    case Tabs -- Active of
		[] ->
		    ok;
		BadTabs ->
		    {timeout, BadTabs}
	    end
    end.

wait_for_tables_init(From, Tabs) ->
    process_flag(trap_exit, true),
    Res = wait_for_init(From, Tabs, whereis(?SERVER_NAME)),
    From ! {?SERVER_NAME, self(), Res},
    unlink(From),
    exit(normal).

wait_for_init(From, Tabs, Init) ->
    case catch link(Init) of
	{'EXIT', _} ->
	    %% Mnesia is not started
	    {error, {node_not_running, node()}};
	true when is_pid(Init) ->
	    cast({sync_tabs, Tabs, self()}),
	    rec_tabs(Tabs, Tabs, From, Init)
    end.

sync_reply(Waiter, Tab) ->
    Waiter ! {?SERVER_NAME, {tab_synced, Tab}}.

rec_tabs([Tab | Tabs], AllTabs, From, Init) ->
    receive
	{?SERVER_NAME, {tab_synced, Tab}} ->
	    rec_tabs(Tabs, AllTabs, From, Init);

	{'EXIT', From, _} ->
	    %% This will trigger an exit signal
	    %% to mnesia_init
	    exit(wait_for_tables_timeout);
	
	{'EXIT', Init, _} ->
	    %% Oops, mnesia_init stopped,
	    exit(mnesia_stopped)
    end;
rec_tabs([], _, _, Init) ->
    unlink(Init),
    ok.

get_cstructs() ->
    call(get_cstructs).

update(Fun) ->
    call({update,Fun}).


mnesia_down(Node) ->
    case cast({mnesia_down, Node}) of
	{error, _} -> mnesia_monitor:mnesia_down(?SERVER_NAME, Node);
	_Pid ->  ok
    end.
wait_for_schema_commit_lock() ->
    link(whereis(?SERVER_NAME)),
    unsafe_call(wait_for_schema_commit_lock).

block_controller() ->
    call(block_controller).

unblock_controller() ->
    cast(unblock_controller).

release_schema_commit_lock() ->
    cast({release_schema_commit_lock, self()}),
    unlink(whereis(?SERVER_NAME)).

%% Special for preparation of add table copy
get_network_copy(Tab, Cs) ->
%   We can't let the controller queue this one
%   because that may cause a deadlock between schema_operations
%   and initial tableloadings which both takes schema locks.
%   But we have to get copier_done msgs when the other side 
%   goes down.
    call({add_other, self()}),
    Reason = {dumper,add_table_copy},
    Work = #net_load{table = Tab,reason = Reason,cstruct = Cs},
    %% I'll need this cause it's linked trough the subscriber
    %% might be solved by using monitor in subscr instead.
    process_flag(trap_exit, true),
    Load = load_table_fun(Work),
    Res = (catch Load()),
    process_flag(trap_exit, false),
    call({del_other, self()}),
    case Res of
 	#loader_done{is_loaded = true} ->
 	    Tab = Res#loader_done.table_name,
 	    case Res#loader_done.needs_announce of
 		true ->
 		    i_have_tab(Tab);
 		false ->
 		    ignore
 	    end,
 	    Res#loader_done.reply;
 	#loader_done{} -> 
 	    Res#loader_done.reply;
 	Else ->
 	    {not_loaded, Else}
    end.

%% This functions is invoked from the dumper
%% 
%% There are two cases here:
%% startup ->
%%   no need for sync, since mnesia_controller not started yet
%% schema_trans ->
%%   already synced with mnesia_controller since the dumper
%%   is syncronously started from mnesia_controller

create_table(Tab) ->
    {loaded, ok} = mnesia_loader:disc_load_table(Tab, {dumper,create_table}).

get_disc_copy(Tab) ->
    disc_load_table(Tab, {dumper,change_table_copy_type}, undefined).

%% Returns ok instead of yes
force_load_table(Tab) when is_atom(Tab), Tab /= schema ->
    case ?catch_val({Tab, storage_type}) of
	ram_copies ->
	    do_force_load_table(Tab);
	disc_copies ->
	    do_force_load_table(Tab);
	disc_only_copies ->
	    do_force_load_table(Tab);
	unknown ->
	    set({Tab, load_by_force}, true),
	    cast({force_load_updated, Tab}),
	    wait_for_tables([Tab], infinity);
	{'EXIT', _} ->
	    {error, {no_exists, Tab}}
    end;
force_load_table(Tab) ->
    {error, {bad_type, Tab}}.
    
do_force_load_table(Tab) ->
    Loaded = ?catch_val({Tab, load_reason}),
    case Loaded of
	unknown -> 
	    set({Tab, load_by_force}, true),
	    mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
	    wait_for_tables([Tab], infinity);
	{'EXIT', _} ->
	    set({Tab, load_by_force}, true),
	    mnesia_late_loader:async_late_disc_load(node(), [Tab], forced_by_user),
	    wait_for_tables([Tab], infinity);
	_ ->
	    ok
    end.    
master_nodes_updated(schema, _Masters) ->
    ignore;
master_nodes_updated(Tab, Masters) ->
    cast({master_nodes_updated, Tab, Masters}).

schedule_late_disc_load(Tabs, Reason) ->
    MsgTag = late_disc_load,
    try_schedule_late_disc_load(Tabs, Reason, MsgTag).

try_schedule_late_disc_load(Tabs, _Reason, MsgTag)
  when Tabs == [], MsgTag /= schema_is_merged ->
    ignore;
try_schedule_late_disc_load(Tabs, Reason, MsgTag) ->
    GetIntents =
	fun() ->
		Item = mnesia_late_disc_load,
		Nodes = val({current, db_nodes}),
		mnesia:lock({global, Item, Nodes}, write),
		case multicall(Nodes -- [node()], disc_load_intents) of
		    {Replies, []} ->
			call({MsgTag, Tabs, Reason, Replies}),
			done;
		    {_, BadNodes} ->
			%% Some nodes did not respond, lets try again
			{retry, BadNodes}
		end
	end,
    case mnesia:transaction(GetIntents) of
	{atomic, done} ->
	    done;
	{atomic, {retry, BadNodes}} ->
	    verbose("Retry late_load_tables because bad nodes: ~p~n",
		    [BadNodes]),
	    try_schedule_late_disc_load(Tabs, Reason, MsgTag);
	{aborted, AbortReason} ->
	    fatal("Cannot late_load_tables~p: ~p~n",
		  [[Tabs, Reason, MsgTag], AbortReason])
    end.

connect_nodes(Ns) ->    
    case mnesia:system_info(is_running) of 
	no ->
	    {error, {node_not_running, node()}};
	yes ->	  	   
	    Pid = spawn_link(?MODULE,connect_nodes2,[self(),Ns]),
	    receive 
		{?MODULE, Pid, Res, New} -> 
		    case Res of
			ok -> 
			    mnesia_lib:add_list(extra_db_nodes, New),
			    {ok, New};
			{aborted, {throw, Str}} when is_list(Str) ->
			    %%mnesia_recover:disconnect_nodes(New),
			    {error, {merge_schema_failed, lists:flatten(Str)}};
			Else ->
			    {error, Else}
		    end;	    
		{'EXIT', Pid, Reason} -> 
		    {error, Reason}
	    end
    end.

connect_nodes2(Father, Ns) ->
    Current = val({current, db_nodes}),
    abcast([node()|Ns], {merging_schema, node()}),
    {NewC, OldC} = mnesia_recover:connect_nodes(Ns),
    Connected = NewC ++OldC,
    New1 = mnesia_lib:intersect(Ns, Connected),
    New = New1 -- Current,    
    process_flag(trap_exit, true),
    Res = try_merge_schema(New),
    Msg = {schema_is_merged, [], late_merge, []},
    multicall([node()|Ns], Msg),
    After = val({current, db_nodes}),    
    Father ! {?MODULE, self(), Res, mnesia_lib:intersect(Ns,After)},
    unlink(Father),
    ok.
    
%% Merge the local schema with the schema on other nodes.
%% But first we must let all processes that want to force
%% load tables wait until the schema merge is done.

merge_schema() ->
    AllNodes = mnesia_lib:all_nodes(),
    case try_merge_schema(AllNodes) of
	ok -> 
	    schema_is_merged();
	{aborted, {throw, Str}} when is_list(Str) ->
	    fatal("Failed to merge schema: ~s~n", [Str]);
	Else ->
	    fatal("Failed to merge schema: ~p~n", [Else])
    end.

try_merge_schema(Nodes) ->
    case mnesia_schema:merge_schema() of
	{atomic, not_merged} ->
	    %% No more nodes that we need to merge the schema with
	    ok;
	{atomic, {merged, OldFriends, NewFriends}} ->
	    %% Check if new nodes has been added to the schema
	    Diff = mnesia_lib:all_nodes() -- [node() | Nodes],
	    mnesia_recover:connect_nodes(Diff),

	    %% Tell everybody to adopt orphan tables
	    im_running(OldFriends, NewFriends),
	    im_running(NewFriends, OldFriends),
	    
	    try_merge_schema(Nodes);
	{atomic, {"Cannot get cstructs", Node, Reason}} ->
	    dbg_out("Cannot get cstructs, Node ~p ~p~n", [Node, Reason]),
	    timer:sleep(1000), % Avoid a endless loop look alike	    
	    try_merge_schema(Nodes);
	Other ->
	    Other
    end.

im_running(OldFriends, NewFriends) ->
    abcast(OldFriends, {im_running, node(), NewFriends}).

schema_is_merged() ->
    MsgTag = schema_is_merged,
    SafeLoads = initial_safe_loads(),
    
    %% At this point we do not know anything about
    %% which tables that the other nodes already
    %% has loaded and therefore we let the normal
    %% processing of the loader_queue take care
    %% of it, since we at that time point will
    %% know the whereabouts. We rely on the fact
    %% that all nodes tells each other directly
    %% when they have loaded a table and are
    %% willing to share it.
    
    try_schedule_late_disc_load(SafeLoads, initial, MsgTag).


cast(Msg) ->
    case whereis(?SERVER_NAME) of
	undefined ->{error, {node_not_running, node()}};
	Pid ->  gen_server:cast(Pid, Msg)
    end.

abcast(Nodes, Msg) ->
    gen_server:abcast(Nodes, ?SERVER_NAME, Msg).

unsafe_call(Msg) ->
    case whereis(?SERVER_NAME) of
	undefined -> {error, {node_not_running, node()}};
	Pid -> gen_server:call(Pid, Msg, infinity)
    end.

call(Msg) ->
    case whereis(?SERVER_NAME) of
	undefined ->
	    {error, {node_not_running, node()}};
	Pid ->
	    link(Pid),
	    Res = gen_server:call(Pid, Msg, infinity),
	    unlink(Pid),

	    %% We get an exit signal if server dies
            receive
                {'EXIT', Pid, _Reason} ->
                    {error, {node_not_running, node()}}
            after 0 ->
                    Res
            end
    end.

remote_call(Node, Func, Args) ->
    case catch gen_server:call({?MODULE, Node}, {Func, Args, self()}, infinity) of
	{'EXIT', Error} ->
	    {error, Error};
	Else ->
	    Else
    end.
    
multicall(Nodes, Msg) ->
    {Good, Bad} = gen_server:multi_call(Nodes, ?MODULE, Msg, infinity),
    PatchedGood = [Reply || {_Node, Reply} <- Good],
    {PatchedGood, Bad}.  %% Make the replies look like rpc:multicalls..
%%    rpc:multicall(Nodes, ?MODULE, call, [Msg]).

%%%----------------------------------------------------------------------
%%% Callback functions from gen_server
%%%----------------------------------------------------------------------

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State}          |
%%          {ok, State, Timeout} |
%%          {stop, Reason}
%%----------------------------------------------------------------------
init([Parent]) ->
    process_flag(trap_exit, true),
    mnesia_lib:verbose("~p starting: ~p~n", [?SERVER_NAME, self()]),

    %% Handshake and initialize transaction recovery
    %% for new nodes detected in the schema
    All = mnesia_lib:all_nodes(),
    Diff = All -- [node() | val(original_nodes)],
    mnesia_lib:unset(original_nodes),
    mnesia_recover:connect_nodes(Diff),

    Interval = mnesia_monitor:get_env(dump_log_time_threshold),
    Msg = {async_dump_log, time_threshold},
    {ok, Ref} = timer:send_interval(Interval, Msg),
    mnesia_dumper:start_regulator(),
    
    Empty = gb_trees:empty(),
    {ok, #state{supervisor = Parent, dump_log_timer_ref = Ref, 
		loader_queue = Empty,
		late_loader_queue = Empty}}.

%%----------------------------------------------------------------------
%% Func: handle_call/3
%% Returns: {reply, Reply, State}          |
%%          {reply, Reply, State, Timeout} |
%%          {noreply, State}               |
%%          {noreply, State, Timeout}      |
%%          {stop, Reason, Reply, State}   | (terminate/2 is called)
%%          {stop, Reason, Reply, State}     (terminate/2 is called)
%%----------------------------------------------------------------------

handle_call({sync_dump_log, InitBy}, From, State) ->
    Worker = #dump_log{initiated_by = InitBy,
		       opt_reply_to = From
		      },
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_call(wait_for_schema_commit_lock, From, State) ->
    Worker = #schema_commit_lock{owner = From},
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_call(block_controller, From, State) ->
    Worker = #block_controller{owner = From},
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_call({update,Fun}, From, State) ->
    Res = (catch Fun()),
    reply(From, Res), 
    noreply(State);

handle_call(get_cstructs, From, State) ->
    Tabs = val({schema, tables}),
    Cstructs = [val({T, cstruct}) || T <- Tabs],
    Running = val({current, db_nodes}),
    reply(From, {cstructs, Cstructs, Running}), 
    noreply(State);

handle_call({schema_is_merged, [], late_merge, []}, From, 
	    State = #state{schema_is_merged = Merged}) ->
    case Merged of
	{false, Node} when Node == node(From) ->
	    Msgs = State#state.early_msgs,
	    State1 = State#state{early_msgs = [], schema_is_merged = true},
	    handle_early_msgs(lists:reverse(Msgs), State1);
	_ ->
	    %% Ooops this came to early, before we have merged :-)
	    %% or it came to late or from a node we don't care about
	    reply(From, ignore),
	    noreply(State)
    end;

handle_call({schema_is_merged, TabsR, Reason, RemoteLoaders}, From, State) ->
    State2 = late_disc_load(TabsR, Reason, RemoteLoaders, From, State),

    %% Handle early messages
    Msgs = State2#state.early_msgs,
    State3 = State2#state{early_msgs = [], schema_is_merged = true},
    handle_early_msgs(lists:reverse(Msgs), State3);

handle_call(disc_load_intents,From,State = #state{loader_queue=LQ,late_loader_queue=LLQ}) ->
    LQTabs  = gb_trees:keys(LQ),
    LLQTabs = gb_trees:keys(LLQ),
    ActiveTabs = lists:sort(mnesia_lib:local_active_tables()),
    reply(From, {ok, node(), ordsets:union([LQTabs,LLQTabs,ActiveTabs])}),
    noreply(State);

handle_call({update_where_to_write, [add, Tab, AddNode], _From}, _Dummy, State) ->
    Current = val({current, db_nodes}),
    Res = 
	case lists:member(AddNode, Current) and 
	    (State#state.schema_is_merged == true) of
	    true ->
		mnesia_lib:add_lsort({Tab, where_to_write}, AddNode);
	    false ->
		ignore
	end,
    {reply, Res, State};

handle_call({add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
	    ReplyTo, State) ->
    KnownNode = lists:member(ToNode, val({current, db_nodes})),
    Merged = State#state.schema_is_merged,
    if
	KnownNode == false ->
	    reply(ReplyTo, ignore),
	    noreply(State);
	Merged == true ->
	    Res = case ?catch_val({Tab, cstruct}) of
		      {'EXIT', _} ->  %% Tab deleted
			  deleted;
		      _ ->
			  add_active_replica(Tab, ToNode, RemoteS, AccessMode)
		  end,
	    reply(ReplyTo, Res),
	    noreply(State);
	true -> %% Schema is not merged
	    Msg = {add_active_replica, [Tab, ToNode, RemoteS, AccessMode], From},
	    Msgs = State#state.early_msgs,
	    reply(ReplyTo, ignore),   %% Reply ignore and add data after schema merge
	    noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
    end;

handle_call({unannounce_add_table_copy, [Tab, Node], From}, ReplyTo, State) ->    
    KnownNode = lists:member(node(From), val({current, db_nodes})),
    Merged = State#state.schema_is_merged,
    if
	KnownNode == false ->
	    reply(ReplyTo, ignore),
	    noreply(State);
	Merged == true ->
	    Res = unannounce_add_table_copy(Tab, Node),
	    reply(ReplyTo, Res),
	    noreply(State);
	true -> %% Schema is not merged
	    Msg = {unannounce_add_table_copy, [Tab, Node], From},
	    Msgs = State#state.early_msgs,
	    reply(ReplyTo, ignore),   %% Reply ignore and add data after schema merge
	    %% Set ReplyTO to undefined so we don't reply twice
	    noreply(State#state{early_msgs = [{call, Msg, undefined} | Msgs]})
    end;

handle_call({net_load, Tab, Cs}, From, State) ->
    State2 = 
	case State#state.schema_is_merged of
	    true -> 	    
		Worker = #net_load{table = Tab,
				   opt_reply_to = From,
				   reason = {dumper,add_table_copy},
				   cstruct = Cs
				  },
		add_worker(Worker, State);
	    false -> 
		reply(From, {not_loaded, schema_not_merged}),
		State
	end,
    noreply(State2);

handle_call(Msg, From, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{call, Msg, From} | Msgs]});

handle_call({late_disc_load, Tabs, Reason, RemoteLoaders}, From, State) ->
    State2 = late_disc_load(Tabs, Reason, RemoteLoaders, From, State),
    noreply(State2);

handle_call({unblock_table, Tab}, _Dummy, State) ->
    Var = {Tab, where_to_commit},
    case val(Var) of
	{blocked, List} ->
	    set(Var, List); % where_to_commit
	_ ->
	    ignore
    end,
    {reply, ok, State};

handle_call({block_table, [Tab], From}, _Dummy, State) ->
    case lists:member(node(From), val({current, db_nodes})) of
	true ->
	    block_table(Tab);
	false ->
	    ignore
    end,
    {reply, ok, State};

handle_call({check_w2r, _Node, Tab}, _From, State) ->
    {reply, val({Tab, where_to_read}), State};

handle_call({add_other, Who}, _From, State = #state{others=Others0}) ->
    Others = [Who|Others0],
    {reply, ok, State#state{others=Others}};
handle_call({del_other, Who}, _From, State = #state{others=Others0}) ->
    Others = lists:delete(Who, Others0),
    {reply, ok, State#state{others=Others}};
	    
handle_call(Msg, _From, State) ->
    error("~p got unexpected call: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

late_disc_load(TabsR, Reason, RemoteLoaders, From, 
	       State = #state{loader_queue = LQ, late_loader_queue = LLQ}) ->
    verbose("Intend to load tables: ~p~n", [TabsR]),
    ?eval_debug_fun({?MODULE, late_disc_load},
		    [{tabs, TabsR}, 
		     {reason, Reason},
		     {loaders, RemoteLoaders}]),

    reply(From, queued),
    %% RemoteLoaders is a list of {ok, Node, Tabs} tuples

    %% Remove deleted tabs and queued/loaded
    LocalTabs = gb_sets:from_ordset(lists:sort(mnesia_lib:val({schema,local_tables}))),
    Filter = fun(TabInfo0, Acc) -> 
		     TabInfo = {Tab,_} = 
			 case TabInfo0 of 
			     {_,_} -> TabInfo0;
			     TabN -> {TabN,Reason}
			 end,
		     case gb_sets:is_member(Tab, LocalTabs) of
			 true -> 
			     case ?catch_val({Tab, where_to_read}) == node() of
				 true -> Acc;
				 false ->
				     case gb_trees:is_defined(Tab,LQ) of
					 true ->  Acc;
					 false -> [TabInfo | Acc]
				     end
			     end;
			 false -> Acc
		     end
	     end,
    
    Tabs = lists:foldl(Filter, [], TabsR),
    
    Nodes = val({current, db_nodes}),
    LateQueue = late_loaders(Tabs, RemoteLoaders, Nodes, LLQ),
    State#state{late_loader_queue = LateQueue}. 

late_loaders([{Tab, Reason} | Tabs], RemoteLoaders, Nodes, LLQ) ->
    case gb_trees:is_defined(Tab, LLQ) of
	false ->
	    LoadNodes = late_load_filter(RemoteLoaders, Tab, Nodes, []),
	    case LoadNodes of
		[] ->  cast({disc_load, Tab, Reason}); % Ugly cast
		_ ->   ignore
	    end,
	    LateLoad = #late_load{table=Tab,loaders=LoadNodes,reason=Reason},
	    late_loaders(Tabs, RemoteLoaders, Nodes, gb_trees:insert(Tab,LateLoad,LLQ));
	true ->
	    late_loaders(Tabs, RemoteLoaders, Nodes, LLQ)	     
    end;
late_loaders([], _RemoteLoaders, _Nodes, LLQ) ->
    LLQ.

late_load_filter([{error, _} | RemoteLoaders], Tab, Nodes, Acc) ->
    late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
late_load_filter([{badrpc, _} | RemoteLoaders], Tab, Nodes, Acc) ->
    late_load_filter(RemoteLoaders, Tab, Nodes, Acc);
late_load_filter([RL | RemoteLoaders], Tab, Nodes, Acc) ->
    {ok, Node, Intents} = RL,
    Access = val({Tab, access_mode}),
    LocalC = val({Tab, local_content}),
    StillActive = lists:member(Node, Nodes),
    RemoteIntent = lists:member(Tab, Intents),
    if
	Access == read_write,
	LocalC == false,
	StillActive == true,
	RemoteIntent == true ->
	    Masters = mnesia_recover:get_master_nodes(Tab),
	    case lists:member(Node, Masters) of
		true ->
		    %% The other node is master node for
		    %% the table, accept his load intent
		    late_load_filter(RemoteLoaders, Tab, Nodes, [Node | Acc]);
		false when Masters == [] ->
		    %% The table has no master nodes
		    %% accept his load intent
		    late_load_filter(RemoteLoaders, Tab, Nodes, [Node | Acc]);
		false ->
		    %% Some one else is master node for
		    %% the table, ignore his load intent
		    late_load_filter(RemoteLoaders, Tab, Nodes, Acc)
	    end;
	true ->
	    late_load_filter(RemoteLoaders, Tab, Nodes, Acc)
    end;
late_load_filter([], _Tab, _Nodes, Acc) ->
    Acc.
    
%%----------------------------------------------------------------------
%% Func: handle_cast/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------

handle_cast({release_schema_commit_lock, _Owner}, State) ->
    if
	State#state.is_stopping == true ->
	    {stop, shutdown, State};
	true -> 
	    case State#state.dumper_queue of
		[#schema_commit_lock{}|Rest] ->
		    [_Worker | Rest] = State#state.dumper_queue,
		    State2 = State#state{dumper_pid = undefined,
					 dumper_queue = Rest},
		    State3 = opt_start_worker(State2),
		    noreply(State3);
		_ ->
		    noreply(State)
	    end
    end;

handle_cast(unblock_controller, State) ->
    if
	State#state.is_stopping == true ->
	    {stop, shutdown, State};
	is_record(hd(State#state.dumper_queue), block_controller) ->
	    [_Worker | Rest] = State#state.dumper_queue,
	    State2 = State#state{dumper_pid = undefined,
				 dumper_queue = Rest},
	    State3 = opt_start_worker(State2),	    
	    noreply(State3)
    end;

handle_cast({mnesia_down, Node}, State) ->
    maybe_log_mnesia_down(Node),
    mnesia_lib:del({current, db_nodes}, Node),
    mnesia_checkpoint:tm_mnesia_down(Node),
    Alltabs = val({schema, tables}),
    reconfigure_tables(Node, Alltabs),
    %% Done from (external point of view)
    mnesia_monitor:mnesia_down(?SERVER_NAME, Node),

    %% Fix if we are late_merging against the node that went down
    case State#state.schema_is_merged of
	{false, Node} -> 
	    spawn(?MODULE, call, [{schema_is_merged, [], late_merge, []}]);
	_ ->
	    ignore
    end,
    
    %% Fix internal stuff
    LateQ = remove_loaders(Alltabs, Node, State#state.late_loader_queue),
    
    case get_senders(State) ++ get_loaders(State) of
	[] -> ignore;
	Senders -> 
	    lists:foreach(fun({Pid,_}) -> Pid ! {copier_done, Node} end,
			  Senders)
    end,
    lists:foreach(fun(Pid) -> Pid ! {copier_done,Node} end, 
		  State#state.others),
    
    Remove = fun(ST) ->
		     node(ST#send_table.receiver_pid) /= Node
	     end,
    NewSenders = lists:filter(Remove, State#state.sender_queue),
    Early = remove_early_messages(State#state.early_msgs, Node),
    noreply(State#state{sender_queue = NewSenders, 
			early_msgs = Early, 
			late_loader_queue = LateQ
		       });

handle_cast({merging_schema, Node}, State) ->
    case State#state.schema_is_merged of
	false ->
	    %% This comes from dynamic connect_nodes which are made
	    %% after mnesia:start() and the schema_merge.
	    ImANewKidInTheBlock = 
		(val({schema, storage_type}) == ram_copies) 
		andalso (mnesia_lib:val({schema, local_tables}) == [schema]),
	    case ImANewKidInTheBlock of
		true ->  %% I'm newly started ram_node..
		    noreply(State#state{schema_is_merged = {false, Node}});
		false ->
		    noreply(State)
	    end;
	_ -> %% Already merging schema.
	    noreply(State)
    end;

handle_cast(Msg, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{cast, Msg} | Msgs]});

%% This must be done after schema_is_merged otherwise adopt_orphan
%% might trigger a table load from wrong nodes as a result of that we don't 
%% know which tables we can load safly first.
handle_cast({im_running, _Node, NewFriends}, State) ->
    LocalTabs = mnesia_lib:local_active_tables() -- [schema],
    RemoveLocalOnly = fun(Tab) -> not val({Tab, local_content}) end,
    Tabs = lists:filter(RemoveLocalOnly, LocalTabs),
    Ns = mnesia_lib:intersect(NewFriends, val({current, db_nodes})),
    abcast(Ns, {adopt_orphans, node(), Tabs}),
    noreply(State);

handle_cast({disc_load, Tab, Reason}, State) ->
    Worker = #disc_load{table = Tab, reason = Reason},
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_cast(Worker = #send_table{}, State) ->
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_cast({sync_tabs, Tabs, From}, State) ->
    %% user initiated wait_for_tables
    handle_sync_tabs(Tabs, From),
    noreply(State);

handle_cast({i_have_tab, Tab, Node}, State) ->
    case lists:member(Node, val({current, db_nodes})) of
	true -> 
	    State2 = node_has_tabs([Tab], Node, State),
	    noreply(State2);
	false ->
	    noreply(State)
    end;

handle_cast({force_load_updated, Tab}, State) ->
    case val({Tab, active_replicas}) of
	[] ->
	    %% No valid replicas
	    noreply(State);
	[SomeNode | _] ->
	    State2 = node_has_tabs([Tab], SomeNode, State),
	    noreply(State2)
    end;
    
handle_cast({master_nodes_updated, Tab, Masters}, State) ->
    Active = val({Tab, active_replicas}),
    Valid = 
	case val({Tab, load_by_force}) of
	    true ->
		Active;
	    false ->
		if
		    Masters == [] ->
			Active;
		    true ->
			mnesia_lib:intersect(Masters, Active)
		end
	end,
    case Valid of
	[] ->
	    %% No valid replicas
	    noreply(State);
	[SomeNode | _] ->
	    State2 = node_has_tabs([Tab], SomeNode, State),
	    noreply(State2)
    end;
    
handle_cast({adopt_orphans, Node, Tabs}, State) ->

    State2 = node_has_tabs(Tabs, Node, State),
    
    %% Register the other node as up and running
    mnesia_recover:log_mnesia_up(Node),
    verbose("Logging mnesia_up ~w~n",[Node]),
    mnesia_lib:report_system_event({mnesia_up, Node}),
    
    %% Load orphan tables
    LocalTabs = val({schema, local_tables}) -- [schema],
    Nodes = val({current, db_nodes}),
    {LocalOrphans, RemoteMasters} =
	orphan_tables(LocalTabs, Node, Nodes, [], []),
    Reason = {adopt_orphan, node()},
    mnesia_late_loader:async_late_disc_load(node(), LocalOrphans, Reason),
    
    Fun =
	fun(N) ->
		RemoteOrphans =
		    [Tab || {Tab, Ns} <- RemoteMasters,
			    lists:member(N, Ns)],
		mnesia_late_loader:maybe_async_late_disc_load(N, RemoteOrphans, Reason)
	end,
    lists:foreach(Fun, Nodes),
    noreply(State2);

handle_cast(Msg, State) ->
    error("~p got unexpected cast: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

handle_sync_tabs([Tab | Tabs], From) ->    
    case val({Tab, where_to_read}) of
	nowhere ->
	    case get({sync_tab, Tab}) of
		undefined ->
		    put({sync_tab, Tab}, [From]);
		Pids ->
		    put({sync_tab, Tab}, [From | Pids])
	    end;
	_ ->
	    sync_reply(From, Tab)
    end,
    handle_sync_tabs(Tabs, From);
handle_sync_tabs([], _From) ->
    ok.

%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------

handle_info({async_dump_log, InitBy}, State) ->
    Worker = #dump_log{initiated_by = InitBy},
    State2 = add_worker(Worker, State),
    noreply(State2);

handle_info(#dumper_done{worker_pid=Pid, worker_res=Res}, State) ->
    if
	State#state.is_stopping == true ->
	    {stop, shutdown, State};
	Res == dumped, Pid == State#state.dumper_pid ->
	    [Worker | Rest] = State#state.dumper_queue,
	    reply(Worker#dump_log.opt_reply_to, Res),
	    State2 = State#state{dumper_pid = undefined,
				 dumper_queue = Rest},
	    State3 = opt_start_worker(State2),
	    noreply(State3);
	true ->
	    fatal("Dumper failed: ~p~n state: ~p~n", [Res, State]),
	    {stop, fatal, State}
    end;

handle_info(Done = #loader_done{worker_pid=WPid, table_name=Tab}, State0) ->    
    LateQueue0 = State0#state.late_loader_queue,
    State1 = State0#state{loader_pid = lists:keydelete(WPid,1,get_loaders(State0))},

    State2 =
	case Done#loader_done.is_loaded of
	    true ->
		%% Optional table announcement
		if 
		    Done#loader_done.needs_announce == true,
		    Done#loader_done.needs_reply == true ->
			i_have_tab(Tab),
			%% Should be {dumper,add_table_copy} only
			reply(Done#loader_done.reply_to,
			      Done#loader_done.reply);
		    Done#loader_done.needs_reply == true ->
			%% Should be {dumper,add_table_copy} only
			reply(Done#loader_done.reply_to,
			      Done#loader_done.reply);
		    Done#loader_done.needs_announce == true, Tab == schema ->
			i_have_tab(Tab);
		    Done#loader_done.needs_announce == true ->
			i_have_tab(Tab),
			%% Local node needs to perform user_sync_tab/1
			Ns = val({current, db_nodes}),
			abcast(Ns, {i_have_tab, Tab, node()});
		    Tab == schema ->
			ignore;
		    true ->
			%% Local node needs to perform user_sync_tab/1
			Ns = val({current, db_nodes}),
			AlreadyKnows = val({Tab, active_replicas}),
			abcast(Ns -- AlreadyKnows, {i_have_tab, Tab, node()})
		end,
		%% Optional user sync
		case Done#loader_done.needs_sync of
		    true -> user_sync_tab(Tab);
		    false -> ignore
		end,
		State1#state{late_loader_queue=gb_trees:delete_any(Tab, LateQueue0)};
	    false ->
		%% Either the node went down or table was not
		%% loaded remotly yet 
		case Done#loader_done.needs_reply of
		    true ->
			reply(Done#loader_done.reply_to,
			      Done#loader_done.reply);
		    false ->
			ignore
		end,
		case ?catch_val({Tab, active_replicas}) of
		    [_|_] -> % still available elsewhere
			{value,{_,Worker}} = lists:keysearch(WPid,1,get_loaders(State0)),
			add_loader(Tab,Worker,State1);
		    _ ->
			State1
		end
	end,
    State3 = opt_start_worker(State2),
    noreply(State3);

handle_info(#sender_done{worker_pid=Pid, worker_res=Res}, State)  ->
    Senders = get_senders(State),
    {value, {Pid,_Worker}} = lists:keysearch(Pid, 1, Senders),
    if
	Res == ok ->	    
	    State2 = State#state{sender_pid = lists:keydelete(Pid, 1, Senders)},
	    State3 = opt_start_worker(State2),
	    noreply(State3);
	true ->
	    %% No need to send any message to the table receiver
	    %% since it will soon get a mnesia_down anyway
	    fatal("Sender failed: ~p~n state: ~p~n", [Res, State]),
	    {stop, fatal, State}
    end;

handle_info({'EXIT', Pid, R}, State) when Pid == State#state.supervisor ->
    catch set(mnesia_status, stopping),
    case State#state.dumper_pid of
	undefined ->
	    dbg_out("~p was ~p~n", [?SERVER_NAME, R]),
	    {stop, shutdown, State};
	_ ->
	    noreply(State#state{is_stopping = true})
    end;

handle_info({'EXIT', Pid, R}, State) when Pid == State#state.dumper_pid ->
    case State#state.dumper_queue of
	[#schema_commit_lock{}|Workers] -> %% Schema trans crashed or was killed
	    dbg_out("WARNING: Dumper ~p exited ~p~n", [Pid, R]),
	    State2 = State#state{dumper_queue = Workers, dumper_pid = undefined},
	    State3 = opt_start_worker(State2),
	    noreply(State3);
	_Other ->
	    fatal("Dumper or schema commit crashed: ~p~n state: ~p~n", [R, State]),
	    {stop, fatal, State}
    end;

handle_info(Msg = {'EXIT', Pid, R}, State) when R /= wait_for_tables_timeout ->
    case lists:keymember(Pid, 1, get_senders(State)) of
	true ->
	    %% No need to send any message to the table receiver
	    %% since it will soon get a mnesia_down anyway
	    fatal("Sender crashed: ~p~n state: ~p~n", [{Pid,R}, State]),
	    {stop, fatal, State};
	false ->
	    case lists:keymember(Pid, 1, get_loaders(State)) of
		true -> 
		    fatal("Loader crashed: ~p~n state: ~p~n", [R, State]),
		    {stop, fatal, State};
		false ->
		    error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
		    noreply(State)
	    end
    end;

handle_info({From, get_state}, State) ->
    From ! {?SERVER_NAME, State},
    noreply(State);

%% No real need for buffering
handle_info(Msg, State) when State#state.schema_is_merged /= true ->
    %% Buffer early messages
    Msgs = State#state.early_msgs,
    noreply(State#state{early_msgs = [{info, Msg} | Msgs]});

handle_info({'EXIT', Pid, wait_for_tables_timeout}, State) ->
    sync_tab_timeout(Pid, get()),
    noreply(State);

handle_info(Msg, State) ->
    error("~p got unexpected info: ~p~n", [?SERVER_NAME, Msg]),
    noreply(State).

sync_tab_timeout(Pid, [{{sync_tab, Tab}, Pids} | Tail]) ->
    case lists:delete(Pid, Pids) of
	[] ->
	    erase({sync_tab, Tab});
	Pids2 ->
	    put({sync_tab, Tab}, Pids2)
    end,
    sync_tab_timeout(Pid, Tail);
sync_tab_timeout(Pid, [_ | Tail]) ->
    sync_tab_timeout(Pid, Tail);
sync_tab_timeout(_Pid, []) ->
    ok.

%% Pick the load record that has the highest load order
%% Returns {BestLoad, RemainingQueue} or {none, []} if queue is empty
pick_next(Queue) ->
    List = gb_trees:values(Queue),
    case pick_next(List, none, none) of
	none -> {none, gb_trees:empty()};
	{Tab, Worker} -> {Worker, gb_trees:delete(Tab,Queue)}
    end.

pick_next([Head = #net_load{table=Tab}| Tail], Load, Order) ->
    select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
pick_next([Head = #disc_load{table=Tab}| Tail], Load, Order) ->
    select_best(Head, Tail, ?catch_val({Tab, load_order}), Load, Order);
pick_next([], none, _Order) ->
    none;
pick_next([], Load, _Order) ->
    {element(2,Load), Load}.

select_best(_Head, Tail, {'EXIT', _WHAT}, Load, Order) ->
    %% Table have been deleted drop it.
    pick_next(Tail, Load, Order);
select_best(Load, Tail, Order, none, none) ->
    pick_next(Tail, Load, Order);
select_best(Load, Tail, Order, _OldLoad, OldOrder) when Order > OldOrder ->
    pick_next(Tail, Load, Order);
select_best(_Load, Tail, _Order, OldLoad, OldOrder) ->
    pick_next(Tail, OldLoad, OldOrder).

%%----------------------------------------------------------------------
%% Func: terminate/2
%% Purpose: Shutdown the server
%% Returns: any (ignored by gen_server)
%%----------------------------------------------------------------------
terminate(Reason, State) ->
    mnesia_monitor:terminate_proc(?SERVER_NAME, Reason, State).

%%----------------------------------------------------------------------
%% Func: code_change/3
%% Purpose: Upgrade process when its code is to be changed
%% Returns: {ok, NewState}
%%----------------------------------------------------------------------
code_change(_OldVsn, State0, _Extra) ->
    %% Loader Queue
    State1 = case State0#state.loader_pid of
		 Pids when is_list(Pids) -> State0;
		 undefined -> State0#state{loader_pid = [],loader_queue=gb_trees:empty()};
		 Pid when is_pid(Pid) -> 
		     [Loader|Rest] = State0#state.loader_queue,
		     LQ0 = [{element(2,Rec),Rec} || Rec <- Rest],
		     LQ1 = lists:sort(LQ0),
		     LQ  = gb_trees:from_orddict(LQ1),
		     State0#state{loader_pid=[{Pid,Loader}], loader_queue=LQ}
	     end,
    %% LateLoaderQueue
    State = if is_list(State1#state.late_loader_queue) -> 
		    LLQ0 = State1#state.late_loader_queue,
		    LLQ1 = lists:sort([{element(2,Rec),Rec} || Rec <- LLQ0]),
		    LLQ  = gb_trees:from_orddict(LLQ1),
		    State1#state{late_loader_queue=LLQ};
	       true ->
		    State1
	    end,
    {ok, State}.
 
%%%----------------------------------------------------------------------
%%% Internal functions
%%%----------------------------------------------------------------------

maybe_log_mnesia_down(N) ->
    %% We use mnesia_down when deciding which tables to load locally,
    %% so if we are not running (i.e haven't decided which tables
    %% to load locally), don't log mnesia_down yet.
    case mnesia_lib:is_running() of
	yes -> 
	    verbose("Logging mnesia_down ~w~n", [N]),
	    mnesia_recover:log_mnesia_down(N),
	    ok;
	_ ->	    	    
	    Filter = fun(Tab) ->
			     inactive_copy_holders(Tab, N)
		     end,
	    HalfLoadedTabs = lists:any(Filter, val({schema, local_tables}) -- [schema]),
	    if 
		HalfLoadedTabs == true ->
		    verbose("Logging mnesia_down ~w~n", [N]),
		    mnesia_recover:log_mnesia_down(N),
		    ok;		
		true ->
		    %% Unfortunately we have not loaded some common
		    %% tables yet, so we cannot rely on the nodedown
		    log_later   %% BUGBUG handle this case!!!
	    end
    end.

inactive_copy_holders(Tab, Node) ->
    Cs = val({Tab, cstruct}),
    case mnesia_lib:cs_to_storage_type(Node, Cs) of
	unknown ->
	    false;
	_Storage ->
	    mnesia_lib:not_active_here(Tab)
    end.

orphan_tables([Tab | Tabs], Node, Ns, Local, Remote) ->
    Cs = val({Tab, cstruct}),
    CopyHolders = mnesia_lib:copy_holders(Cs),
    RamCopyHolders = Cs#cstruct.ram_copies,
    DiscCopyHolders = CopyHolders -- RamCopyHolders,
    DiscNodes = val({schema, disc_copies}),
    LocalContent = Cs#cstruct.local_content,
    RamCopyHoldersOnDiscNodes = mnesia_lib:intersect(RamCopyHolders, DiscNodes),
    Active = val({Tab, active_replicas}),
    BeingCreated = (?catch_val({Tab, create_table}) == true),
    Read = val({Tab, where_to_read}),
    case lists:member(Node, DiscCopyHolders) of
	_ when BeingCreated == true -> 
	    orphan_tables(Tabs, Node, Ns, Local, Remote);
	_ when Read == node() -> %% Allready loaded
	    orphan_tables(Tabs, Node, Ns, Local, Remote);
	true when Active == [] ->
	    case DiscCopyHolders -- Ns of
		[] ->
		    %% We're last up and the other nodes have not
		    %% loaded the table. Lets load it if we are
		    %% the smallest node.
		    case lists:min(DiscCopyHolders) of
			Min when Min == node() ->
			    case mnesia_recover:get_master_nodes(Tab) of
				[] ->
				    L = [Tab | Local],
				    orphan_tables(Tabs, Node, Ns, L, Remote);
				Masters ->
				    R = [{Tab, Masters} | Remote],
				    orphan_tables(Tabs, Node, Ns, Local, R)
			    end;
			_ ->
			    orphan_tables(Tabs, Node, Ns, Local, Remote)
		    end;
		_ ->
		    orphan_tables(Tabs, Node, Ns, Local, Remote)
	    end;
	false when Active == [], DiscCopyHolders == [], RamCopyHoldersOnDiscNodes == [] ->
	    %% Special case when all replicas resides on disc less nodes
	    orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
	_ when LocalContent == true ->
	    orphan_tables(Tabs, Node, Ns, [Tab | Local], Remote);
	_ ->
	    orphan_tables(Tabs, Node, Ns, Local, Remote)
    end;
orphan_tables([], _, _, LocalOrphans, RemoteMasters) ->
    {LocalOrphans, RemoteMasters}.

node_has_tabs([Tab | Tabs], Node, State) when Node /= node() ->
    State2 = 
	case catch update_whereabouts(Tab, Node, State) of
	    State1 = #state{} -> State1;
	    {'EXIT', R} ->  %% Tab was just deleted?
		case ?catch_val({Tab, cstruct}) of
		    {'EXIT', _} -> State; % yes
		    _ ->  erlang:error(R) 
		end
	end,
    node_has_tabs(Tabs, Node, State2);
node_has_tabs([Tab | Tabs], Node, State) ->
    user_sync_tab(Tab),
    node_has_tabs(Tabs, Node, State);
node_has_tabs([], _Node, State) ->
    State.

update_whereabouts(Tab, Node, State) ->
    Storage = val({Tab, storage_type}),
    Read = val({Tab, where_to_read}),
    LocalC = val({Tab, local_content}),
    BeingCreated = (?catch_val({Tab, create_table}) == true),
    Masters = mnesia_recover:get_master_nodes(Tab),
    ByForce = val({Tab, load_by_force}),
    GoGetIt =
	if
	    ByForce == true ->
		true;
	    Masters == [] ->
		true;
	    true ->
		lists:member(Node, Masters)
	end,
    
    dbg_out("Table ~w is loaded on ~w. s=~w, r=~w, lc=~w, f=~w, m=~w~n",
	    [Tab, Node, Storage, Read, LocalC, ByForce, GoGetIt]),
    if
	LocalC == true ->
	    %% Local contents, don't care about other node
	    State;
	BeingCreated == true ->	    
	    %% The table is currently being created
	    %% It will be handled elsewhere
	    State;
	Storage == unknown, Read == nowhere ->
	    %% No own copy, time to read remotely
	    %% if the other node is a good node
	    add_active_replica(Tab, Node),
	    case GoGetIt of
		true ->
		    set({Tab, where_to_read}, Node),
		    user_sync_tab(Tab),
		    State;
		false ->
		    State
	    end;
	Storage == unknown ->
	    %% No own copy, continue to read remotely	    
	    add_active_replica(Tab, Node),	    
	    NodeST = mnesia_lib:storage_type_at_node(Node, Tab),
	    ReadST = mnesia_lib:storage_type_at_node(Read, Tab),
	    if   %% Avoid reading from disc_only_copies
		NodeST == disc_only_copies ->
		    ignore;
		ReadST == disc_only_copies ->
		    mnesia_lib:set_remote_where_to_read(Tab);
		true ->
		    ignore
	    end,
	    user_sync_tab(Tab),
	    State;
	Read == nowhere ->
	    %% Own copy, go and get a copy of the table
	    %% if the other node is master or if there
	    %% are no master at all
	    add_active_replica(Tab, Node),
	    case GoGetIt of
		true ->
		    Worker = #net_load{table = Tab,
				       reason = {active_remote, Node}},
		    add_worker(Worker, State);
		false ->
		    State
	    end;
	true ->
	    %% We already have an own copy
	    add_active_replica(Tab, Node),
	    user_sync_tab(Tab),
	    State
    end.

initial_safe_loads() ->
    case val({schema, storage_type}) of
	ram_copies ->
	    Downs = [],
	    Tabs = val({schema, local_tables}) -- [schema],
	    LastC = fun(T) -> last_consistent_replica(T, Downs) end,
	    lists:zf(LastC, Tabs);
	
	disc_copies ->
	    Downs = mnesia_recover:get_mnesia_downs(),
	    dbg_out("mnesia_downs = ~p~n", [Downs]),
		
	    Tabs = val({schema, local_tables}) -- [schema],
	    LastC = fun(T) -> last_consistent_replica(T, Downs) end,
	    lists:zf(LastC, Tabs)
    end.
    
last_consistent_replica(Tab, Downs) ->
    Cs = val({Tab, cstruct}),
    Storage = mnesia_lib:cs_to_storage_type(node(), Cs),
    Ram = Cs#cstruct.ram_copies,
    Disc = Cs#cstruct.disc_copies,
    DiscOnly = Cs#cstruct.disc_only_copies,
    BetterCopies0 = mnesia_lib:remote_copy_holders(Cs) -- Downs,
    BetterCopies = BetterCopies0 -- Ram,
    AccessMode = Cs#cstruct.access_mode,
    Copies = mnesia_lib:copy_holders(Cs),
    Masters = mnesia_recover:get_master_nodes(Tab),
    LocalMaster0 = lists:member(node(), Masters),
    LocalContent = Cs#cstruct.local_content,
    RemoteMaster =
	if
	    Masters == [] -> false;
	    true -> not LocalMaster0
	end,
    LocalMaster =
	if
	    Masters == [] -> false;
	    true -> LocalMaster0
	end,
    if
	Copies == [node()]  ->
	    %% Only one copy holder and it is local.
	    %% It may also be a local contents table
	    {true, {Tab, local_only}};
	LocalContent == true ->
	    {true, {Tab, local_content}};
	LocalMaster == true ->
	    %% We have a local master
	    {true, {Tab, local_master}};
	RemoteMaster == true ->
	    %% Wait for remote master copy
	    false;
	Storage == ram_copies ->
	    if
		Disc == [], DiscOnly == [] ->
		    %% Nobody has copy on disc
		    {true, {Tab, ram_only}};
		true ->
		    %% Some other node has copy on disc
		    false
	    end;
	AccessMode == read_only ->
	    %% No one has been able to update the table,
	    %% i.e. all disc resident copies are equal
	    {true, {Tab, read_only}};
	BetterCopies /= [], Masters /= [node()] ->
	    %% There are better copies on other nodes
	    %% and we do not have the only master copy
	    false;
	true ->
	    {true, {Tab, initial}}
    end.

reconfigure_tables(N, [Tab |Tail]) ->
    del_active_replica(Tab, N),
    case val({Tab, where_to_read}) of
	N ->  mnesia_lib:set_remote_where_to_read(Tab);
	_ ->  ignore
    end,
    reconfigure_tables(N, Tail);
reconfigure_tables(_, []) ->
    ok.

remove_loaders([Tab| Tabs], N, Loaders) ->
    LateQ = drop_loaders(Tab, N, Loaders),
    remove_loaders(Tabs, N, LateQ);
remove_loaders([],_, LateQ) -> LateQ.

remove_early_messages([], _Node) ->
    [];
remove_early_messages([{call, {add_active_replica, [_, Node, _, _], _}, _}|R], Node) ->
    remove_early_messages(R, Node); %% Does a reply before queuing
remove_early_messages([{call, {block_table, _, From}, ReplyTo}|R], Node) 
  when node(From) == Node ->
    reply(ReplyTo, ok),  %% Remove gen:server waits..
    remove_early_messages(R, Node);
remove_early_messages([{cast, {i_have_tab, _Tab, Node}}|R], Node) ->
    remove_early_messages(R, Node);
remove_early_messages([{cast, {adopt_orphans, Node, _Tabs}}|R], Node) ->
    remove_early_messages(R, Node);
remove_early_messages([M|R],Node) ->
    [M|remove_early_messages(R,Node)].

%% Drop loader from late load queue and possibly trigger a disc_load
drop_loaders(Tab, Node, LLQ) ->
    case gb_trees:lookup(Tab,LLQ) of
	none ->
	    LLQ;
	{value, H} ->
	    %% Check if it is time to issue a disc_load request
	    case H#late_load.loaders of
		[Node] ->
		    Reason = {H#late_load.reason, last_loader_down, Node},
		    cast({disc_load, Tab, Reason});  % Ugly cast
		_ ->
		    ignore
	    end,
	    %% Drop the node from the list of loaders
	    H2 = H#late_load{loaders = H#late_load.loaders -- [Node]},
	    gb_trees:update(Tab, H2, LLQ)
    end.

add_active_replica(Tab, Node) ->
    add_active_replica(Tab, Node, val({Tab, cstruct})).

add_active_replica(Tab, Node, Cs = #cstruct{}) ->
    Storage = mnesia_lib:schema_cs_to_storage_type(Node, Cs),
    AccessMode = Cs#cstruct.access_mode,
    add_active_replica(Tab, Node, Storage, AccessMode).

%% Block table primitives

block_table(Tab) ->
    Var = {Tab, where_to_commit},
    Old = val(Var),
    New = {blocked, Old},
    set(Var, New). % where_to_commit

unblock_table(Tab) ->
    call({unblock_table, Tab}).

is_tab_blocked(W2C) when is_list(W2C) ->
    {false, W2C};
is_tab_blocked({blocked, W2C}) when is_list(W2C) ->
    {true, W2C}.

mark_blocked_tab(true, Value) -> 
    {blocked, Value};
mark_blocked_tab(false, Value) -> 
    Value.

%%

add_active_replica(Tab, Node, Storage, AccessMode) ->
    Var = {Tab, where_to_commit},
    {Blocked, Old} = is_tab_blocked(val(Var)),
    Del = lists:keydelete(Node, 1, Old),
    case AccessMode of
	read_write ->
	    New = lists:sort([{Node, Storage} | Del]),
	    set(Var, mark_blocked_tab(Blocked, New)), % where_to_commit
	    mnesia_lib:add_lsort({Tab, where_to_write}, Node);
	read_only ->
	    set(Var, mark_blocked_tab(Blocked, Del)),
	    mnesia_lib:del({Tab, where_to_write}, Node)
    end,
    add({Tab, active_replicas}, Node).

del_active_replica(Tab, Node) ->
    Var = {Tab, where_to_commit},
    {Blocked, Old} = is_tab_blocked(val(Var)),
    Del = lists:keydelete(Node, 1, Old),
    New = lists:sort(Del),
    set(Var, mark_blocked_tab(Blocked, New)),      % where_to_commit
    mnesia_lib:del({Tab, active_replicas}, Node),
    mnesia_lib:del({Tab, where_to_write}, Node).

change_table_access_mode(Cs) ->
    W = fun() -> 
		Tab = Cs#cstruct.name,
		lists:foreach(fun(N) -> add_active_replica(Tab, N, Cs) end,
			      val({Tab, active_replicas}))
	end,
    update(W).
	    

%% node To now has tab loaded, but this must be undone
%% This code is rpc:call'ed from the tab_copier process
%% when it has *not* released it's table lock
unannounce_add_table_copy(Tab, To) ->
    catch del_active_replica(Tab, To),
    case catch val({Tab , where_to_read}) of
	To -> 
	    mnesia_lib:set_remote_where_to_read(Tab);
	_ ->
	    ignore
    end.

user_sync_tab(Tab) ->
    case val(debug) of
	trace ->
	    mnesia_subscr:subscribe(whereis(mnesia_event), {table, Tab});
	_ ->
	    ignore
    end,
	
    case erase({sync_tab, Tab}) of
	undefined ->
	    ok;
	Pids ->
	    lists:foreach(fun(Pid) -> sync_reply(Pid, Tab) end, Pids)
    end.

i_have_tab(Tab) ->
    case val({Tab, local_content}) of
	true ->
	    mnesia_lib:set_local_content_whereabouts(Tab);
	false ->
	    set({Tab, where_to_read}, node())
    end,
    add_active_replica(Tab, node()).

sync_and_block_table_whereabouts(Tab, ToNode, RemoteS, AccessMode) when Tab /= schema ->
    Current = val({current, db_nodes}),
    Ns = 
	case lists:member(ToNode, Current) of
	    true -> Current -- [ToNode];
	    false -> Current
	end,   
    remote_call(ToNode, block_table, [Tab]),
    [remote_call(Node, add_active_replica, [Tab, ToNode, RemoteS, AccessMode]) ||
	Node <- [ToNode | Ns]],
    ok.

sync_del_table_copy_whereabouts(Tab, ToNode) when Tab /= schema ->
    Current = val({current, db_nodes}),
    Ns =
	case lists:member(ToNode, Current) of
	    true -> Current;
	    false -> [ToNode | Current]
	end,
    Args = [Tab, ToNode],
    [remote_call(Node, unannounce_add_table_copy, Args) || Node <- Ns],
    ok.

get_info(Timeout) ->
    case whereis(?SERVER_NAME) of
	undefined ->
	    {timeout, Timeout};
	Pid ->
	    Pid ! {self(), get_state},
	    receive
		{?SERVER_NAME, State = #state{loader_queue=LQ,late_loader_queue=LLQ}} ->
		    {info,State#state{loader_queue=gb_trees:to_list(LQ),
				      late_loader_queue=gb_trees:to_list(LLQ)}}
	    after Timeout ->
		    {timeout, Timeout}
	    end
    end.

get_workers(Timeout) ->
    case whereis(?SERVER_NAME) of
	undefined ->
	    {timeout, Timeout};
	Pid ->
	    Pid ! {self(), get_state},
	    receive
		{?SERVER_NAME, State = #state{}} ->
		    {workers, get_loaders(State), get_senders(State), State#state.dumper_pid}
	    after Timeout ->
		    {timeout, Timeout}
	    end
    end.
    
info() ->
    Tabs = mnesia_lib:local_active_tables(),
    io:format( "---> Active tables <--- ~n", []),
    info(Tabs).

info([Tab | Tail]) ->
    case val({Tab, storage_type}) of
	disc_only_copies ->
	    info_format(Tab, 
			dets:info(Tab, size), 
			dets:info(Tab, file_size),
			"bytes on disc");
	_ ->
	    info_format(Tab, 
			?ets_info(Tab, size),
			?ets_info(Tab, memory),
			"words of mem")
    end,
    info(Tail);
info([]) -> ok.


info_format(Tab, Size, Mem, Media) ->
    StrT = mnesia_lib:pad_name(atom_to_list(Tab), 15, []),
    StrS = mnesia_lib:pad_name(integer_to_list(Size), 8, []),
    StrM = mnesia_lib:pad_name(integer_to_list(Mem), 8, []),
    io:format("~s: with ~s records occupying ~s ~s~n",
	      [StrT, StrS, StrM, Media]).

%% Handle early arrived messages
handle_early_msgs([Msg | Msgs], State) ->
    %% The messages are in reverse order
    case handle_early_msg(Msg, State) of
%%         {stop, Reason, Reply, State2} ->  % Will not happen according to dialyzer
%% 	    {stop, Reason, Reply, State2};
        {stop, Reason, State2} ->
	    {stop, Reason, State2};
	{noreply, State2} ->
	    handle_early_msgs(Msgs, State2);
 	{reply, Reply, State2} ->
	    {call, _Call, From} = Msg,
	    reply(From, Reply),
 	    handle_early_msgs(Msgs, State2)
    end;
handle_early_msgs([], State) ->
    noreply(State).

handle_early_msg({call, Msg, From}, State) ->
    handle_call(Msg, From, State);
handle_early_msg({cast, Msg}, State) ->
    handle_cast(Msg, State);
handle_early_msg({info, Msg}, State) ->
    handle_info(Msg, State).
    
noreply(State) ->
    {noreply, State}.

reply(undefined, Reply) ->
    Reply;
reply(ReplyTo, Reply) ->
    gen_server:reply(ReplyTo, Reply),
    Reply.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Worker management

%% Returns new State
add_worker(Worker = #dump_log{}, State) ->
    InitBy = Worker#dump_log.initiated_by,
    Queue = State#state.dumper_queue,
    case lists:keymember(InitBy, #dump_log.initiated_by, Queue) of
	true when Worker#dump_log.opt_reply_to == undefined ->
	    %% The same threshold has been exceeded again,
	    %% before we have had the possibility to
	    %% process the older one.
	    DetectedBy = {dump_log, InitBy},
	    Event = {mnesia_overload, DetectedBy},
	    mnesia_lib:report_system_event(Event);
	_ ->
	    ignore
    end,
    Queue2 = Queue ++ [Worker],
    State2 = State#state{dumper_queue = Queue2},
    opt_start_worker(State2);
add_worker(Worker = #schema_commit_lock{}, State) ->
    Queue = State#state.dumper_queue,
    Queue2 = Queue ++ [Worker],
    State2 = State#state{dumper_queue = Queue2},
    opt_start_worker(State2);
add_worker(Worker = #net_load{}, State) ->
    opt_start_worker(add_loader(Worker#net_load.table,Worker,State));
add_worker(Worker = #send_table{}, State) ->
    Queue = State#state.sender_queue,
    State2 = State#state{sender_queue = Queue ++ [Worker]},
    opt_start_worker(State2);
add_worker(Worker = #disc_load{}, State) ->
    opt_start_worker(add_loader(Worker#disc_load.table,Worker,State));
% Block controller should be used for upgrading mnesia.
add_worker(Worker = #block_controller{}, State) -> 
    Queue = State#state.dumper_queue,
    Queue2 = [Worker | Queue],
    State2 = State#state{dumper_queue = Queue2},
    opt_start_worker(State2).

add_loader(Tab,Worker,State = #state{loader_queue=LQ0}) ->
    case gb_trees:is_defined(Tab, LQ0) of
	true -> State;
	false -> 
	    LQ=gb_trees:insert(Tab, Worker, LQ0),
	    State#state{loader_queue=LQ}
    end.

%% Optionally start a worker
%% 
%% Dumpers and loaders may run simultaneously
%% but neither of them may run during schema commit.
%% Loaders may not start if a schema commit is enqueued.
opt_start_worker(State) when State#state.is_stopping == true ->
    State;
opt_start_worker(State) ->
    %% Prioritize dumper and schema commit
    %% by checking them first
    case State#state.dumper_queue of
	[Worker | _Rest] when State#state.dumper_pid == undefined ->
	    %% Great, a worker in queue and neither
	    %% a schema transaction is being
	    %% committed and nor a dumper is running
		    
	    %% Start worker but keep him in the queue
	    if
		is_record(Worker, schema_commit_lock) ->
		    ReplyTo = Worker#schema_commit_lock.owner,
		    reply(ReplyTo, granted),
		    {Owner, _Tag} = ReplyTo,
		    opt_start_loader(State#state{dumper_pid = Owner});
		
		is_record(Worker, dump_log) ->
		    Pid = spawn_link(?MODULE, dump_and_reply, [self(), Worker]),
		    State2 = State#state{dumper_pid = Pid},

		    %% If the worker was a dumper we may
		    %% possibly be able to start a loader
		    %% or sender
		    State3 = opt_start_sender(State2),
		    opt_start_loader(State3);
		
		is_record(Worker, block_controller) ->
		    case {get_senders(State), get_loaders(State)} of
			{[], []} ->
			    ReplyTo = Worker#block_controller.owner,
			    reply(ReplyTo, granted),
			    {Owner, _Tag} = ReplyTo,
			    State#state{dumper_pid = Owner};
			_ ->
			    State
		    end
	    end;
	_ ->
	    %% Bad luck, try with a loader or sender instead 
	    State2 = opt_start_sender(State),
	    opt_start_loader(State2)
    end.

opt_start_sender(State) ->
    case State#state.sender_queue of
	[]->   State; 	    %% No need
	SenderQ -> 
	    {NewS,Kept} = opt_start_sender2(SenderQ, get_senders(State), 
					    [], get_loaders(State)),
	    State#state{sender_pid = NewS, sender_queue = Kept}
    end.

opt_start_sender2([], Pids,Kept, _) -> {Pids,Kept};
opt_start_sender2([Sender|R], Pids, Kept, LoaderQ) ->
    Tab = Sender#send_table.table,
    Active = val({Tab, active_replicas}),
    IgotIt = lists:member(node(), Active),    
    IsLoading = lists:any(fun({_Pid,Loader}) -> 
				  Tab == element(#net_load.table, Loader)
			  end, LoaderQ),
    if 
	IgotIt, IsLoading  ->
	    %% I'm currently finishing loading the table let him wait
	    opt_start_sender2(R,Pids, [Sender|Kept], LoaderQ);
	IgotIt ->
	    %% Start worker but keep him in the queue
	    Pid = spawn_link(?MODULE, send_and_reply,[self(), Sender]),
	    opt_start_sender2(R,[{Pid,Sender}|Pids],Kept,LoaderQ);
	true ->
	    verbose("Send table failed ~p not active on this node ~n", [Tab]),
	    Sender#send_table.receiver_pid ! {copier_done, node()},
	    opt_start_sender2(R,Pids, Kept, LoaderQ)
    end.

opt_start_loader(State = #state{loader_queue = LoaderQ}) ->
    Current = get_loaders(State),
    Max = max_loaders(),
    case gb_trees:is_empty(LoaderQ) of
	true -> 
	    State;
	_ when length(Current) >= Max -> 
	    State;
	false -> 
	    SchemaQueue = State#state.dumper_queue,
	    case lists:keymember(schema_commit_lock, 1, SchemaQueue) of
		false ->
		    case pick_next(LoaderQ) of
			{none,Rest} ->
			    State#state{loader_queue=Rest};
			{Worker,Rest} ->
			    case already_loading(Worker, get_loaders(State)) of
				true ->
				    opt_start_loader(State#state{loader_queue = Rest});
				false ->
				    %% Start worker but keep him in the queue
				    Pid = load_and_reply(self(), Worker),
				    State#state{loader_pid=[{Pid,Worker}|get_loaders(State)],
						loader_queue = Rest}
			    end
		    end;
		true ->
		    %% Bad luck, we must wait for the schema commit
		    State
	    end
    end.

already_loading(#net_load{table=Tab},Loaders) ->
    already_loading2(Tab,Loaders);
already_loading(#disc_load{table=Tab},Loaders) ->
    already_loading2(Tab,Loaders).

already_loading2(Tab, [{_,#net_load{table=Tab}}|_]) -> true;
already_loading2(Tab, [{_,#disc_load{table=Tab}}|_]) -> true;
already_loading2(Tab, [_|Rest]) -> already_loading2(Tab,Rest);     
already_loading2(_,[]) -> false.

start_remote_sender(Node, Tab, Receiver, Storage) ->
    Msg = #send_table{table = Tab,
		      receiver_pid = Receiver,
		      remote_storage = Storage},
    gen_server:cast({?SERVER_NAME, Node}, Msg).

dump_and_reply(ReplyTo, Worker) ->
    %% No trap_exit, die intentionally instead
    Res = mnesia_dumper:opt_dump_log(Worker#dump_log.initiated_by),
    ReplyTo ! #dumper_done{worker_pid = self(),
			   worker_res = Res},
    unlink(ReplyTo),
    exit(normal).

send_and_reply(ReplyTo, Worker) ->
    %% No trap_exit, die intentionally instead
    Res = mnesia_loader:send_table(Worker#send_table.receiver_pid,
				   Worker#send_table.table,
				   Worker#send_table.remote_storage),
    ReplyTo ! #sender_done{worker_pid = self(),
			   worker_res = Res},
    unlink(ReplyTo),
    exit(normal).

load_and_reply(ReplyTo, Worker) ->
    Load = load_table_fun(Worker),
    SendAndReply = 
	fun() -> 
		process_flag(trap_exit, true),
		Done = Load(),
		ReplyTo ! Done#loader_done{worker_pid = self()},
		unlink(ReplyTo),
		exit(normal)
	end,
    spawn_link(SendAndReply).

%% Now it is time to load the table
%% but first we must check if it still is neccessary
load_table_fun(#net_load{cstruct=Cs, table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
    LocalC = val({Tab, local_content}),
    AccessMode = val({Tab, access_mode}),
    ReadNode = val({Tab, where_to_read}),
    Active = filter_active(Tab),
    Done = #loader_done{is_loaded = true,
			table_name = Tab,
			needs_announce = false,
			needs_sync = false,
			needs_reply = (ReplyTo /= undefined),
			reply_to = ReplyTo,
			reply = {loaded, ok}
		       },
    if
	ReadNode == node() ->
	    %% Already loaded locally
	    fun() -> Done end;
	LocalC == true ->
	    fun() ->
		    Res = mnesia_loader:disc_load_table(Tab, load_local_content),
		    Done#loader_done{reply = Res, needs_announce = true, needs_sync = true}
	    end;
	AccessMode == read_only, Reason /= {dumper,add_table_copy} ->
	    fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
	true ->
	    fun() ->
		    %% Either we cannot read the table yet
		    %% or someone is moving a replica between
		    %% two nodes
		    Res = mnesia_loader:net_load_table(Tab, Reason, Active, Cs),
		    case Res of
			{loaded, ok} ->
			    Done#loader_done{needs_sync = true,
					     reply = Res};
			{not_loaded, _} ->
			    Done#loader_done{is_loaded = false,
					     reply = Res}
		    end
	    end
    end;
load_table_fun(#disc_load{table=Tab, reason=Reason, opt_reply_to=ReplyTo}) ->
    ReadNode = val({Tab, where_to_read}),
    Active = filter_active(Tab),
    Done = #loader_done{is_loaded = true,
			table_name = Tab,
			needs_announce = false,
			needs_sync = false,
			needs_reply = false
		       },
    if
	Active == [], ReadNode == nowhere ->
	    %% Not loaded anywhere, lets load it from disc
	    fun() -> disc_load_table(Tab, Reason, ReplyTo) end;
	ReadNode == nowhere ->
	    %% Already loaded on other node, lets get it
	    Cs = val({Tab, cstruct}),
	    fun() -> 
		    case mnesia_loader:net_load_table(Tab, Reason, Active, Cs) of
			{loaded, ok} ->
			    Done#loader_done{needs_sync = true};
			{not_loaded, storage_unknown} ->
			    Done#loader_done{is_loaded = false};
			{not_loaded, ErrReason} ->
			    Done#loader_done{is_loaded = false,
					     reply = {not_loaded,ErrReason}}
		    end
	    end;
	true ->
	    %% Already readable, do not worry be happy
	    fun() -> Done end
    end.

disc_load_table(Tab, Reason, ReplyTo) ->
    Done = #loader_done{is_loaded = true,
			table_name = Tab,
			needs_announce = false,
			needs_sync = false,
			needs_reply = ReplyTo /= undefined,
			reply_to = ReplyTo,
			reply = {loaded, ok}
		       },
    Res = mnesia_loader:disc_load_table(Tab, Reason),
    if
	Res == {loaded, ok} ->
	    Done#loader_done{needs_announce = true,
			     needs_sync = true,
			     reply = Res};
	ReplyTo /= undefined ->
	    Done#loader_done{is_loaded = false,
			     reply = Res};
	true ->
	    fatal("Cannot load table ~p from disc: ~p~n", [Tab, Res])
    end.

filter_active(Tab) ->
    ByForce = val({Tab, load_by_force}),
    Active  = val({Tab, active_replicas}),
    Masters = mnesia_recover:get_master_nodes(Tab),
    Ns = do_filter_active(ByForce, Active, Masters),
    %% Reorder the so that we load from fastest first 
    LS = ?catch_val({Tab, storage_type}),
    DOC = val({Tab, disc_only_copies}),
    {Good,Worse} = 
	case LS of
	    disc_only_copies ->
		G = mnesia_lib:intersect(Ns, DOC),
		{G,Ns--G};
	    _ ->
		G = Ns -- DOC,
		{G,Ns--G}
	end,
    %% Pick a random node of the fastest
    Len = length(Good),
    if 
	Len > 0 ->
	    R = erlang:phash(node(), Len+1),
	    random(R-1,Good,Worse);
	true  ->
	    Worse
    end.

random(N, [H|R], Acc) when N > 0 ->
    random(N-1,R, [H|Acc]);
random(0, L, Acc) ->
    L ++ Acc.

do_filter_active(true, Active, _Masters) ->
    Active;
do_filter_active(false, Active, []) ->
    Active;
do_filter_active(false, Active, Masters) ->
    mnesia_lib:intersect(Active, Masters).