From c0912fe4220ba5352abd8250bb38d86382052607 Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Wed, 29 Apr 2015 15:19:50 +0200 Subject: Avoid exception overhead if HiPE is disabled --- lib/kernel/src/code.erl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib/kernel/src') diff --git a/lib/kernel/src/code.erl b/lib/kernel/src/code.erl index 65045666ec..580c070389 100644 --- a/lib/kernel/src/code.erl +++ b/lib/kernel/src/code.erl @@ -339,7 +339,8 @@ do_start(Flags) -> ok end, %% Quietly load native code for all modules loaded so far - load_native_code_for_all_loaded(), + Architecture = erlang:system_info(hipe_architecture), + load_native_code_for_all_loaded(Architecture), Ok2; Other -> Other @@ -554,9 +555,9 @@ has_ext(Ext, Extlen, File) -> %%% Silently load native code for all modules loaded so far. %%% --spec load_native_code_for_all_loaded() -> ok. -load_native_code_for_all_loaded() -> - Architecture = erlang:system_info(hipe_architecture), +load_native_code_for_all_loaded(undefined) -> + ok; +load_native_code_for_all_loaded(Architecture) -> try hipe_unified_loader:chunk_name(Architecture) of ChunkTag -> Loaded = all_loaded(), -- cgit v1.2.3 From c67741f8c505a9c17ffaf60f1cc37458b7e1301c Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Wed, 29 Apr 2015 14:25:07 +0200 Subject: Avoid repeated calls to system_info in hipe loader Make hipe_unified_loader not call system_info(hipe_architecture) repeatedly. Also clean up some ugly architecture-dependent case switches. --- lib/kernel/src/hipe_unified_loader.erl | 122 +++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 50 deletions(-) (limited to 'lib/kernel/src') diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl index 49d4a8fe54..949a3c9207 100644 --- a/lib/kernel/src/hipe_unified_loader.erl +++ b/lib/kernel/src/hipe_unified_loader.erl @@ -82,6 +82,13 @@ chunk_name(Architecture) -> %% HW32 %% HiPE, x86, Win32 end. +word_size(Architecture) -> + case Architecture of + amd64 -> 8; + ppc64 -> 8; + _ -> 4 + end. + %%======================================================================== -spec load_native_code(Mod, binary()) -> 'no_native' | {'module', Mod} @@ -212,18 +219,22 @@ load_common(Mod, Bin, Beam, OldReferencesToPatch) -> bad_crc; true -> put(closures_to_patch, []), + Architecture=erlang:system_info(hipe_architecture), + WordSize = word_size(Architecture), + WriteWord = write_word_fun(WordSize), %% Create data segment {ConstAddr,ConstMap2} = - create_data_segment(ConstAlign, ConstSize, ConstMap), + create_data_segment(ConstAlign, ConstSize, ConstMap, WriteWord), %% Find callees for which we may need trampolines. - CalleeMFAs = find_callee_mfas(Refs), + CalleeMFAs = find_callee_mfas(Refs, Architecture), %% Write the code to memory. {CodeAddress,Trampolines} = enter_code(CodeSize, CodeBinary, CalleeMFAs, Mod, Beam), %% Construct CalleeMFA-to-trampoline mapping. - TrampolineMap = mk_trampoline_map(CalleeMFAs, Trampolines), + TrampolineMap = mk_trampoline_map(CalleeMFAs, Trampolines, + Architecture), %% Patch references to code labels in data seg. - ok = patch_consts(LabelMap, ConstAddr, CodeAddress), + ok = patch_consts(LabelMap, ConstAddr, CodeAddress, WriteWord), %% Find out which functions are being loaded (and where). %% Note: Addresses are sorted descending. {MFAs,Addresses} = exports(ExportMap, CodeAddress), @@ -275,14 +286,26 @@ load_common(Mod, Bin, Beam, OldReferencesToPatch) -> %% Scan the list of patches and build a set (returned as a tuple) %% of the callees for which we may need trampolines. %% -find_callee_mfas(Patches) when is_list(Patches) -> - case erlang:system_info(hipe_architecture) of - amd64 -> []; - arm -> find_callee_mfas(Patches, gb_sets:empty(), false); - powerpc -> find_callee_mfas(Patches, gb_sets:empty(), true); - ppc64 -> find_callee_mfas(Patches, gb_sets:empty(), true); - ultrasparc -> []; - x86 -> [] +find_callee_mfas(Patches, Architecture) when is_list(Patches) -> + case needs_trampolines(Architecture) of + true -> find_callee_mfas(Patches, gb_sets:empty(), + no_erts_trampolines(Architecture)); + _ -> [] + end. + +needs_trampolines(Architecture) -> + case Architecture of + arm -> true; + powerpc -> true; + ppc64 -> true; + _ -> false + end. + +no_erts_trampolines(Architecture) -> + case Architecture of + powerpc -> true; + ppc64 -> true; + _ -> false end. find_callee_mfas([{Type,Data}|Patches], MFAs, SkipErtsSyms) -> @@ -318,14 +341,9 @@ add_callee_mfas([], MFAs, _SkipErtsSyms) -> MFAs. %%---------------------------------------------------------------- %% -mk_trampoline_map([], []) -> []; % archs not using trampolines -mk_trampoline_map(CalleeMFAs, Trampolines) -> - SizeofLong = - case erlang:system_info(hipe_architecture) of - amd64 -> 8; - ppc64 -> 8; - _ -> 4 - end, +mk_trampoline_map([], [], _) -> []; % archs not using trampolines +mk_trampoline_map(CalleeMFAs, Trampolines, Architecture) -> + SizeofLong = word_size(Architecture), mk_trampoline_map(tuple_size(CalleeMFAs), CalleeMFAs, Trampolines, SizeofLong, gb_trees:empty()). @@ -621,22 +639,24 @@ patch_load_mfa(CodeAddress, DestMFA, Addresses, RemoteOrLocal) -> %%---------------------------------------------------------------- %% Patch references to code labels in the data segment. %% -patch_consts(Labels, DataAddress, CodeAddress) -> +patch_consts(Labels, DataAddress, CodeAddress, WriteWord) -> lists:foreach(fun (L) -> - patch_label_or_labels(L, DataAddress, CodeAddress) + patch_label_or_labels(L, DataAddress, CodeAddress, + WriteWord) end, Labels). -patch_label_or_labels({Pos,Offset}, DataAddress, CodeAddress) -> +patch_label_or_labels({Pos,Offset}, DataAddress, CodeAddress, WriteWord) -> ?ASSERT(assert_local_patch(CodeAddress+Offset)), - write_word(DataAddress+Pos, CodeAddress+Offset); -patch_label_or_labels({sorted,Base,UnOrderdList}, DataAddress, CodeAddress) -> - sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress). + WriteWord(DataAddress+Pos, CodeAddress+Offset); +patch_label_or_labels({sorted,Base,UnOrderdList}, DataAddress, CodeAddress, + WriteWord) -> + sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress, WriteWord). -sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress) -> +sort_and_write(UnOrderdList, Base, DataAddress, CodeAddress, WriteWord) -> WriteAndInc = fun ({_, Offset}, DataPos) -> ?ASSERT(assert_local_patch(CodeAddress+Offset)), - write_word(DataPos, CodeAddress+Offset) + WriteWord(DataPos, CodeAddress+Offset) end, lists:foldl(WriteAndInc, DataAddress+Base, sort_on_representation(UnOrderdList)). @@ -662,17 +682,18 @@ patch_instr(Address, Value, Type) -> %% XXX: It appears this is used for inserting both code addresses %% and other data. In HiPE, code addresses are still 32-bit on %% some 64-bit machines. -write_word(DataAddress, DataWord) -> - case erlang:system_info(hipe_architecture) of - amd64 -> - hipe_bifs:write_u64(DataAddress, DataWord), - DataAddress+8; - ppc64 -> - hipe_bifs:write_u64(DataAddress, DataWord), - DataAddress+8; - _ -> - hipe_bifs:write_u32(DataAddress, DataWord), - DataAddress+4 +write_word_fun(WordSize) -> + case WordSize of + 8 -> + fun (DataAddress, DataWord) -> + hipe_bifs:write_u64(DataAddress, DataWord), + DataAddress+8 + end; + 4 -> + fun (DataAddress, DataWord) -> + hipe_bifs:write_u32(DataAddress, DataWord), + DataAddress+4 + end end. %%-------------------------------------------------------------------- @@ -688,30 +709,31 @@ bif_address(Name) when is_atom(Name) -> %% memory, and produces a ConstMap2 mapping each constant's ConstNo to %% its runtime address, tagged if the constant is a term. %% -create_data_segment(DataAlign, DataSize, DataList) -> +create_data_segment(DataAlign, DataSize, DataList, WriteWord) -> %%io:format("create_data_segment: \nDataAlign: ~p\nDataSize: ~p\nDataList: ~p\n",[DataAlign,DataSize,DataList]), DataAddress = hipe_bifs:alloc_data(DataAlign, DataSize), - enter_data(DataList, [], DataAddress, DataSize). + enter_data(DataList, [], DataAddress, DataSize, WriteWord). -enter_data(List, ConstMap2, DataAddress, DataSize) -> +enter_data(List, ConstMap2, DataAddress, DataSize, WriteWord) -> case List of [ConstNo,Offset,Type,Data|Rest] when is_integer(Offset) -> %%?msg("Const ~w\n",[[ConstNo,Offset,Type,Data]]), ?ASSERT((Offset >= 0) and (Offset =< DataSize)), - Res = enter_datum(Type, Data, DataAddress+Offset), - enter_data(Rest, [{ConstNo,Res}|ConstMap2], DataAddress, DataSize); + Res = enter_datum(Type, Data, DataAddress+Offset, WriteWord), + enter_data(Rest, [{ConstNo,Res}|ConstMap2], DataAddress, DataSize, + WriteWord); [] -> {DataAddress, ConstMap2} end. -enter_datum(Type, Data, Address) -> +enter_datum(Type, Data, Address, WriteWord) -> case ?EXT2CONST_TYPE(Type) of term -> %% Address is unused for terms hipe_bifs:term_to_word(hipe_bifs:merge_term(Data)); sorted_block -> L = lists:sort([hipe_bifs:term_to_word(Term) || Term <- Data]), - write_words(L, Address), + write_words(L, Address, WriteWord), Address; block -> case Data of @@ -719,7 +741,7 @@ enter_datum(Type, Data, Address) -> write_bytes(Lbls, Address); {Lbls, SortOrder} -> SortedLbls = [Lbl || {_,Lbl} <- lists:sort(group(Lbls, SortOrder))], - write_words(SortedLbls, Address); + write_words(SortedLbls, Address, WriteWord); Lbls -> write_bytes(Lbls, Address) end, @@ -734,9 +756,9 @@ group([B1,B2,B3,B4|Ls], [O|Os]) -> bytes_to_32(B4,B3,B2,B1) -> (B4 bsl 24) bor (B3 bsl 16) bor (B2 bsl 8) bor B1. -write_words([W|Rest], Addr) -> - write_words(Rest, write_word(Addr, W)); -write_words([], Addr) when is_integer(Addr) -> true. +write_words([W|Rest], Addr, WriteWord) -> + write_words(Rest, WriteWord(Addr, W), WriteWord); +write_words([], Addr, _) when is_integer(Addr) -> true. write_bytes([B|Rest], Addr) -> hipe_bifs:write_u8(Addr, B), -- cgit v1.2.3 From ccdcb7a5c7b2c3bc1b50a5314b3045c0782d76bd Mon Sep 17 00:00:00 2001 From: Richard Carlsson Date: Mon, 20 Apr 2015 14:27:34 +0200 Subject: Move architecture knowledge out of hipe loader Make code_server be responsible for finding the architecture and deciding whether to try to load native code, in order to avoid repeated calls to system_info(hipe_architecture) and clumsy uses of try/catch to test if hipe is enabled. --- lib/kernel/src/code_server.erl | 70 ++++++++++++--------- lib/kernel/src/hipe_unified_loader.erl | 111 +++++++++++++++------------------ 2 files changed, 94 insertions(+), 87 deletions(-) (limited to 'lib/kernel/src') diff --git a/lib/kernel/src/code_server.erl b/lib/kernel/src/code_server.erl index 819554ce74..a4342715ef 100644 --- a/lib/kernel/src/code_server.erl +++ b/lib/kernel/src/code_server.erl @@ -324,12 +324,15 @@ handle_call({load_binary,Mod,File,Bin}, Caller, S) -> do_load_binary(Mod, File, Bin, Caller, S); handle_call({load_native_partial,Mod,Bin}, {_From,_Tag}, S) -> - Result = (catch hipe_unified_loader:load(Mod, Bin)), + Architecture = erlang:system_info(hipe_architecture), + Result = (catch hipe_unified_loader:load(Mod, Bin, Architecture)), Status = hipe_result_to_status(Result), {reply,Status,S}; handle_call({load_native_sticky,Mod,Bin,WholeModule}, {_From,_Tag}, S) -> - Result = (catch hipe_unified_loader:load_module(Mod, Bin, WholeModule)), + Architecture = erlang:system_info(hipe_architecture), + Result = (catch hipe_unified_loader:load_module(Mod, Bin, WholeModule, + Architecture)), Status = hipe_result_to_status(Result), {reply,Status,S}; @@ -1259,30 +1262,40 @@ try_load_module_1(File, Mod, Bin, Caller, #state{moddb=Db}=St) -> error_msg("Can't load module that resides in sticky dir\n",[]), {reply,{error,sticky_directory},St}; false -> - case catch load_native_code(Mod, Bin) of - {module,Mod} = Module -> - ets:insert(Db, {Mod,File}), - {reply,Module,St}; - no_native -> - case erlang:load_module(Mod, Bin) of - {module,Mod} = Module -> - ets:insert(Db, {Mod,File}), - post_beam_load(Mod), - {reply,Module,St}; - {error,on_load} -> - handle_on_load(Mod, File, Caller, St); - {error,What} = Error -> - error_msg("Loading of ~ts failed: ~p\n", [File, What]), - {reply,Error,St} - end; - Error -> - error_msg("Native loading of ~ts failed: ~p\n", - [File,Error]), - {reply,ok,St} - end + Architecture = erlang:system_info(hipe_architecture), + try_load_module_2(File, Mod, Bin, Caller, Architecture, St) + end. + +try_load_module_2(File, Mod, Bin, Caller, undefined, St) -> + try_load_module_3(File, Mod, Bin, Caller, undefined, St); +try_load_module_2(File, Mod, Bin, Caller, Architecture, + #state{moddb=Db}=St) -> + case catch load_native_code(Mod, Bin, Architecture) of + {module,Mod} = Module -> + ets:insert(Db, {Mod,File}), + {reply,Module,St}; + no_native -> + try_load_module_3(File, Mod, Bin, Caller, Architecture, St); + Error -> + error_msg("Native loading of ~ts failed: ~p\n", [File,Error]), + {reply,ok,St} + end. + +try_load_module_3(File, Mod, Bin, Caller, Architecture, + #state{moddb=Db}=St) -> + case erlang:load_module(Mod, Bin) of + {module,Mod} = Module -> + ets:insert(Db, {Mod,File}), + post_beam_load(Mod, Architecture), + {reply,Module,St}; + {error,on_load} -> + handle_on_load(Mod, File, Caller, St); + {error,What} = Error -> + error_msg("Loading of ~ts failed: ~p\n", [File, What]), + {reply,Error,St} end. -load_native_code(Mod, Bin) -> +load_native_code(Mod, Bin, Architecture) -> %% During bootstrapping of Open Source Erlang, we don't have any hipe %% loader modules, but the Erlang emulator might be hipe enabled. %% Therefore we must test for that the loader modules are available @@ -1291,7 +1304,8 @@ load_native_code(Mod, Bin) -> false -> no_native; true -> - Result = hipe_unified_loader:load_native_code(Mod, Bin), + Result = hipe_unified_loader:load_native_code(Mod, Bin, + Architecture), case Result of {module,_} -> put(?ANY_NATIVE_CODE_LOADED, true); @@ -1310,12 +1324,12 @@ hipe_result_to_status(Result) -> {error,Result} end. -post_beam_load(Mod) -> - %% post_beam_load/1 can potentially be very expensive because it +post_beam_load(Mod, Architecture) -> + %% post_beam_load/2 can potentially be very expensive because it %% blocks multi-scheduling; thus we want to avoid the call if we %% know that it is not needed. case get(?ANY_NATIVE_CODE_LOADED) of - true -> hipe_unified_loader:post_beam_load(Mod); + true -> hipe_unified_loader:post_beam_load(Mod, Architecture); false -> ok end. diff --git a/lib/kernel/src/hipe_unified_loader.erl b/lib/kernel/src/hipe_unified_loader.erl index 949a3c9207..ddbbc548dd 100644 --- a/lib/kernel/src/hipe_unified_loader.erl +++ b/lib/kernel/src/hipe_unified_loader.erl @@ -43,10 +43,10 @@ -export([chunk_name/1, %% Only the code and code_server modules may call the entries below! - load_native_code/2, - post_beam_load/1, - load_module/3, - load/2]). + load_native_code/3, + post_beam_load/2, + load_module/4, + load/3]). %%-define(DEBUG,true). -define(DO_ASSERT,true). @@ -91,56 +91,48 @@ word_size(Architecture) -> %%======================================================================== --spec load_native_code(Mod, binary()) -> 'no_native' | {'module', Mod} - when Mod :: atom(). +-spec load_native_code(Mod, binary(), hipe_architecture()) -> + 'no_native' | {'module', Mod} when Mod :: atom(). %% @doc %% Loads the native code of a module Mod. %% Returns {module,Mod} on success (for compatibility with %% code:load_file/1) and the atom `no_native' on failure. -load_native_code(Mod, Bin) when is_atom(Mod), is_binary(Bin) -> - Architecture = erlang:system_info(hipe_architecture), - try chunk_name(Architecture) of - ChunkTag -> - %% patch_to_emu(Mod), - case code:get_chunk(Bin, ChunkTag) of - undefined -> no_native; - NativeCode when is_binary(NativeCode) -> - erlang:system_flag(multi_scheduling, block), - try - OldReferencesToPatch = patch_to_emu_step1(Mod), - case load_module(Mod, NativeCode, Bin, OldReferencesToPatch) of - bad_crc -> no_native; - Result -> Result - end - after - erlang:system_flag(multi_scheduling, unblock) - end +load_native_code(_Mod, _Bin, undefined) -> + no_native; +load_native_code(Mod, Bin, Architecture) when is_atom(Mod), is_binary(Bin) -> + %% patch_to_emu(Mod), + case code:get_chunk(Bin, chunk_name(Architecture)) of + undefined -> no_native; + NativeCode when is_binary(NativeCode) -> + erlang:system_flag(multi_scheduling, block), + try + OldReferencesToPatch = patch_to_emu_step1(Mod), + case load_module(Mod, NativeCode, Bin, OldReferencesToPatch, + Architecture) of + bad_crc -> no_native; + Result -> Result + end + after + erlang:system_flag(multi_scheduling, unblock) end - catch - _:_ -> - %% Unknown HiPE architecture. Can't happen (in principle). - no_native end. %%======================================================================== --spec post_beam_load(atom()) -> 'ok'. +-spec post_beam_load(atom(), hipe_architecture()) -> 'ok'. -post_beam_load(Mod) when is_atom(Mod) -> - Architecture = erlang:system_info(hipe_architecture), - try chunk_name(Architecture) of - _ChunkTag -> - erlang:system_flag(multi_scheduling, block), - try - patch_to_emu(Mod) - after - erlang:system_flag(multi_scheduling, unblock) - end - catch - _:_ -> - ok - end. +%% does nothing on a hipe-disabled system +post_beam_load(_Mod, undefined) -> + ok; +post_beam_load(Mod, _) when is_atom(Mod) -> + erlang:system_flag(multi_scheduling, block), + try + patch_to_emu(Mod) + after + erlang:system_flag(multi_scheduling, unblock) + end, + ok. %%======================================================================== @@ -155,46 +147,48 @@ version_check(Version, Mod) when is_atom(Mod) -> %%======================================================================== --spec load_module(Mod, binary(), _) -> 'bad_crc' | {'module', Mod} - when Mod :: atom(). -load_module(Mod, Bin, Beam) -> +-spec load_module(Mod, binary(), _, hipe_architecture()) -> + 'bad_crc' | {'module', Mod} when Mod :: atom(). + +load_module(Mod, Bin, Beam, Architecture) -> erlang:system_flag(multi_scheduling, block), try - load_module_nosmp(Mod, Bin, Beam) + load_module_nosmp(Mod, Bin, Beam, Architecture) after erlang:system_flag(multi_scheduling, unblock) end. -load_module_nosmp(Mod, Bin, Beam) -> - load_module(Mod, Bin, Beam, []). +load_module_nosmp(Mod, Bin, Beam, Architecture) -> + load_module(Mod, Bin, Beam, [], Architecture). -load_module(Mod, Bin, Beam, OldReferencesToPatch) -> +load_module(Mod, Bin, Beam, OldReferencesToPatch, Architecture) -> ?debug_msg("************ Loading Module ~w ************\n",[Mod]), %% Loading a whole module, let the BEAM loader patch closures. put(hipe_patch_closures, false), - load_common(Mod, Bin, Beam, OldReferencesToPatch). + load_common(Mod, Bin, Beam, OldReferencesToPatch, Architecture). %%======================================================================== --spec load(Mod, binary()) -> 'bad_crc' | {'module', Mod} when Mod :: atom(). +-spec load(Mod, binary(), hipe_architecture()) -> + 'bad_crc' | {'module', Mod} when Mod :: atom(). -load(Mod, Bin) -> +load(Mod, Bin, Architecture) -> erlang:system_flag(multi_scheduling, block), try - load_nosmp(Mod, Bin) + load_nosmp(Mod, Bin, Architecture) after erlang:system_flag(multi_scheduling, unblock) end. -load_nosmp(Mod, Bin) -> +load_nosmp(Mod, Bin, Architecture) -> ?debug_msg("********* Loading funs in module ~w *********\n",[Mod]), %% Loading just some functions in a module; patch closures separately. put(hipe_patch_closures, true), - load_common(Mod, Bin, [], []). + load_common(Mod, Bin, [], [], Architecture). %%------------------------------------------------------------------------ -load_common(Mod, Bin, Beam, OldReferencesToPatch) -> +load_common(Mod, Bin, Beam, OldReferencesToPatch, Architecture) -> %% Unpack the binary. [{Version, CheckSum}, ConstAlign, ConstSize, ConstMap, LabelMap, ExportMap, @@ -219,7 +213,6 @@ load_common(Mod, Bin, Beam, OldReferencesToPatch) -> bad_crc; true -> put(closures_to_patch, []), - Architecture=erlang:system_info(hipe_architecture), WordSize = word_size(Architecture), WriteWord = write_word_fun(WordSize), %% Create data segment @@ -834,7 +827,7 @@ address_to_mfa_lth(_Address, [], Prev) -> %%---------------------------------------------------------------- %% Change callers of the given module to instead trap to BEAM. -%% load_native_code/2 calls this just before loading native code. +%% load_native_code/3 calls this just before loading native code. %% patch_to_emu(Mod) -> patch_to_emu_step2(patch_to_emu_step1(Mod)). -- cgit v1.2.3