diff options
author | Lukas Larsson <[email protected]> | 2017-09-11 14:45:04 +0200 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2017-09-11 14:45:04 +0200 |
commit | c15bb1698267ae64aac08b3b48040c44174700e5 (patch) | |
tree | cba3ff4bdbde104d912200014384f2599aa9f094 /erts/emulator | |
parent | 87b57377864d3161a79c65e32844d7539d1a9264 (diff) | |
parent | a9812e6307fe335d077f96d3a6342cbd4894ed0b (diff) | |
download | otp-c15bb1698267ae64aac08b3b48040c44174700e5.tar.gz otp-c15bb1698267ae64aac08b3b48040c44174700e5.tar.bz2 otp-c15bb1698267ae64aac08b3b48040c44174700e5.zip |
Merge branch 'lukas/erts/pgo/OTP-14604'
* lukas/erts/pgo/OTP-14604:
Add support for building a pgo beam_emu
Diffstat (limited to 'erts/emulator')
-rw-r--r-- | erts/emulator/Makefile.in | 102 | ||||
-rw-r--r-- | erts/emulator/test/estone_SUITE.erl | 31 |
2 files changed, 104 insertions, 29 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 4e98184c4e..1b29065486 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -63,6 +63,28 @@ ARFLAGS=rc OMIT_OMIT_FP=no TYPE_LIBS= +PROFILE_COMPILER=@PROFILE_COMPILER@ +PROFILE_MARKER= +ifeq ($(PROFILE),generate) +PROFILE_MARKER=_pg +else +ifeq ($(PROFILE),use) +PROFILE_MARKER=_pu +endif +endif + +ifeq ($(PROFILE_COMPILER), gcc) +PROFILE_CORRECTION=@PROFILE_CORRECTION@ +PROFILE_GENERATE=-fprofile-generate +PROFILE_USE=-fprofile-use $(PROFILE_CORRECTION) +PROFILE_USE_DEPS=$(OBJDIR)/%_pu.gcda +endif +ifeq ($(PROFILE_COMPILER), clang) +PROFILE_GENERATE=-fprofile-instr-generate +PROFILE_USE=-fprofile-instr-use=$(OBJDIR)/default.profdata +PROFILE_USE_DEPS=$(OBJDIR)/default.profdata +endif + DIRTY_SCHEDULER_SUPPORT=@DIRTY_SCHEDULER_SUPPORT@ DIRTY_SCHEDULER_TEST=@DIRTY_SCHEDULER_TEST@ @@ -418,9 +440,20 @@ ifeq ($(TARGET), win32) EMULATOR_EXECUTABLE = beam$(TF_MARKER).dll else EMULATOR_EXECUTABLE = beam$(TF_MARKER) +PROFILE_EXECUTABLE = beam.prof$(TF_MARKER) endif CS_EXECUTABLE = erl_child_setup$(TYPEMARKER) +ifeq ($(PROFILE), generate) +EMULATOR_EXECUTABLE = $(PROFILE_EXECUTABLE) +ifeq ($(PROFILE_COMPILER), gcc) +PROFILE_LDFLAGS = -fprofile-generate +endif +ifeq ($(PROFILE_COMPILER), clang) +PROFILE_LDFLAGS = -fprofile-instr-generate +endif +endif + # ---------------------------------------------------------------------- ifeq ($(ERLANG_OSTYPE), unix) @@ -688,16 +721,33 @@ $(OBJDIR)/beams.$(RES_EXT): $(TARGET)/beams.rc endif -ifneq ($(filter tile-%,$(TARGET)),) -$(OBJDIR)/beam_emu.o: beam/beam_emu.c - $(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) \ - $(INCLUDES) -c $< -o $@ -else # Usually the same as the default rule, but certain platforms (e.g. win32) mix # different compilers $(OBJDIR)/beam_emu.o: beam/beam_emu.c $(V_EMU_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@ -endif + +$(OBJDIR)/%_pg.o: beam/%.c + $(V_CC) $(PROFILE_GENERATE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@ +$(OBJDIR)/%_pu.o: beam/%.c $(PROFILE_USE_DEPS) + $(V_CC) $(PROFILE_USE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@ + +$(OBJDIR)/PROFILE: $(BINDIR)/$(PROFILE_EXECUTABLE) + $(V_at)echo " PROFILE ${PROFILE_EXECUTABLE}" + $(V_at)rm -f $(OBJDIR)/erl*.profraw + $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erlc-%m.profraw" \ + ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERLC) -DPGO \ + -o $(OBJDIR) test/estone_SUITE.erl > $(OBJDIR)/PROFILE_LOG + $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erl-%m.profraw" \ + ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERL) -pa $(OBJDIR) \ + -noshell -s estone_SUITE pgo -s init stop >> $(OBJDIR)/PROFILE_LOG + $(V_at)touch $@ + +$(OBJDIR)/%_pu.gcda: $(OBJDIR)/PROFILE + $(V_at)mv $(OBJDIR)/$*_pg.gcda $@ + $(V_at)touch $@ + +$(OBJDIR)/default.profdata: $(OBJDIR)/PROFILE + $(V_LLVM_PROFDATA) merge -output $@ $(OBJDIR)/*.profraw $(OBJDIR)/%.o: beam/%.c $(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@ @@ -759,15 +809,23 @@ $(ERL_TOP)/lib/%.beam: INIT_OBJS = $(OBJDIR)/erl_main.o $(PRELOAD_OBJ) +# -fprofile-correction is needed in order to use PGO on erl_process +# as multiple threads execute in that file. +ifeq ($(PROFILE_CORRECTION),) +PROFILE_OBJS = $(OBJDIR)/beam_emu.o +RUN_OBJS = $(OBJDIR)/erl_process.o +else +PROFILE_OBJS = $(OBJDIR)/beam_emu.o $(OBJDIR)/erl_process.o +endif + EMU_OBJS = \ - $(OBJDIR)/beam_emu.o $(OBJDIR)/beam_opcodes.o \ + $(OBJDIR)/beam_opcodes.o \ $(OBJDIR)/beam_load.o $(OBJDIR)/beam_bif_load.o \ $(OBJDIR)/beam_debug.o $(OBJDIR)/beam_bp.o \ - $(OBJDIR)/beam_catches.o \ - $(OBJDIR)/code_ix.o \ + $(OBJDIR)/beam_catches.o $(OBJDIR)/code_ix.o \ $(OBJDIR)/beam_ranges.o -RUN_OBJS = \ +RUN_OBJS += \ $(OBJDIR)/erl_alloc.o $(OBJDIR)/erl_mtrace.o \ $(OBJDIR)/erl_alloc_util.o $(OBJDIR)/erl_goodfit_alloc.o \ $(OBJDIR)/erl_bestfit_alloc.o $(OBJDIR)/erl_afit_alloc.o \ @@ -783,7 +841,7 @@ RUN_OBJS = \ $(OBJDIR)/utils.o $(OBJDIR)/bif.o \ $(OBJDIR)/io.o $(OBJDIR)/erl_printf_term.o\ $(OBJDIR)/erl_debug.o $(OBJDIR)/erl_md5.o \ - $(OBJDIR)/erl_message.o $(OBJDIR)/erl_process.o \ + $(OBJDIR)/erl_message.o \ $(OBJDIR)/erl_process_dict.o $(OBJDIR)/erl_process_lock.o \ $(OBJDIR)/erl_port_task.o $(OBJDIR)/erl_arith.o \ $(OBJDIR)/time.o $(OBJDIR)/erl_time_sup.o \ @@ -925,21 +983,23 @@ ifdef HIPE_ENABLED EXTRA_BASE_OBJS += $(HIPE_OBJS) endif -BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) $(LTTNG_OBJS) +BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) \ + $(LTTNG_OBJS) $(DRV_OBJS) $(NIF_OBJS) -before_DTrace_OBJS = $(BASE_OBJS) $(DRV_OBJS) $(NIF_OBJS) +PROF_OBJS = $(patsubst %.o,%$(PROFILE_MARKER).o,$(PROFILE_OBJS)) $(BASE_OBJS) + +OBJS = $(PROF_OBJS) -DTRACE_OBJS = ifdef DTRACE_ENABLED_2STEP -DTRACE_OBJS = $(OBJDIR)/erlang_dtrace.o -$(OBJDIR)/erlang_dtrace.o: $(before_DTrace_OBJS) $(TARGET)/erlang_dtrace.h +# The $(PROFILE_MARKER) is placed in the object file name in order to +# make sure we re-compile with the new object files for the profiled emulator +OBJS += $(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o +$(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o: $(PROF_OBJS) $(TARGET)/erlang_dtrace.h dtrace -G -C -Ibeam \ -s beam/erlang_dtrace.d \ - -o $@ $(before_DTrace_OBJS) + -o $@ $(PROF_OBJS) endif -OBJS = $(before_DTrace_OBJS) $(DTRACE_OBJS) - $(INIT_OBJS): $(TTF_DIR)/GENERATED $(OBJS): $(TTF_DIR)/GENERATED @@ -1031,8 +1091,8 @@ $(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS) else $(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS) - $(ld_verbose)$(PURIFY) $(LD) -o $(BINDIR)/$(EMULATOR_EXECUTABLE) \ - $(HIPEBEAMLDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \ + $(ld_verbose)$(PURIFY) $(LD) -o $@ \ + $(HIPEBEAMLDFLAGS) $(PROFILE_LDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \ $(STATIC_NIF_LIBS) $(STATIC_DRIVER_LIBS) $(LIBS) endif diff --git a/erts/emulator/test/estone_SUITE.erl b/erts/emulator/test/estone_SUITE.erl index 8b336b366d..c4899967ca 100644 --- a/erts/emulator/test/estone_SUITE.erl +++ b/erts/emulator/test/estone_SUITE.erl @@ -20,7 +20,7 @@ -module(estone_SUITE). %% Test functions -export([all/0, suite/0, groups/0, - estone/1, estone_bench/1]). + estone/1, estone_bench/1, pgo/0]). %% Internal exports for EStone tests -export([lists/1, @@ -44,9 +44,9 @@ links/1,lproc/1, run_micro/3,p1/1,ppp/3,macro/2,micros/0]). - --include_lib("common_test/include/ct.hrl"). +-ifndef(PGO). -include_lib("common_test/include/ct_event.hrl"). +-endif. %% EStone defines -define(TOTAL, (3000 * 1000 * 100)). %% 300 secs @@ -85,13 +85,28 @@ estone(Config) when is_list(Config) -> estone_bench(Config) -> DataDir = proplists:get_value(data_dir,Config), L = ?MODULE:macro(?MODULE:micros(),DataDir), - [ct_event:notify( - #event{name = benchmark_data, - data = [{name,proplists:get_value(title,Mark)}, - {value,proplists:get_value(estones,Mark)}]}) - || Mark <- L], + {Total, Stones} = sum_micros(L, 0, 0), + notify([[{title,"ESTONES"}, {estones, Stones}] | L]), L. +-ifndef(PGO). +notify(Marks) -> + [ct_event:notify( + #event{name = benchmark_data, + data = [{name,proplists:get_value(title, Mark)}, + {value,proplists:get_value(estones, Mark)}]}) + || Mark <- Marks]. +-else. +notify(_) -> + ok. +-endif. + +%% The benchmarks to run in order to guide PGO (profile guided optimisation) +pgo() -> + %% We run all benchmarks except the port_io as we don't want to + %% have to build a custom port. + Micros = ?MODULE:micros() -- [micro(port_io)], + ?MODULE:macro(Micros,[]). %% %% Calculate CPU speed |