aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2017-09-11 14:45:04 +0200
committerLukas Larsson <[email protected]>2017-09-11 14:45:04 +0200
commitc15bb1698267ae64aac08b3b48040c44174700e5 (patch)
treecba3ff4bdbde104d912200014384f2599aa9f094 /erts/emulator
parent87b57377864d3161a79c65e32844d7539d1a9264 (diff)
parenta9812e6307fe335d077f96d3a6342cbd4894ed0b (diff)
downloadotp-c15bb1698267ae64aac08b3b48040c44174700e5.tar.gz
otp-c15bb1698267ae64aac08b3b48040c44174700e5.tar.bz2
otp-c15bb1698267ae64aac08b3b48040c44174700e5.zip
Merge branch 'lukas/erts/pgo/OTP-14604'
* lukas/erts/pgo/OTP-14604: Add support for building a pgo beam_emu
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/Makefile.in102
-rw-r--r--erts/emulator/test/estone_SUITE.erl31
2 files changed, 104 insertions, 29 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 4e98184c4e..1b29065486 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -63,6 +63,28 @@ ARFLAGS=rc
OMIT_OMIT_FP=no
TYPE_LIBS=
+PROFILE_COMPILER=@PROFILE_COMPILER@
+PROFILE_MARKER=
+ifeq ($(PROFILE),generate)
+PROFILE_MARKER=_pg
+else
+ifeq ($(PROFILE),use)
+PROFILE_MARKER=_pu
+endif
+endif
+
+ifeq ($(PROFILE_COMPILER), gcc)
+PROFILE_CORRECTION=@PROFILE_CORRECTION@
+PROFILE_GENERATE=-fprofile-generate
+PROFILE_USE=-fprofile-use $(PROFILE_CORRECTION)
+PROFILE_USE_DEPS=$(OBJDIR)/%_pu.gcda
+endif
+ifeq ($(PROFILE_COMPILER), clang)
+PROFILE_GENERATE=-fprofile-instr-generate
+PROFILE_USE=-fprofile-instr-use=$(OBJDIR)/default.profdata
+PROFILE_USE_DEPS=$(OBJDIR)/default.profdata
+endif
+
DIRTY_SCHEDULER_SUPPORT=@DIRTY_SCHEDULER_SUPPORT@
DIRTY_SCHEDULER_TEST=@DIRTY_SCHEDULER_TEST@
@@ -418,9 +440,20 @@ ifeq ($(TARGET), win32)
EMULATOR_EXECUTABLE = beam$(TF_MARKER).dll
else
EMULATOR_EXECUTABLE = beam$(TF_MARKER)
+PROFILE_EXECUTABLE = beam.prof$(TF_MARKER)
endif
CS_EXECUTABLE = erl_child_setup$(TYPEMARKER)
+ifeq ($(PROFILE), generate)
+EMULATOR_EXECUTABLE = $(PROFILE_EXECUTABLE)
+ifeq ($(PROFILE_COMPILER), gcc)
+PROFILE_LDFLAGS = -fprofile-generate
+endif
+ifeq ($(PROFILE_COMPILER), clang)
+PROFILE_LDFLAGS = -fprofile-instr-generate
+endif
+endif
+
# ----------------------------------------------------------------------
ifeq ($(ERLANG_OSTYPE), unix)
@@ -688,16 +721,33 @@ $(OBJDIR)/beams.$(RES_EXT): $(TARGET)/beams.rc
endif
-ifneq ($(filter tile-%,$(TARGET)),)
-$(OBJDIR)/beam_emu.o: beam/beam_emu.c
- $(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) \
- $(INCLUDES) -c $< -o $@
-else
# Usually the same as the default rule, but certain platforms (e.g. win32) mix
# different compilers
$(OBJDIR)/beam_emu.o: beam/beam_emu.c
$(V_EMU_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
-endif
+
+$(OBJDIR)/%_pg.o: beam/%.c
+ $(V_CC) $(PROFILE_GENERATE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
+$(OBJDIR)/%_pu.o: beam/%.c $(PROFILE_USE_DEPS)
+ $(V_CC) $(PROFILE_USE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
+
+$(OBJDIR)/PROFILE: $(BINDIR)/$(PROFILE_EXECUTABLE)
+ $(V_at)echo " PROFILE ${PROFILE_EXECUTABLE}"
+ $(V_at)rm -f $(OBJDIR)/erl*.profraw
+ $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erlc-%m.profraw" \
+ ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERLC) -DPGO \
+ -o $(OBJDIR) test/estone_SUITE.erl > $(OBJDIR)/PROFILE_LOG
+ $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erl-%m.profraw" \
+ ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERL) -pa $(OBJDIR) \
+ -noshell -s estone_SUITE pgo -s init stop >> $(OBJDIR)/PROFILE_LOG
+ $(V_at)touch $@
+
+$(OBJDIR)/%_pu.gcda: $(OBJDIR)/PROFILE
+ $(V_at)mv $(OBJDIR)/$*_pg.gcda $@
+ $(V_at)touch $@
+
+$(OBJDIR)/default.profdata: $(OBJDIR)/PROFILE
+ $(V_LLVM_PROFDATA) merge -output $@ $(OBJDIR)/*.profraw
$(OBJDIR)/%.o: beam/%.c
$(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
@@ -759,15 +809,23 @@ $(ERL_TOP)/lib/%.beam:
INIT_OBJS = $(OBJDIR)/erl_main.o $(PRELOAD_OBJ)
+# -fprofile-correction is needed in order to use PGO on erl_process
+# as multiple threads execute in that file.
+ifeq ($(PROFILE_CORRECTION),)
+PROFILE_OBJS = $(OBJDIR)/beam_emu.o
+RUN_OBJS = $(OBJDIR)/erl_process.o
+else
+PROFILE_OBJS = $(OBJDIR)/beam_emu.o $(OBJDIR)/erl_process.o
+endif
+
EMU_OBJS = \
- $(OBJDIR)/beam_emu.o $(OBJDIR)/beam_opcodes.o \
+ $(OBJDIR)/beam_opcodes.o \
$(OBJDIR)/beam_load.o $(OBJDIR)/beam_bif_load.o \
$(OBJDIR)/beam_debug.o $(OBJDIR)/beam_bp.o \
- $(OBJDIR)/beam_catches.o \
- $(OBJDIR)/code_ix.o \
+ $(OBJDIR)/beam_catches.o $(OBJDIR)/code_ix.o \
$(OBJDIR)/beam_ranges.o
-RUN_OBJS = \
+RUN_OBJS += \
$(OBJDIR)/erl_alloc.o $(OBJDIR)/erl_mtrace.o \
$(OBJDIR)/erl_alloc_util.o $(OBJDIR)/erl_goodfit_alloc.o \
$(OBJDIR)/erl_bestfit_alloc.o $(OBJDIR)/erl_afit_alloc.o \
@@ -783,7 +841,7 @@ RUN_OBJS = \
$(OBJDIR)/utils.o $(OBJDIR)/bif.o \
$(OBJDIR)/io.o $(OBJDIR)/erl_printf_term.o\
$(OBJDIR)/erl_debug.o $(OBJDIR)/erl_md5.o \
- $(OBJDIR)/erl_message.o $(OBJDIR)/erl_process.o \
+ $(OBJDIR)/erl_message.o \
$(OBJDIR)/erl_process_dict.o $(OBJDIR)/erl_process_lock.o \
$(OBJDIR)/erl_port_task.o $(OBJDIR)/erl_arith.o \
$(OBJDIR)/time.o $(OBJDIR)/erl_time_sup.o \
@@ -925,21 +983,23 @@ ifdef HIPE_ENABLED
EXTRA_BASE_OBJS += $(HIPE_OBJS)
endif
-BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) $(LTTNG_OBJS)
+BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) \
+ $(LTTNG_OBJS) $(DRV_OBJS) $(NIF_OBJS)
-before_DTrace_OBJS = $(BASE_OBJS) $(DRV_OBJS) $(NIF_OBJS)
+PROF_OBJS = $(patsubst %.o,%$(PROFILE_MARKER).o,$(PROFILE_OBJS)) $(BASE_OBJS)
+
+OBJS = $(PROF_OBJS)
-DTRACE_OBJS =
ifdef DTRACE_ENABLED_2STEP
-DTRACE_OBJS = $(OBJDIR)/erlang_dtrace.o
-$(OBJDIR)/erlang_dtrace.o: $(before_DTrace_OBJS) $(TARGET)/erlang_dtrace.h
+# The $(PROFILE_MARKER) is placed in the object file name in order to
+# make sure we re-compile with the new object files for the profiled emulator
+OBJS += $(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o
+$(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o: $(PROF_OBJS) $(TARGET)/erlang_dtrace.h
dtrace -G -C -Ibeam \
-s beam/erlang_dtrace.d \
- -o $@ $(before_DTrace_OBJS)
+ -o $@ $(PROF_OBJS)
endif
-OBJS = $(before_DTrace_OBJS) $(DTRACE_OBJS)
-
$(INIT_OBJS): $(TTF_DIR)/GENERATED
$(OBJS): $(TTF_DIR)/GENERATED
@@ -1031,8 +1091,8 @@ $(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS)
else
$(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS)
- $(ld_verbose)$(PURIFY) $(LD) -o $(BINDIR)/$(EMULATOR_EXECUTABLE) \
- $(HIPEBEAMLDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \
+ $(ld_verbose)$(PURIFY) $(LD) -o $@ \
+ $(HIPEBEAMLDFLAGS) $(PROFILE_LDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \
$(STATIC_NIF_LIBS) $(STATIC_DRIVER_LIBS) $(LIBS)
endif
diff --git a/erts/emulator/test/estone_SUITE.erl b/erts/emulator/test/estone_SUITE.erl
index 8b336b366d..c4899967ca 100644
--- a/erts/emulator/test/estone_SUITE.erl
+++ b/erts/emulator/test/estone_SUITE.erl
@@ -20,7 +20,7 @@
-module(estone_SUITE).
%% Test functions
-export([all/0, suite/0, groups/0,
- estone/1, estone_bench/1]).
+ estone/1, estone_bench/1, pgo/0]).
%% Internal exports for EStone tests
-export([lists/1,
@@ -44,9 +44,9 @@
links/1,lproc/1,
run_micro/3,p1/1,ppp/3,macro/2,micros/0]).
-
--include_lib("common_test/include/ct.hrl").
+-ifndef(PGO).
-include_lib("common_test/include/ct_event.hrl").
+-endif.
%% EStone defines
-define(TOTAL, (3000 * 1000 * 100)). %% 300 secs
@@ -85,13 +85,28 @@ estone(Config) when is_list(Config) ->
estone_bench(Config) ->
DataDir = proplists:get_value(data_dir,Config),
L = ?MODULE:macro(?MODULE:micros(),DataDir),
- [ct_event:notify(
- #event{name = benchmark_data,
- data = [{name,proplists:get_value(title,Mark)},
- {value,proplists:get_value(estones,Mark)}]})
- || Mark <- L],
+ {_Total, Stones} = sum_micros(L, 0, 0), %% first element is unused here; `_` prefix avoids an unused-variable warning
+ notify([[{title,"ESTONES"}, {estones, Stones}] | L]),
L.
+-ifndef(PGO).
+notify(Marks) ->
+ [ct_event:notify(
+ #event{name = benchmark_data,
+ data = [{name,proplists:get_value(title, Mark)},
+ {value,proplists:get_value(estones, Mark)}]})
+ || Mark <- Marks].
+-else.
+notify(_) ->
+ ok.
+-endif.
+
+%% The benchmarks to run in order to guide PGO (profile guided optimisation)
+pgo() ->
+ %% We run all benchmarks except the port_io as we don't want to
+ %% have to build a custom port.
+ Micros = ?MODULE:micros() -- [micro(port_io)],
+ ?MODULE:macro(Micros,[]).
%%
%% Calculate CPU speed