aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorLukas Larsson <[email protected]>2017-08-24 16:20:36 +0200
committerLukas Larsson <[email protected]>2017-09-08 09:46:42 +0200
commita9812e6307fe335d077f96d3a6342cbd4894ed0b (patch)
tree6037e6c982f04acf2a24bacc25767e9cbf1aeded /erts
parent5d124efc4f8408cdbe2f23ca6b233b932f1dd7c6 (diff)
downloadotp-a9812e6307fe335d077f96d3a6342cbd4894ed0b.tar.gz
otp-a9812e6307fe335d077f96d3a6342cbd4894ed0b.tar.bz2
otp-a9812e6307fe335d077f96d3a6342cbd4894ed0b.zip
Add support for building a pgo beam_emu
Diffstat (limited to 'erts')
-rw-r--r--erts/Makefile10
-rw-r--r--erts/aclocal.m415
-rw-r--r--erts/configure.in88
-rw-r--r--erts/emulator/Makefile.in102
-rw-r--r--erts/emulator/test/estone_SUITE.erl31
5 files changed, 212 insertions, 34 deletions
diff --git a/erts/Makefile b/erts/Makefile
index 73d8560c1e..0393ccc759 100644
--- a/erts/Makefile
+++ b/erts/Makefile
@@ -49,11 +49,6 @@ debug opt lcnt clean:
done
(cd preloaded/src && $(MAKE) ../ebin/erts.app)
-# ----------------------------------------------------------------------
-# These are "convenience targets", provided as shortcuts for developers
-# - don't use them in scripts or assume they will always stay like this!
-#
-
.PHONY: smp
smp:
$(V_at)for type in $(TYPES); do \
@@ -112,6 +107,11 @@ local_setup:
$(ERL_TOP)/bin/start_clean.script \
$(ERL_TOP)/bin/no_dot_erlang.script
+# ----------------------------------------------------------------------
+# These are "convenience targets", provided as shortcuts for developers
+# - don't use them in scripts or assume they will always stay like this!
+#
+
# Run the configure script
.PHONY: configure
configure:
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4
index 80bf236188..887babc13f 100644
--- a/erts/aclocal.m4
+++ b/erts/aclocal.m4
@@ -2726,6 +2726,21 @@ AC_DEFUN([LM_TRY_ENABLE_CFLAG], [
fi
])
+AC_DEFUN([LM_CHECK_ENABLE_CFLAG], [
+ AC_MSG_CHECKING([whether $CC accepts $1...])
+ saved_CFLAGS=$CFLAGS;
+ CFLAGS="$1 $CFLAGS";
+ AC_TRY_COMPILE([],[return 0;],can_enable_flag=true,can_enable_flag=false)
+ CFLAGS=$saved_CFLAGS;
+ if test "X$can_enable_flag" = "Xtrue"; then
+ AS_VAR_SET($2, true)
+ AC_MSG_RESULT([yes])
+ else
+ AS_VAR_SET($2, false)
+ AC_MSG_RESULT([no])
+ fi
+])
+
dnl ERL_TRY_LINK_JAVA(CLASSES, FUNCTION-BODY
dnl [ACTION_IF_FOUND [, ACTION-IF-NOT-FOUND]])
dnl Freely inspired by AC_TRY_LINK. (Maybe better to create a
diff --git a/erts/configure.in b/erts/configure.in
index b765a8ffe4..9948e71b2d 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -554,6 +554,94 @@ AC_SUBST(WFLAGS)
AC_SUBST(WERRORFLAGS)
AC_SUBST(CFLAG_RUNTIME_LIBRARY_PATH)
+## Check if we can do profile guided optimization of beam_emu
+LM_CHECK_ENABLE_CFLAG([-fprofile-generate -Werror],[PROFILE_GENERATE])
+LM_CHECK_ENABLE_CFLAG([-fprofile-use -Werror],[PROFILE_USE])
+
+## Check if this is clang
+LM_CHECK_ENABLE_CFLAG([-fprofile-instr-generate -Werror],[PROFILE_INSTR_GENERATE])
+if test "X$PROFILE_INSTR_GENERATE" = "Xtrue"; then
+ # It was clang, now we also have to check if we have llvm-profdata and that
+ # we can link programs with -fprofile-instr-use
+ saved_CFLAGS=$CFLAGS;
+ CFLAGS="-fprofile-instr-generate -Werror $saved_CFLAGS"
+ AC_RUN_IFELSE([AC_LANG_PROGRAM([],[])],
+ [AC_CHECK_PROGS([LLVM_PROFDATA], [llvm-profdata])
+ AC_CHECK_PROGS([XCRUN], [xcrun])
+ if test "X$XCRUN" != "X" -a "X$LLVM_PROFDATA" = "X"; then
+ AC_MSG_CHECKING([for $XCRUN llvm-profdata])
+ if $XCRUN llvm-profdata --help 2>& AS_MESSAGE_LOG_FD >& AS_MESSAGE_LOG_FD; then
+ LLVM_PROFDATA="$XCRUN llvm-profdata"
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ fi
+ fi
+ AC_SUBST(LLVM_PROFDATA)
+ if test "X$LLVM_PROFDATA" != "X"; then
+ CFLAGS="-fprofile-instr-use=default.profdata -Werror $saved_CFLAGS";
+ $LLVM_PROFDATA merge -output=default.profdata *.profraw;
+ AC_MSG_CHECKING([whether gcc accepts -fprofile-instr-use=default.profdata -Werror])
+ AC_COMPILE_IFELSE([],
+ [AC_MSG_RESULT([yes])
+ PROFILE_INSTR_USE=true],
+ [AC_MSG_RESULT([no])
+ PROFILE_INSTR_USE=false])
+ rm -f default.profdata
+ fi],
+ [])
+ rm -f *.profraw
+ CFLAGS=$saved_CFLAGS;
+fi
+
+AC_ARG_ENABLE(pgo,
+AS_HELP_STRING([--enable-pgo],
+ [build erts using PGO (profile guided optimization)]),
+[ case "$enableval" in
+ no) enable_pgo=no ;;
+ *) enable_pgo=yes ;;
+ esac
+],enable_pgo=default)
+
+USE_PGO=false
+AC_MSG_CHECKING([whether to do PGO of erts])
+if test $enable_pgo = no; then
+ AC_MSG_RESULT([no, disabled by user])
+elif test $CROSS_COMPILING = yes; then
+ if $enable_pgo = yes; then
+ AC_MSG_ERROR(cannot use PGO when cross-compiling)
+ else
+ AC_MSG_RESULT([no, cross compiling])
+ fi
+elif test "X$host" = "Xwin32"; then
+ AC_MSG_RESULT([no, not supported in windows])
+elif test "X$PROFILE_GENERATE" = "Xtrue" -a "X$PROFILE_USE" = "Xtrue"; then
+ USE_PGO=true
+ AC_MSG_RESULT([yes, using -fprofile-generate])
+ PROFILE_COMPILER=gcc
+# check if $CC accepts -fprofile-correction, if so we can use PGO on multi-threaded files.
+ LM_CHECK_ENABLE_CFLAG([-fprofile-use -fprofile-correction -Werror],[PROFILE_CORRECTION])
+ if test "X$PROFILE_CORRECTION" = "Xtrue"; then
+ PROFILE_CORRECTION="-fprofile-correction"
+ else
+ PROFILE_CORRECTION=""
+ fi
+ AC_SUBST(PROFILE_CORRECTION)
+elif test "X$PROFILE_INSTR_GENERATE" = "Xtrue" -a "X$PROFILE_INSTR_USE" = "Xtrue"; then
+ USE_PGO=true
+ AC_MSG_RESULT([yes, using -fprofile-instr-generate])
+ PROFILE_COMPILER=clang
+else
+ if $enable_pgo = yes; then
+ AC_MSG_ERROR(cannot use PGO with this compiler)
+ else
+ AC_MSG_RESULT([no])
+ fi
+fi
+
+AC_SUBST(USE_PGO)
+AC_SUBST(PROFILE_COMPILER)
+
AC_CHECK_SIZEOF(void *) # Needed for ARCH and smp checks below
if test "x$ac_cv_sizeof_void_p" = x8; then
AC_SUBST(EXTERNAL_WORD_SIZE, 64)
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index bc7eb72221..2b203c9ee6 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -63,6 +63,28 @@ ARFLAGS=rc
OMIT_OMIT_FP=no
TYPE_LIBS=
+PROFILE_COMPILER=@PROFILE_COMPILER@
+PROFILE_MARKER=
+ifeq ($(PROFILE),generate)
+PROFILE_MARKER=_pg
+else
+ifeq ($(PROFILE),use)
+PROFILE_MARKER=_pu
+endif
+endif
+
+ifeq ($(PROFILE_COMPILER), gcc)
+PROFILE_CORRECTION=@PROFILE_CORRECTION@
+PROFILE_GENERATE=-fprofile-generate
+PROFILE_USE=-fprofile-use $(PROFILE_CORRECTION)
+PROFILE_USE_DEPS=$(OBJDIR)/%_pu.gcda
+endif
+ifeq ($(PROFILE_COMPILER), clang)
+PROFILE_GENERATE=-fprofile-instr-generate
+PROFILE_USE=-fprofile-instr-use=$(OBJDIR)/default.profdata
+PROFILE_USE_DEPS=$(OBJDIR)/default.profdata
+endif
+
DIRTY_SCHEDULER_SUPPORT=@DIRTY_SCHEDULER_SUPPORT@
DIRTY_SCHEDULER_TEST=@DIRTY_SCHEDULER_TEST@
@@ -418,9 +440,20 @@ ifeq ($(TARGET), win32)
EMULATOR_EXECUTABLE = beam$(TF_MARKER).dll
else
EMULATOR_EXECUTABLE = beam$(TF_MARKER)
+PROFILE_EXECUTABLE = beam.prof$(TF_MARKER)
endif
CS_EXECUTABLE = erl_child_setup$(TYPEMARKER)
+ifeq ($(PROFILE), generate)
+EMULATOR_EXECUTABLE = $(PROFILE_EXECUTABLE)
+ifeq ($(PROFILE_COMPILER), gcc)
+PROFILE_LDFLAGS = -fprofile-generate
+endif
+ifeq ($(PROFILE_COMPILER), clang)
+PROFILE_LDFLAGS = -fprofile-instr-generate
+endif
+endif
+
# ----------------------------------------------------------------------
ifeq ($(ERLANG_OSTYPE), unix)
@@ -687,16 +720,33 @@ $(OBJDIR)/beams.$(RES_EXT): $(TARGET)/beams.rc
endif
-ifneq ($(filter tile-%,$(TARGET)),)
-$(OBJDIR)/beam_emu.o: beam/beam_emu.c
- $(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) \
- $(INCLUDES) -c $< -o $@
-else
# Usually the same as the default rule, but certain platforms (e.g. win32) mix
# different compilers
$(OBJDIR)/beam_emu.o: beam/beam_emu.c
$(V_EMU_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
-endif
+
+$(OBJDIR)/%_pg.o: beam/%.c
+ $(V_CC) $(PROFILE_GENERATE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
+$(OBJDIR)/%_pu.o: beam/%.c $(PROFILE_USE_DEPS)
+ $(V_CC) $(PROFILE_USE) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
+
+$(OBJDIR)/PROFILE: $(BINDIR)/$(PROFILE_EXECUTABLE)
+ $(V_at)echo " PROFILE ${PROFILE_EXECUTABLE}"
+ $(V_at)rm -f $(OBJDIR)/erl*.profraw
+ $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erlc-%m.profraw" \
+ ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERLC) -DPGO \
+ -o $(OBJDIR) test/estone_SUITE.erl > $(OBJDIR)/PROFILE_LOG
+ $(V_at)set -e; LLVM_PROFILE_FILE="$(OBJDIR)/erl-%m.profraw" \
+ ERL_FLAGS="-emu_type prof${TYPEMARKER} +S 1" $(ERL) -pa $(OBJDIR) \
+ -noshell -s estone_SUITE pgo -s init stop >> $(OBJDIR)/PROFILE_LOG
+ $(V_at)touch $@
+
+$(OBJDIR)/%_pu.gcda: $(OBJDIR)/PROFILE
+ $(V_at)mv $(OBJDIR)/$*_pg.gcda $@
+ $(V_at)touch $@
+
+$(OBJDIR)/default.profdata: $(OBJDIR)/PROFILE
+ $(V_LLVM_PROFDATA) merge -output $@ $(OBJDIR)/*.profraw
$(OBJDIR)/%.o: beam/%.c
$(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) $(INCLUDES) -c $< -o $@
@@ -758,15 +808,23 @@ $(ERL_TOP)/lib/%.beam:
INIT_OBJS = $(OBJDIR)/erl_main.o $(PRELOAD_OBJ)
+# -fprofile-correction is needed in order to use PGO on erl_process
+# as multiple threads execute in that file.
+ifeq ($(PROFILE_CORRECTION),)
+PROFILE_OBJS = $(OBJDIR)/beam_emu.o
+RUN_OBJS = $(OBJDIR)/erl_process.o
+else
+PROFILE_OBJS = $(OBJDIR)/beam_emu.o $(OBJDIR)/erl_process.o
+endif
+
EMU_OBJS = \
- $(OBJDIR)/beam_emu.o $(OBJDIR)/beam_opcodes.o \
+ $(OBJDIR)/beam_opcodes.o \
$(OBJDIR)/beam_load.o $(OBJDIR)/beam_bif_load.o \
$(OBJDIR)/beam_debug.o $(OBJDIR)/beam_bp.o \
- $(OBJDIR)/beam_catches.o \
- $(OBJDIR)/code_ix.o \
+ $(OBJDIR)/beam_catches.o $(OBJDIR)/code_ix.o \
$(OBJDIR)/beam_ranges.o
-RUN_OBJS = \
+RUN_OBJS += \
$(OBJDIR)/erl_alloc.o $(OBJDIR)/erl_mtrace.o \
$(OBJDIR)/erl_alloc_util.o $(OBJDIR)/erl_goodfit_alloc.o \
$(OBJDIR)/erl_bestfit_alloc.o $(OBJDIR)/erl_afit_alloc.o \
@@ -782,7 +840,7 @@ RUN_OBJS = \
$(OBJDIR)/utils.o $(OBJDIR)/bif.o \
$(OBJDIR)/io.o $(OBJDIR)/erl_printf_term.o\
$(OBJDIR)/erl_debug.o $(OBJDIR)/erl_md5.o \
- $(OBJDIR)/erl_message.o $(OBJDIR)/erl_process.o \
+ $(OBJDIR)/erl_message.o \
$(OBJDIR)/erl_process_dict.o $(OBJDIR)/erl_process_lock.o \
$(OBJDIR)/erl_port_task.o $(OBJDIR)/erl_arith.o \
$(OBJDIR)/time.o $(OBJDIR)/erl_time_sup.o \
@@ -923,21 +981,23 @@ ifdef HIPE_ENABLED
EXTRA_BASE_OBJS += $(HIPE_OBJS)
endif
-BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) $(LTTNG_OBJS)
+BASE_OBJS = $(EMU_OBJS) $(RUN_OBJS) $(OS_OBJS) $(EXTRA_BASE_OBJS) \
+ $(LTTNG_OBJS) $(DRV_OBJS) $(NIF_OBJS)
-before_DTrace_OBJS = $(BASE_OBJS) $(DRV_OBJS) $(NIF_OBJS)
+PROF_OBJS = $(patsubst %.o,%$(PROFILE_MARKER).o,$(PROFILE_OBJS)) $(BASE_OBJS)
+
+OBJS = $(PROF_OBJS)
-DTRACE_OBJS =
ifdef DTRACE_ENABLED_2STEP
-DTRACE_OBJS = $(OBJDIR)/erlang_dtrace.o
-$(OBJDIR)/erlang_dtrace.o: $(before_DTrace_OBJS) $(TARGET)/erlang_dtrace.h
+# The $(PROFILE_MARKER) is placed in the object file name in order to
+# make sure we re-compile with the new object files for the profiled emulator
+OBJS += $(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o
+$(OBJDIR)/erlang$(PROFILE_MARKER)_dtrace.o: $(PROF_OBJS) $(TARGET)/erlang_dtrace.h
dtrace -G -C -Ibeam \
-s beam/erlang_dtrace.d \
- -o $@ $(before_DTrace_OBJS)
+ -o $@ $(PROF_OBJS)
endif
-OBJS = $(before_DTrace_OBJS) $(DTRACE_OBJS)
-
$(INIT_OBJS): $(TTF_DIR)/GENERATED
$(OBJS): $(TTF_DIR)/GENERATED
@@ -1029,8 +1089,8 @@ $(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS)
else
$(BINDIR)/$(EMULATOR_EXECUTABLE): $(INIT_OBJS) $(OBJS) $(DEPLIBS)
- $(ld_verbose)$(PURIFY) $(LD) -o $(BINDIR)/$(EMULATOR_EXECUTABLE) \
- $(HIPEBEAMLDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \
+ $(ld_verbose)$(PURIFY) $(LD) -o $@ \
+ $(HIPEBEAMLDFLAGS) $(PROFILE_LDFLAGS) $(LDFLAGS) $(DEXPORT) $(INIT_OBJS) $(OBJS) \
$(STATIC_NIF_LIBS) $(STATIC_DRIVER_LIBS) $(LIBS)
endif
diff --git a/erts/emulator/test/estone_SUITE.erl b/erts/emulator/test/estone_SUITE.erl
index 8b336b366d..c4899967ca 100644
--- a/erts/emulator/test/estone_SUITE.erl
+++ b/erts/emulator/test/estone_SUITE.erl
@@ -20,7 +20,7 @@
-module(estone_SUITE).
%% Test functions
-export([all/0, suite/0, groups/0,
- estone/1, estone_bench/1]).
+ estone/1, estone_bench/1, pgo/0]).
%% Internal exports for EStone tests
-export([lists/1,
@@ -44,9 +44,9 @@
links/1,lproc/1,
run_micro/3,p1/1,ppp/3,macro/2,micros/0]).
-
--include_lib("common_test/include/ct.hrl").
+-ifndef(PGO).
-include_lib("common_test/include/ct_event.hrl").
+-endif.
%% EStone defines
-define(TOTAL, (3000 * 1000 * 100)). %% 300 secs
@@ -85,13 +85,28 @@ estone(Config) when is_list(Config) ->
estone_bench(Config) ->
DataDir = proplists:get_value(data_dir,Config),
L = ?MODULE:macro(?MODULE:micros(),DataDir),
- [ct_event:notify(
- #event{name = benchmark_data,
- data = [{name,proplists:get_value(title,Mark)},
- {value,proplists:get_value(estones,Mark)}]})
- || Mark <- L],
+ {Total, Stones} = sum_micros(L, 0, 0),
+ notify([[{title,"ESTONES"}, {estones, Stones}] | L]),
L.
+-ifndef(PGO).
+notify(Marks) ->
+ [ct_event:notify(
+ #event{name = benchmark_data,
+ data = [{name,proplists:get_value(title, Mark)},
+ {value,proplists:get_value(estones, Mark)}]})
+ || Mark <- Marks].
+-else.
+notify(_) ->
+ ok.
+-endif.
+
+%% The benchmarks to run in order to guide PGO (profile guided optimisation)
+pgo() ->
+ %% We run all benchmarks except the port_io as we don't want to
+ %% have to build a custom port.
+ Micros = ?MODULE:micros() -- [micro(port_io)],
+ ?MODULE:macro(Micros,[]).
%%
%% Calculate CPU speed