diff options
Diffstat (limited to 'erts')
42 files changed, 4249 insertions, 2414 deletions
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4 index 3b1edd7605..443d8622bf 100644 --- a/erts/aclocal.m4 +++ b/erts/aclocal.m4 @@ -386,14 +386,24 @@ AC_DEFUN(LM_SYS_IPV6, AC_CACHE_VAL(ac_cv_sys_ipv6_support, [ok_so_far=yes AC_TRY_COMPILE([#include <sys/types.h> -#include <netinet/in.h>], +#ifdef __WIN32__ +#include <winsock2.h> +#include <ws2tcpip.h> +#else +#include <netinet/in.h> +#endif], [struct in6_addr a6; struct sockaddr_in6 s6;], ok_so_far=yes, ok_so_far=no) if test $ok_so_far = yes; then ac_cv_sys_ipv6_support=yes else AC_TRY_COMPILE([#include <sys/types.h> -#include <netinet/in.h>], +#ifdef __WIN32__ +#include <winsock2.h> +#include <ws2tcpip.h> +#else +#include <netinet/in.h> +#endif], [struct in_addr6 a6; struct sockaddr_in6 s6;], ac_cv_sys_ipv6_support=in_addr6, ac_cv_sys_ipv6_support=no) fi @@ -994,8 +1004,8 @@ case "$THR_LIB_NAME" in case "$host_cpu" in sun4u | sparc64 | sun4v) - ethr_have_native_atomics=yes;; - i86pc | i386 | i486 | i586 | i686 | x86_64 | amd64) + ethr_have_native_atomics=yes;; + i86pc | i*86 | x86_64 | amd64) ethr_have_native_atomics=yes;; macppc | ppc | "Power Macintosh") ethr_have_native_atomics=yes;; @@ -1090,7 +1100,7 @@ test "X$disable_native_ethr_impls" = "Xyes" && AC_ARG_ENABLE(prefer-gcc-native-ethr-impls, AS_HELP_STRING([--enable-prefer-gcc-native-ethr-impls], - [enable prefer gcc native ethread implementations]), + [prefer gcc native ethread implementations]), [ case "$enableval" in yes) enable_prefer_gcc_native_ethr_impls=yes ;; *) enable_prefer_gcc_native_ethr_impls=no ;; @@ -1099,21 +1109,60 @@ AC_ARG_ENABLE(prefer-gcc-native-ethr-impls, test $enable_prefer_gcc_native_ethr_impls = yes && AC_DEFINE(ETHR_PREFER_GCC_NATIVE_IMPLS, 1, [Define if you prefer gcc native ethread implementations]) +AC_ARG_WITH(libatomic_ops, + AS_HELP_STRING([--with-libatomic_ops=PATH], + [specify and prefer usage of libatomic_ops in the ethread library])) + AC_ARG_ENABLE(ethread-pre-pentium4-compatibility, AS_HELP_STRING([--enable-ethread-pre-pentium4-compatibility], [enable compatibility with x86 processors before pentium 4 (back to 486) in the ethread library]), -[ case "$enableval" in - yes) enable_ethread_pre_pentium4_compatibility=yes ;; - *) enable_ethread_pre_pentium4_compatibilit=no ;; - esac ], enable_ethread_pre_pentium4_compatibilit=no) +[ + case "$enable_ethread_pre_pentium4_compatibility" in + yes|no) ;; + *) enable_ethread_pre_pentium4_compatibility=check;; + esac +], +[enable_ethread_pre_pentium4_compatibility=check]) + +test "$cross_compiling" != "yes" || enable_ethread_pre_pentium4_compatibility=no + +case "$enable_ethread_pre_pentium4_compatibility-$host_cpu" in + check-i86pc | check-i*86) + AC_MSG_CHECKING([whether pre pentium 4 compatibility should forced]) + AC_RUN_IFELSE([ +#if defined(__GNUC__) +# if defined(ETHR_PREFER_LIBATOMIC_OPS_NATIVE_IMPLS) +# define CHECK_LIBATOMIC_OPS__ +# else +# define CHECK_GCC_ASM__ +# endif +#elif defined(ETHR_HAVE_LIBATOMIC_OPS) +# define CHECK_LIBATOMIC_OPS__ +#endif +#if defined(CHECK_LIBATOMIC_OPS__) +#include "atomic_ops.h" +#endif +int main(void) +{ +#if defined(CHECK_GCC_ASM__) + __asm__ __volatile__("mfence" : : : "memory"); +#elif defined(CHECK_LIBATOMIC_OPS__) + AO_nop_full(); +#endif + return 0; +} + ], + [enable_ethread_pre_pentium4_compatibility=no], + [enable_ethread_pre_pentium4_compatibility=yes], + [enable_ethread_pre_pentium4_compatibility=no]) + AC_MSG_RESULT([$enable_ethread_pre_pentium4_compatibility]);; + *) + ;; +esac -test $enable_ethread_pre_pentium4_compatibilit = yes && +test $enable_ethread_pre_pentium4_compatibility = yes && AC_DEFINE(ETHR_PRE_PENTIUM4_COMPAT, 1, [Define if you want compatibilty with x86 processors before pentium4.]) -AC_ARG_WITH(libatomic_ops, - AS_HELP_STRING([--with-libatomic_ops=PATH], - [use libatomic_ops with the ethread library])) - AC_DEFINE(ETHR_HAVE_ETHREAD_DEFINES, 1, \ [Define if you have all ethread defines]) diff --git a/erts/autoconf/win32.config.cache.static b/erts/autoconf/win32.config.cache.static index 31dfe510cd..cc33fc09b3 100755 --- a/erts/autoconf/win32.config.cache.static +++ b/erts/autoconf/win32.config.cache.static @@ -212,7 +212,6 @@ ac_cv_sizeof_void_p=${ac_cv_sizeof_void_p=4} ac_cv_struct_exception=${ac_cv_struct_exception=no} ac_cv_struct_sockaddr_sa_len=${ac_cv_struct_sockaddr_sa_len=no} ac_cv_struct_tm=${ac_cv_struct_tm=time.h} -ac_cv_sys_ipv6_support=${ac_cv_sys_ipv6_support=no} ac_cv_sys_multicast_support=${ac_cv_sys_multicast_support=no} ac_cv_type_char=${ac_cv_type_char=yes} ac_cv_type_int=${ac_cv_type_int=yes} diff --git a/erts/configure.in b/erts/configure.in index 8c6f2ac076..8d629c25ae 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -110,7 +110,8 @@ ENABLE_ALLOC_TYPE_VARS= AC_SUBST(ENABLE_ALLOC_TYPE_VARS) AC_ARG_ENABLE(bootstrap-only, -[ --enable-bootstrap-only enable bootstrap only configuration], +AS_HELP_STRING([--enable-bootstrap-only], + [enable bootstrap only configuration]), [ if test "X$enableval" = "Xyes"; then # Disable stuff not necessary in a bootstrap only system in order # to speed up things by reducing the amount of stuff needing to be @@ -126,46 +127,46 @@ AC_ARG_ENABLE(bootstrap-only, ]) AC_ARG_ENABLE(threads, -[ --enable-threads enable async thread support - --disable-threads disable async thread support], +AS_HELP_STRING([--enable-threads], [enable async thread support]) +AS_HELP_STRING([--disable-threads], [disable async thread support]), [ case "$enableval" in no) enable_threads=no ;; *) enable_threads=yes ;; esac ], enable_threads=unknown) AC_ARG_ENABLE(halfword-emulator, -[ --enable-halfword-emulator enable halfword emulator (only for 64bit builds) - --disable-halfword-emulator disable halfword emulator (only for 64bit builds)], +AS_HELP_STRING([--enable-halfword-emulator], + [enable halfword emulator (only for 64bit builds)]), [ case "$enableval" in no) enable_halfword_emualtor=no ;; *) enable_halfword_emulator=yes ;; esac ], enable_halfword_emulator=unknown) AC_ARG_ENABLE(smp-support, -[ --enable-smp-support enable smp support - --disable-smp-support disable smp support], +AS_HELP_STRING([--enable-smp-support], [enable smp support]) +AS_HELP_STRING([--disable-smp-support], [disable smp support]), [ case "$enableval" in no) enable_smp_support=no ;; *) enable_smp_support=yes ;; esac ], enable_smp_support=unknown) AC_ARG_WITH(termcap, -[ --with-termcap use termcap (default) - --without-termcap do not use any termcap libraries (ncurses,curses,termcap,termlib)], +AS_HELP_STRING([--with-termcap], [use termcap (default)]) +AS_HELP_STRING([--without-termcap], + [do not use any termcap libraries (ncurses,curses,termcap,termlib)]), [], [with_termcap=yes]) AC_ARG_ENABLE(hybrid-heap, -[ --enable-hybrid-heap enable hybrid heap - --disable-hybrid-heap disable hybrid heap], +AS_HELP_STRING([--enable-hybrid-heap], [enable hybrid heap]), [ case "$enableval" in no) enable_hybrid_heap=no ;; *) enable_hybrid_heap=yes ;; esac ], enable_hybrid_heap=unknown) AC_ARG_ENABLE(lock-checking, -[ --enable-lock-checking enable lock checking], +AS_HELP_STRING([--enable-lock-checking], [enable lock checking]), [ case "$enableval" in no) enable_lock_check=no ;; *) enable_lock_check=yes ;; @@ -174,16 +175,15 @@ AC_ARG_ENABLE(lock-checking, enable_lock_check=no) AC_ARG_ENABLE(lock-counter, -[ --enable-lock-counter enable lock counters - --disable-lock-counter disable lock counters], +AS_HELP_STRING([--enable-lock-counter], [enable lock counters]), [ case "$enableval" in no) enable_lock_count=no ;; *) enable_lock_count=yes ;; esac ], enable_lock_count=no) AC_ARG_ENABLE(kernel-poll, -[ --enable-kernel-poll enable kernel poll support - --disable-kernel-poll disable kernel poll support], +AS_HELP_STRING([--enable-kernel-poll], [enable kernel poll support]) +AS_HELP_STRING([--disable-kernel-poll], [disable kernel poll support]), [ case "$enableval" in no) enable_kernel_poll=no ;; *) enable_kernel_poll=yes ;; @@ -191,25 +191,27 @@ AC_ARG_ENABLE(kernel-poll, AC_ARG_ENABLE(sctp, -[ --enable-sctp enable sctp support - --disable-sctp disable sctp support], +AS_HELP_STRING([--enable-sctp], [enable sctp support]) +AS_HELP_STRING([--disable-sctp], [disable sctp support]), [ case "$enableval" in no) enable_sctp=no ;; *) enable_sctp=yes ;; esac ], enable_sctp=unknown) AC_ARG_ENABLE(hipe, -[ --enable-hipe enable hipe support - --disable-hipe disable hipe support]) +AS_HELP_STRING([--enable-hipe], [enable hipe support]) +AS_HELP_STRING([--disable-hipe], [disable hipe support])) AC_ARG_ENABLE(native-libs, -[ --enable-native-libs compile Erlang libraries to native code]) +AS_HELP_STRING([--enable-native-libs], + [compile Erlang libraries to native code])) AC_ARG_ENABLE(tsp, -[ --enable-tsp compile tsp app]) +AS_HELP_STRING([--enable-tsp], [compile tsp app])) AC_ARG_ENABLE(fp-exceptions, -[ --enable-fp-exceptions Use hardware floating point exceptions (default if hipe enabled)], +AS_HELP_STRING([--enable-fp-exceptions], + [use hardware floating point exceptions (default if hipe enabled)]), [ case "$enableval" in no) enable_fp_exceptions=no ;; *) enable_fp_exceptions=yes ;; @@ -217,7 +219,8 @@ AC_ARG_ENABLE(fp-exceptions, ],enable_fp_exceptions=auto) AC_ARG_ENABLE(darwin-universal, -[ --enable-darwin-universal build universal binaries on darwin i386], +AS_HELP_STRING([--enable-darwin-universal], + [build universal binaries on darwin i386]), [ case "$enableval" in no) enable_darwin_universal=no ;; *) enable_darwin_univeral=yes ;; @@ -226,7 +229,7 @@ AC_ARG_ENABLE(darwin-universal, AC_ARG_ENABLE(darwin-64bit, -[ --enable-darwin-64bit build 64bit binaries on darwin], +AS_HELP_STRING([--enable-darwin-64bit], [build 64bit binaries on darwin]), [ case "$enableval" in no) enable_darwin_64bit=no ;; *) enable_darwin_64bit=yes ;; @@ -234,7 +237,8 @@ AC_ARG_ENABLE(darwin-64bit, ],enable_darwin_64bit=no) AC_ARG_ENABLE(m64-build, -[ --enable-m64-build build 64bit binaries using the -m64 flag to (g)cc], +AS_HELP_STRING([--enable-m64-build], + [build 64bit binaries using the -m64 flag to (g)cc]), [ case "$enableval" in no) enable_m64_build=no ;; *) enable_m64_build=yes ;; @@ -242,7 +246,8 @@ AC_ARG_ENABLE(m64-build, ],enable_m64_build=no) AC_ARG_ENABLE(m32-build, -[ --enable-m32-build build 32bit binaries using the -m32 flag to (g)cc], +AS_HELP_STRING([--enable-m32-build], + [build 32bit binaries using the -m32 flag to (g)cc]), [ case "$enableval" in no) enable_m32_build=no ;; *) @@ -255,7 +260,7 @@ AC_ARG_ENABLE(m32-build, ],enable_m32_build=no) AC_ARG_ENABLE(fixalloc, -[ --disable-fixalloc disable the use of fix_alloc]) +AS_HELP_STRING([--disable-fixalloc], [disable the use of fix_alloc])) if test x${enable_fixalloc} = xno ; then AC_DEFINE(NO_FIX_ALLOC,[], [Define if you don't want the fix allocator in Erlang]) @@ -263,8 +268,9 @@ fi AC_SUBST(PERFCTR_PATH) AC_ARG_WITH(perfctr, -[ --with-perfctr=PATH specify location of perfctr include and lib - --without-perfctr don't use perfctr (default)]) +AS_HELP_STRING([--with-perfctr=PATH], + [specify location of perfctr include and lib]) +AS_HELP_STRING([--without-perfctr], [don't use perfctr (default)])) if test "x$with_perfctr" = "xno" -o "x$with_perfctr" = "x" ; then PERFCTR_PATH= @@ -278,7 +284,8 @@ else fi AC_ARG_ENABLE(clock-gettime, -[ --enable-clock-gettime Use clock-gettime for time correction], +AS_HELP_STRING([--enable-clock-gettime], + [use clock-gettime for time correction]), [ case "$enableval" in no) clock_gettime_correction=no ;; *) clock_gettime_correction=yes ;; @@ -1293,8 +1300,7 @@ dnl zlib dnl ------------- AC_ARG_ENABLE(shared-zlib, -[ --enable-shared-zlib enable using shared zlib library - --disable-shared-zlib disable shared zlib, compile own zlib source (default)], +AS_HELP_STRING([--enable-shared-zlib], [enable using shared zlib library]), [ case "$enableval" in no) enable_shared_zlib=no ;; *) enable_shared_zlib=yes ;; @@ -1788,7 +1794,7 @@ AC_CHECK_FUNCS([fdatasync]) dnl Find which C libraries are required to use fdatasync AC_SEARCH_LIBS(fdatasync, [rt]) -AC_CHECK_HEADERS(net/if_dl.h ifaddrs.h) +AC_CHECK_HEADERS(net/if_dl.h ifaddrs.h netpacket/packet.h) AC_CHECK_FUNCS([getifaddrs]) dnl ---------------------------------------------------------------------- @@ -1852,6 +1858,27 @@ if test $processor_bind_functionality = yes; then AC_DEFINE(HAVE_PROCESSOR_BIND, 1, [Define if you have processor_bind functionality]) fi +AC_MSG_CHECKING([for cpuset_getaffinity/cpuset_setaffinity]) +AC_TRY_COMPILE([ +#include <sys/param.h> +#include <sys/cpuset.h> +], +[ + int res; + cpuset_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + res = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset_t), &cpuset); + res = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset_t), &cpuset); + res = CPU_ISSET(1, &cpuset); + CPU_CLR(1, &cpuset); +], + cpuset_xetaffinity=yes, + cpuset_xetaffinity=no) +AC_MSG_RESULT([$cpuset_xetaffinity]) +if test $cpuset_xetaffinity = yes; then + AC_DEFINE(HAVE_CPUSET_xETAFFINITY, 1, [Define if you have cpuset_getaffinity/cpuset_setaffinity]) +fi AC_CACHE_CHECK([for 'end' symbol], erts_cv_have_end_symbol, @@ -3431,9 +3458,12 @@ AC_SUBST(STATIC_ZLIB_LIBS) std_ssl_locations="/usr/local /usr/sfw /opt/local /usr /usr/pkg /usr/local/openssl /usr/lib/openssl /usr/openssl /usr/local/ssl /usr/lib/ssl /usr/ssl" AC_ARG_WITH(ssl-zlib, -[ --with-ssl-zlib=PATH specify location of ZLib to be used by OpenSSL - --with-ssl-zlib link SSL with Zlib (default if found) - --without-ssl-zlib don't link SSL with ZLib]) +AS_HELP_STRING([--with-ssl-zlib=PATH], + [specify location of ZLib to be used by OpenSSL]) +AS_HELP_STRING([--with-ssl-zlib], + [link SSL with Zlib (default if found)]) +AS_HELP_STRING([--without-ssl-zlib], + [don't link SSL with ZLib])) if test "x$with_ssl_zlib" = "xno"; then @@ -3502,13 +3532,13 @@ fi AC_ARG_WITH(ssl, -[ --with-ssl=PATH specify location of OpenSSL include and lib - --with-ssl use SSL (default) - --without-ssl don't use SSL]) +AS_HELP_STRING([--with-ssl=PATH], [specify location of OpenSSL include and lib]) +AS_HELP_STRING([--with-ssl], [use SSL (default)]) +AS_HELP_STRING([--without-ssl], [don't use SSL])) AC_ARG_ENABLE(dynamic-ssl-lib, -[ --enable-dynamic-ssl-lib enable using dynamic openssl libraries - --disable-dynamic-ssl-lib disable using dynamic openssl libraries], +AS_HELP_STRING([--disable-dynamic-ssl-lib], + [disable using dynamic openssl libraries]), [ case "$enableval" in no) enable_dynamic_ssl=no ;; *) enable_dynamic_ssl=yes ;; @@ -3971,9 +4001,9 @@ esac AC_ARG_WITH(javac, -[ --with-javac=JAVAC specify Java compiler to use - --with-javac use a Java compiler if found (default) - --without-javac don't use any Java compiler]) +AS_HELP_STRING([--with-javac=JAVAC], [specify Java compiler to use]) +AS_HELP_STRING([--with-javac], [use a Java compiler if found (default)]) +AS_HELP_STRING([--without-javac], [don't use any Java compiler])) dnl dnl Then there are a number of apps which needs a java compiler... diff --git a/erts/doc/src/driver.xml b/erts/doc/src/driver.xml index 006a6160de..db455312ec 100644 --- a/erts/doc/src/driver.xml +++ b/erts/doc/src/driver.xml @@ -196,11 +196,14 @@ static ErlDrvData start(ErlDrvPort port, char *command) <p>We call disconnect to log out from the database. (This should have been done from Erlang, but just in case.)</p> <code type="none"><![CDATA[ - static int do_disconnect(our_data_t* data, ei_x_buff* x); +static int do_disconnect(our_data_t* data, ei_x_buff* x); static void stop(ErlDrvData drv_data) { - do_disconnect((our_data_t*)drv_data, NULL); + our_data_t* data = (our_data_t*)drv_data; + + do_disconnect(data, NULL); + driver_free(data); } ]]></code> <p>We use the binary format only to return data to the emulator; diff --git a/erts/doc/src/driver_entry.xml b/erts/doc/src/driver_entry.xml index e71b48bd92..dd949d4048 100644 --- a/erts/doc/src/driver_entry.xml +++ b/erts/doc/src/driver_entry.xml @@ -172,7 +172,7 @@ typedef struct erl_drv_entry { added to the driver list.) The driver should return 0, or if the driver can't initialize, -1.</p> </item> - <tag><marker id="start"/>int (*start)(ErlDrvPort port, char* command)</tag> + <tag><marker id="start"/>ErlDrvData (*start)(ErlDrvPort port, char* command)</tag> <item> <p>This is called when the driver is instantiated, when <c>open_port/2</c> is called. The driver should return a @@ -188,7 +188,9 @@ typedef struct erl_drv_entry { <p>This is called when the port is closed, with <c>port_close/1</c> or <c>Port ! {self(), close}</c>. Note that terminating the port owner process also closes the - port.</p> + port. If <c>drv_data</c> is a pointer to memory allocated in + <c>start</c>, then <c>stop</c> is the place to deallocate that + memory.</p> </item> <tag><marker id="output"/>void (*output)(ErlDrvData drv_data, char *buf, int len)</tag> <item> diff --git a/erts/doc/src/epmd.xml b/erts/doc/src/epmd.xml index f01cf90a36..474230cb38 100644 --- a/erts/doc/src/epmd.xml +++ b/erts/doc/src/epmd.xml @@ -119,7 +119,7 @@ <tag><c><![CDATA[-port No]]></c></tag> <item> <p>Let this instance of epmd listen to another TCP port than - default 4369. This can be also be set using the + default 4369. This can also be set using the <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the section <seealso marker="#environment_variables">Environment variables</seealso> below</p> @@ -186,7 +186,7 @@ <tag><c><![CDATA[-port No]]></c></tag> <item> <p>Contacts the <c>epmd</c> listening on the given TCP port number - (default 4369). This can be also be set using the + (default 4369). This can also be set using the <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the section <seealso marker="#environment_variables">Environment variables</seealso> below</p> diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index e36d0adb0d..a1d73fb698 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -686,7 +686,7 @@ </p></item> </taglist> <p>Binding of schedulers is currently only supported on newer - Linux, Solaris, and Windows systems.</p> + Linux, Solaris, FreeBSD, and Windows systems.</p> <p>If no CPU topology is available when the <c>+sbt</c> flag is processed and <c>BindType</c> is any other type than <c>u</c>, the runtime system will fail to start. CPU @@ -906,6 +906,25 @@ <seealso marker="kernel:error_logger#warning_map/0">error_logger(3)</seealso> for further information.</p> </item> + <tag><c><![CDATA[+zFlag Value]]></c></tag> + <item> + <p>Miscellaneous flags.</p> + <taglist> + <tag><marker id="+zdbbl"><c>+zdbbl size</c></marker></tag> + <item> + <p>Set the distribution buffer busy limit + (<seealso marker="erlang#system_info_dist_buf_busy_limit">dist_buf_busy_limit</seealso>) + in kilobytes. Valid range is 1-2097151. Default is 1024.</p> + <p>A larger buffer limit will allow processes to buffer + more outgoing messages over the distribution. When the + buffer limit has been reached, sending processes will be + suspended until the buffer size has shrunk. The buffer + limit is per distribution channel. A higher limit will + give lower latency and higher throughput at the expense + of higher memory usage.</p> + </item> + </taglist> + </item> </taglist> </section> diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 59ac3dc66c..638f7eef10 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -5178,7 +5178,7 @@ true</pre> <seealso marker="#system_info_scheduler_bindings">erlang:system_info(scheduler_bindings)</seealso>. </p> <p>Schedulers can currently only be bound on newer Linux, - Solaris, and Windows systems, but more systems will be + Solaris, FreeBSD, and Windows systems, but more systems will be supported in the future. </p> <p>In order for the runtime system to be able to bind schedulers, @@ -5559,7 +5559,7 @@ true</pre> <item> <p>Returns the automatically detected <c>CpuTopology</c>. The emulator currently only detects the CPU topology on some newer - Linux, Solaris, and Windows systems. On Windows system with + Linux, Solaris, FreeBSD, and Windows systems. On Windows system with more than 32 logical processors the CPU topology is not detected. </p> <p>For more information see the documentation of the @@ -5624,6 +5624,13 @@ true</pre> The return value will always be <c>false</c> since the elib_malloc allocator has been removed.</p> </item> + <tag><marker id="system_info_dist_buf_busy_limit"><c>dist_buf_busy_limit</c></marker></tag> + <item> + <p>Returns the value of the distribution buffer busy limit + in bytes. This limit can be set on startup by passing the + <seealso marker="erl#+zdbbl">+zdbbl</seealso> command line + flag to <c>erl</c>.</p> + </item> <tag><c>fullsweep_after</c></tag> <item> <p>Returns <c>{fullsweep_after, int()}</c> which is the diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 76d782b159..4ed0ccabc6 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -734,7 +734,7 @@ RUN_OBJS = \ $(OBJDIR)/erl_fun.o $(OBJDIR)/erl_bif_port.o \ $(OBJDIR)/erl_term.o $(OBJDIR)/erl_node_tables.o \ $(OBJDIR)/erl_monitors.o $(OBJDIR)/erl_process_dump.o \ - $(OBJDIR)/erl_bif_timer.o \ + $(OBJDIR)/erl_bif_timer.o $(OBJDIR)/erl_cpu_topology.o \ $(OBJDIR)/erl_drv_thread.o $(OBJDIR)/erl_bif_chksum.o \ $(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \ $(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \ diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 16b6aeac3f..694460d702 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -97,6 +97,8 @@ dist_msg_dbg(ErtsDistExternal *edep, char *what, byte *buf, int sz) #define PASS_THROUGH 'p' /* This code should go */ int erts_is_alive; /* System must be blocked on change */ +int erts_dist_buf_busy_limit; + /* distribution trap functions */ Export* dsend2_trap = NULL; @@ -160,7 +162,7 @@ Uint erts_dist_cache_size(void) static ErtsProcList * get_suspended_on_de(DistEntry *dep, Uint32 unset_qflgs) { - ERTS_SMP_LC_ASSERT(erts_smp_lc_spinlock_is_locked(&dep->qlock)); + ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&dep->qlock)); dep->qflgs &= ~unset_qflgs; if (dep->qflgs & ERTS_DE_QFLG_EXIT) { /* No resume when exit has been scheduled */ @@ -453,17 +455,17 @@ int erts_do_net_exits(DistEntry *dep, Eterm reason) if (dep->status & ERTS_DE_SFLG_EXITING) { #ifdef DEBUG - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qflgs & ERTS_DE_QFLG_EXIT); - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); #endif } else { dep->status |= ERTS_DE_SFLG_EXITING; - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT)); dep->qflgs |= ERTS_DE_QFLG_EXIT; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); } erts_smp_de_links_lock(dep); @@ -577,7 +579,7 @@ static void clear_dist_entry(DistEntry *dep) erts_smp_de_links_unlock(dep); #endif - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); if (!dep->out_queue.last) obuf = dep->finalized_out_queue.first; @@ -593,7 +595,7 @@ static void clear_dist_entry(DistEntry *dep) dep->status = 0; suspendees = get_suspended_on_de(dep, ERTS_DE_QFLGS_ALL); - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); erts_smp_atomic_set(&dep->dist_cmd_scheduled, 0); dep->send = NULL; erts_smp_de_rwunlock(dep); @@ -611,10 +613,10 @@ static void clear_dist_entry(DistEntry *dep) } if (obufsize) { - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qsize >= obufsize); dep->qsize -= obufsize; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); } } @@ -1453,8 +1455,6 @@ int erts_net_message(Port *prt, return -1; } -#define ERTS_DE_BUSY_LIMIT (128*1024) - static int dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy) { @@ -1538,18 +1538,18 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy) } else { ErtsProcList *plp = NULL; - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); dep->qsize += size_obuf(obuf); - if (dep->qsize >= ERTS_DE_BUSY_LIMIT) + if (dep->qsize >= erts_dist_buf_busy_limit) dep->qflgs |= ERTS_DE_QFLG_BUSY; if (!force_busy && (dep->qflgs & ERTS_DE_QFLG_BUSY)) { - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); plp = erts_proclist_create(c_p); plp->next = NULL; erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL); suspended = 1; - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); } /* Enqueue obuf on dist entry */ @@ -1575,7 +1575,7 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy) } } - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); erts_schedule_dist_command(NULL, dep); erts_smp_de_runlock(dep); @@ -1708,10 +1708,8 @@ erts_dist_command(Port *prt, int reds_limit) { Sint reds = ERTS_PORT_REDS_DIST_CMD_START; int prt_busy; - int de_busy; Uint32 status; Uint32 flags; - Uint32 qflgs; Sint obufsize = 0; ErtsDistOutputQueue oq, foq; DistEntry *dep = prt->dist_entry; @@ -1746,13 +1744,12 @@ erts_dist_command(Port *prt, int reds_limit) * a mess. */ - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); oq.first = dep->out_queue.first; oq.last = dep->out_queue.last; dep->out_queue.first = NULL; dep->out_queue.last = NULL; - qflgs = dep->qflgs; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); foq.first = dep->finalized_out_queue.first; foq.last = dep->finalized_out_queue.last; @@ -1763,17 +1760,8 @@ erts_dist_command(Port *prt, int reds_limit) goto preempted; prt_busy = (int) (prt->status & ERTS_PORT_SFLG_PORT_BUSY); - de_busy = (int) (qflgs & ERTS_DE_QFLG_BUSY); - if (prt_busy) { - if (!de_busy) { - erts_smp_spin_lock(&dep->qlock); - dep->qflgs |= ERTS_DE_QFLG_BUSY; - erts_smp_spin_unlock(&dep->qlock); - de_busy = 1; - } - } - else if (foq.first) { + if (!prt_busy && foq.first) { int preempt = 0; do { Uint size; @@ -1791,10 +1779,7 @@ erts_dist_command(Port *prt, int reds_limit) free_dist_obuf(fob); preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD); if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) { - erts_smp_spin_lock(&dep->qlock); - dep->qflgs |= ERTS_DE_QFLG_BUSY; - erts_smp_spin_unlock(&dep->qlock); - de_busy = prt_busy = 1; + prt_busy = 1; break; } } while (foq.first && !preempt); @@ -1877,10 +1862,7 @@ erts_dist_command(Port *prt, int reds_limit) free_dist_obuf(fob); preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD); if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) { - erts_smp_spin_lock(&dep->qlock); - dep->qflgs |= ERTS_DE_QFLG_BUSY; - erts_smp_spin_unlock(&dep->qlock); - de_busy = prt_busy = 1; + prt_busy = 1; if (oq.first && !preempt) goto finalize_only; } @@ -1907,22 +1889,23 @@ erts_dist_command(Port *prt, int reds_limit) * dist entry in a non-busy state and resume suspended * processes. */ - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qsize >= obufsize); dep->qsize -= obufsize; obufsize = 0; - if (de_busy && !prt_busy && dep->qsize < ERTS_DE_BUSY_LIMIT) { + if (!prt_busy + && (dep->qflgs & ERTS_DE_QFLG_BUSY) + && dep->qsize < erts_dist_buf_busy_limit) { ErtsProcList *suspendees; int resumed; suspendees = get_suspended_on_de(dep, ERTS_DE_QFLG_BUSY); - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); resumed = erts_resume_processes(suspendees); reds += resumed*ERTS_PORT_REDS_DIST_CMD_RESUMED; - de_busy = 0; } else - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); } ASSERT(!oq.first && !oq.last); @@ -1931,10 +1914,10 @@ erts_dist_command(Port *prt, int reds_limit) if (obufsize != 0) { ASSERT(obufsize > 0); - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qsize >= obufsize); dep->qsize -= obufsize; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); } ASSERT(foq.first || !foq.last); @@ -1984,9 +1967,9 @@ erts_dist_command(Port *prt, int reds_limit) foq.last = NULL; #ifdef DEBUG - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qsize == obufsize); - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); #endif } else { @@ -1995,14 +1978,14 @@ erts_dist_command(Port *prt, int reds_limit) * Unhandle buffers need to be put back first * in out_queue. */ - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); dep->qsize -= obufsize; obufsize = 0; oq.last->next = dep->out_queue.first; dep->out_queue.first = oq.first; if (!dep->out_queue.last) dep->out_queue.last = oq.last; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); } erts_schedule_dist_command(prt, NULL); @@ -2026,10 +2009,10 @@ erts_kill_dist_connection(DistEntry *dep, Uint32 connection_id) dep->status |= ERTS_DE_SFLG_EXITING; - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT)); dep->qflgs |= ERTS_DE_QFLG_EXIT; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); erts_schedule_dist_command(NULL, dep); } @@ -2400,13 +2383,13 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3) ErtsProcList *plp = erts_proclist_create(BIF_P); plp->next = NULL; erts_suspend(BIF_P, ERTS_PROC_LOCK_MAIN, NULL); - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); if (dep->suspended.last) dep->suspended.last->next = plp; else dep->suspended.first = plp; dep->suspended.last = plp; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); goto yield; } @@ -2434,9 +2417,9 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3) ASSERT(dep->send); #ifdef DEBUG - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); ASSERT(dep->qsize == 0); - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); #endif erts_set_dist_entry_connected(dep, BIF_ARG_2, flags); diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h index fa19c7fb45..64caf34550 100644 --- a/erts/emulator/beam/dist.h +++ b/erts/emulator/beam/dist.h @@ -99,7 +99,8 @@ typedef struct { #define ERTS_DE_IS_CONNECTED(DEP) \ (!ERTS_DE_IS_NOT_CONNECTED((DEP))) - +#define ERTS_DE_BUSY_LIMIT (1024*1024) +extern int erts_dist_buf_busy_limit; extern int erts_is_alive; /* @@ -153,10 +154,10 @@ erts_dsig_prepare(ErtsDSigData *dsdp, } if (no_suspend) { failure = ERTS_DSIG_PREP_CONNECTED; - erts_smp_spin_lock(&dep->qlock); + erts_smp_mtx_lock(&dep->qlock); if (dep->qflgs & ERTS_DE_QFLG_BUSY) failure = ERTS_DSIG_PREP_WOULD_SUSPEND; - erts_smp_spin_unlock(&dep->qlock); + erts_smp_mtx_unlock(&dep->qlock); if (failure == ERTS_DSIG_PREP_WOULD_SUSPEND) goto fail; } diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 7df9f19af0..408ffd12f7 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -247,7 +247,7 @@ type CPUDATA LONG_LIVED SYSTEM cpu_data type TMP_CPU_IDS SHORT_LIVED SYSTEM tmp_cpu_ids type EXT_TERM_DATA SHORT_LIVED PROCESSES external_term_data type ZLIB STANDARD SYSTEM zlib -type RDR_GRPS_MAP LONG_LIVED SYSTEM reader_groups_map +type CPU_GRPS_MAP LONG_LIVED SYSTEM cpu_groups_map +if smp type ASYNC SHORT_LIVED SYSTEM async diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 40d8dc097c..89e3b3209c 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -38,6 +38,7 @@ #include "erl_instrument.h" #include "dist.h" #include "erl_gc.h" +#include "erl_cpu_topology.h" #ifdef HIPE #include "hipe_arch.h" #endif @@ -1687,6 +1688,8 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */ return erts_get_cpu_topology_term(BIF_P, *tp); } else if (ERTS_IS_ATOM_STR("cpu_topology", sel) && arity == 2) { Eterm res = erts_get_cpu_topology_term(BIF_P, *tp); + if (res == THE_NON_VALUE) + goto badarg; ERTS_BIF_PREP_TRAP1(ret, erts_format_cpu_topology_trap, BIF_P, res); return ret; #if defined(PURIFY) || defined(VALGRIND) @@ -2345,9 +2348,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) /* Arguments that are unusual follow ... */ else if (ERTS_IS_ATOM_STR("logical_processors", BIF_ARG_1)) { int no; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - no = erts_get_cpu_configured(erts_cpuinfo); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + erts_get_logical_processors(&no, NULL, NULL); if (no > 0) BIF_RET(make_small((Uint) no)); else { @@ -2357,9 +2358,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (ERTS_IS_ATOM_STR("logical_processors_online", BIF_ARG_1)) { int no; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - no = erts_get_cpu_online(erts_cpuinfo); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + erts_get_logical_processors(NULL, &no, NULL); if (no > 0) BIF_RET(make_small((Uint) no)); else { @@ -2369,9 +2368,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) } else if (ERTS_IS_ATOM_STR("logical_processors_available", BIF_ARG_1)) { int no; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - no = erts_get_cpu_available(erts_cpuinfo); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + erts_get_logical_processors(NULL, NULL, &no); if (no > 0) BIF_RET(make_small((Uint) no)); else { @@ -2533,6 +2530,13 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(erts_nif_taints(BIF_P)); } else if (ERTS_IS_ATOM_STR("reader_groups_map", BIF_ARG_1)) { BIF_RET(erts_get_reader_groups_map(BIF_P)); + } else if (ERTS_IS_ATOM_STR("dist_buf_busy_limit", BIF_ARG_1)) { + Uint hsz = 0; + + (void) erts_bld_uint(NULL, &hsz, erts_dist_buf_busy_limit); + hp = hsz ? HAlloc(BIF_P, hsz) : NULL; + res = erts_bld_uint(&hp, NULL, erts_dist_buf_busy_limit); + BIF_RET(res); } BIF_ERROR(BIF_P, BADARG); diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c new file mode 100644 index 0000000000..db95c4a5d4 --- /dev/null +++ b/erts/emulator/beam/erl_cpu_topology.c @@ -0,0 +1,2359 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2010. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: CPU topology and related functionality + * + * Author: Rickard Green + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <ctype.h> + +#include "global.h" +#include "error.h" +#include "bif.h" +#include "erl_cpu_topology.h" + +#define ERTS_MAX_READER_GROUPS 8 + +/* + * Cpu topology hierarchy. + */ +#define ERTS_TOPOLOGY_NODE 0 +#define ERTS_TOPOLOGY_PROCESSOR 1 +#define ERTS_TOPOLOGY_PROCESSOR_NODE 2 +#define ERTS_TOPOLOGY_CORE 3 +#define ERTS_TOPOLOGY_THREAD 4 +#define ERTS_TOPOLOGY_LOGICAL 5 + +#define ERTS_TOPOLOGY_MAX_DEPTH 6 + +typedef struct { + int bind_id; + int bound_id; +} ErtsCpuBindData; + +static erts_cpu_info_t *cpuinfo; + +static int max_main_threads; +static int reader_groups; + +static ErtsCpuBindData *scheduler2cpu_map; +static erts_smp_rwmtx_t cpuinfo_rwmtx; + +typedef enum { + ERTS_CPU_BIND_UNDEFINED, + ERTS_CPU_BIND_SPREAD, + ERTS_CPU_BIND_PROCESSOR_SPREAD, + ERTS_CPU_BIND_THREAD_SPREAD, + ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD, + ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD, + ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD, + ERTS_CPU_BIND_NO_SPREAD, + ERTS_CPU_BIND_NONE +} ErtsCpuBindOrder; + +#define ERTS_CPU_BIND_DEFAULT_BIND \ + ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD + +static int no_cpu_groups_callbacks; +static ErtsCpuBindOrder cpu_bind_order; + +static erts_cpu_topology_t *user_cpudata; +static int user_cpudata_size; +static erts_cpu_topology_t *system_cpudata; +static int system_cpudata_size; + +typedef struct { + int level[ERTS_TOPOLOGY_MAX_DEPTH+1]; +} erts_avail_cput; + +typedef struct { + int id; + int sub_levels; + int cpu_groups; +} erts_cpu_groups_count_t; + +typedef struct { + int logical; + int cpu_group; +} erts_cpu_groups_map_array_t; + +typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t; +struct erts_cpu_groups_callback_list_t_ { + erts_cpu_groups_callback_list_t *next; + erts_cpu_groups_callback_t callback; + void *arg; +}; + +typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t; +struct erts_cpu_groups_map_t_ { + erts_cpu_groups_map_t *next; + int groups; + erts_cpu_groups_map_array_t *array; + int size; + int logical_processors; + erts_cpu_groups_callback_list_t *callback_list; +}; + +typedef struct { + erts_cpu_groups_callback_t callback; + int ix; + void *arg; +} erts_cpu_groups_callback_call_t; + +static erts_cpu_groups_map_t *cpu_groups_maps; + +static erts_cpu_groups_map_t *reader_groups_map; + +#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH + +#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) + +#ifdef ERTS_SMP +static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, + int size, + ErtsCpuBindOrder bind_order, + int mk_seq); +static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); +#endif + +static void reader_groups_callback(int, ErtsSchedulerData *, int, void *); +static erts_cpu_groups_map_t *add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +static void update_cpu_groups_maps(void); +static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test); +static int cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp); + +static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, + int *cpudata_size); +static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata); + +static int +int_cmp(const void *vx, const void *vy) +{ + return *((int *) vx) - *((int *) vy); +} + +static int +cpu_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->core != y->core) + return x->core - y->core; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->node != y->node) + return x->node - y->node; + return 0; +} + +static int +cpu_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int +cpu_thread_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + return 0; +} + +static int +cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->node != y->node) + return x->node - y->node; + if (x->core != y->core) + return x->core - y->core; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int +cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->core != y->core) + return x->core - y->core; + if (x->processor != y->processor) + return x->processor - y->processor; + return 0; +} + +static int +cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->thread != y->thread) + return x->thread - y->thread; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->core != y->core) + return x->core - y->core; + return 0; +} + +static int +cpu_no_spread_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->node != y->node) + return x->node - y->node; + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->thread != y->thread) + return x->thread - y->thread; + return 0; +} + +static ERTS_INLINE void +make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node) +{ + int ix; + int node = -1; + int processor = -1; + int processor_node = -1; + int processor_node_node = -1; + int core = -1; + int thread = -1; + int old_node = -1; + int old_processor = -1; + int old_processor_node = -1; + int old_core = -1; + int old_thread = -1; + + for (ix = 0; ix < size; ix++) { + if (!no_node || cpudata[ix].node >= 0) { + if (old_node == cpudata[ix].node) + cpudata[ix].node = node; + else { + old_node = cpudata[ix].node; + old_processor = processor = -1; + if (!no_node) + old_processor_node = processor_node = -1; + old_core = core = -1; + old_thread = thread = -1; + if (no_node || cpudata[ix].node >= 0) + cpudata[ix].node = ++node; + } + } + if (old_processor == cpudata[ix].processor) + cpudata[ix].processor = processor; + else { + old_processor = cpudata[ix].processor; + if (!no_node) + processor_node_node = old_processor_node = processor_node = -1; + old_core = core = -1; + old_thread = thread = -1; + cpudata[ix].processor = ++processor; + } + if (no_node && cpudata[ix].processor_node < 0) + old_processor_node = -1; + else { + if (old_processor_node == cpudata[ix].processor_node) { + if (no_node) + cpudata[ix].node = cpudata[ix].processor_node = node; + else { + if (processor_node_node >= 0) + cpudata[ix].node = processor_node_node; + cpudata[ix].processor_node = processor_node; + } + } + else { + old_processor_node = cpudata[ix].processor_node; + old_core = core = -1; + old_thread = thread = -1; + if (no_node) + cpudata[ix].node = cpudata[ix].processor_node = ++node; + else { + cpudata[ix].node = processor_node_node = ++node; + cpudata[ix].processor_node = ++processor_node; + } + } + } + if (!no_node && cpudata[ix].processor_node < 0) + cpudata[ix].processor_node = 0; + if (old_core == cpudata[ix].core) + cpudata[ix].core = core; + else { + old_core = cpudata[ix].core; + old_thread = thread = -1; + cpudata[ix].core = ++core; + } + if (old_thread == cpudata[ix].thread) + cpudata[ix].thread = thread; + else + old_thread = cpudata[ix].thread = ++thread; + } +} + +static void +cpu_bind_order_sort(erts_cpu_topology_t *cpudata, + int size, + ErtsCpuBindOrder bind_order, + int mk_seq) +{ + if (size > 1) { + int no_node = 0; + int (*cmp_func)(const void *, const void *); + switch (bind_order) { + case ERTS_CPU_BIND_SPREAD: + cmp_func = cpu_spread_order_cmp; + break; + case ERTS_CPU_BIND_PROCESSOR_SPREAD: + cmp_func = cpu_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_THREAD_SPREAD: + cmp_func = cpu_thread_spread_order_cmp; + break; + case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: + no_node = 1; + cmp_func = cpu_thread_no_node_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: + no_node = 1; + cmp_func = cpu_no_node_processor_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: + no_node = 1; + cmp_func = cpu_no_node_thread_spread_order_cmp; + break; + case ERTS_CPU_BIND_NO_SPREAD: + cmp_func = cpu_no_spread_order_cmp; + break; + default: + cmp_func = NULL; + erl_exit(ERTS_ABORT_EXIT, + "Bad cpu bind type: %d\n", + (int) cpu_bind_order); + break; + } + + if (mk_seq) + make_cpudata_id_seq(cpudata, size, no_node); + + qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func); + } +} + +static int +processor_order_cmp(const void *vx, const void *vy) +{ + erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; + erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; + + if (x->processor != y->processor) + return x->processor - y->processor; + if (x->node != y->node) + return x->node - y->node; + if (x->processor_node != y->processor_node) + return x->processor_node - y->processor_node; + if (x->core != y->core) + return x->core - y->core; + if (x->thread != y->thread) + return x->thread - y->thread; + return 0; +} + +#ifdef ERTS_SMP +void +erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp) +{ + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + int cgcc_ix; + + /* Unbind from cpu */ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + if (scheduler2cpu_map[esdp->no].bound_id >= 0 + && erts_unbind_from_cpu(cpuinfo) == 0) { + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; + } + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } + } + ASSERT(no_cpu_groups_callbacks == cgcc_ix); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(1, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + + if (esdp->no <= max_main_threads) + erts_thr_set_main_status(0, 0); + +} + +void +erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + + if (esdp->no <= max_main_threads) + erts_thr_set_main_status(1, (int) esdp->no); + + /* Make sure we check if we should bind to a cpu or not... */ + if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 1); + else + esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; +} + +#endif + +void +erts_sched_check_cpu_bind(ErtsSchedulerData *esdp) +{ + int res, cpu_id, cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; +#ifdef ERTS_SMP + if (erts_common_run_queue) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); + else { + esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; + } +#endif + erts_smp_runq_unlock(esdp->run_queue); + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + cpu_id = scheduler2cpu_map[esdp->no].bind_id; + if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) { + res = erts_bind_to_cpu(cpuinfo, cpu_id); + if (res == 0) + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id; + else { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n", + (int) esdp->no, cpu_id, erl_errno_id(-res)); + erts_send_error_to_logger_nogl(dsbufp); + if (scheduler2cpu_map[esdp->no].bound_id >= 0) + goto unbind; + } + } + else if (cpu_id < 0) { + unbind: + /* Get rid of old binding */ + res = erts_unbind_from_cpu(cpuinfo); + if (res == 0) + esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; + else if (res != -ENOTSUP) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n", + (int) esdp->no, cpu_id, erl_errno_id(-res)); + erts_send_error_to_logger_nogl(dsbufp); + } + } + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } + } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + + erts_smp_runq_lock(esdp->run_queue); +} + +#ifdef ERTS_SMP +void +erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp) +{ + int cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } + } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + + if (esdp->no <= max_main_threads) + erts_thr_set_main_status(1, (int) esdp->no); +} +#endif + +static void +write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size) +{ + int s_ix = 1; + int cpu_ix; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) { + + cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1); + + for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++) + if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical)) + scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical; + } + + if (s_ix <= erts_no_schedulers) + for (; s_ix <= erts_no_schedulers; s_ix++) + scheduler2cpu_map[s_ix].bind_id = -1; +} + +int +erts_init_scheduler_bind_type_string(char *how) +{ + if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) + return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED; + + if (!system_cpudata && !user_cpudata) + return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY; + + if (sys_strcmp(how, "db") == 0) + cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; + else if (sys_strcmp(how, "s") == 0) + cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (sys_strcmp(how, "ps") == 0) + cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "ts") == 0) + cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (sys_strcmp(how, "tnnps") == 0) + cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "nnps") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if (sys_strcmp(how, "nnts") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (sys_strcmp(how, "ns") == 0) + cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (sys_strcmp(how, "u") == 0) + cpu_bind_order = ERTS_CPU_BIND_NONE; + else + return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE; + + return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS; +} + +static Eterm +bound_schedulers_term(ErtsCpuBindOrder order) +{ + switch (order) { + case ERTS_CPU_BIND_SPREAD: { + ERTS_DECL_AM(spread); + return AM_spread; + } + case ERTS_CPU_BIND_PROCESSOR_SPREAD: { + ERTS_DECL_AM(processor_spread); + return AM_processor_spread; + } + case ERTS_CPU_BIND_THREAD_SPREAD: { + ERTS_DECL_AM(thread_spread); + return AM_thread_spread; + } + case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: { + ERTS_DECL_AM(thread_no_node_processor_spread); + return AM_thread_no_node_processor_spread; + } + case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: { + ERTS_DECL_AM(no_node_processor_spread); + return AM_no_node_processor_spread; + } + case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: { + ERTS_DECL_AM(no_node_thread_spread); + return AM_no_node_thread_spread; + } + case ERTS_CPU_BIND_NO_SPREAD: { + ERTS_DECL_AM(no_spread); + return AM_no_spread; + } + case ERTS_CPU_BIND_NONE: { + ERTS_DECL_AM(unbound); + return AM_unbound; + } + default: + ASSERT(0); + return THE_NON_VALUE; + } +} + +Eterm +erts_bound_schedulers_term(Process *c_p) +{ + ErtsCpuBindOrder order; + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + order = cpu_bind_order; + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + return bound_schedulers_term(order); +} + +Eterm +erts_bind_schedulers(Process *c_p, Eterm how) +{ + int notify = 0; + Eterm res; + erts_cpu_topology_t *cpudata; + int cpudata_size; + ErtsCpuBindOrder old_cpu_bind_order; + + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + + if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) { + ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP); + } + else { + + old_cpu_bind_order = cpu_bind_order; + + if (ERTS_IS_ATOM_STR("default_bind", how)) + cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; + else if (ERTS_IS_ATOM_STR("spread", how)) + cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (ERTS_IS_ATOM_STR("processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("no_spread", how)) + cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (ERTS_IS_ATOM_STR("unbound", how)) + cpu_bind_order = ERTS_CPU_BIND_NONE; + else { + cpu_bind_order = old_cpu_bind_order; + ERTS_BIF_PREP_ERROR(res, c_p, BADARG); + goto done; + } + + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + + if (!cpudata) { + cpu_bind_order = old_cpu_bind_order; + ERTS_BIF_PREP_ERROR(res, c_p, BADARG); + goto done; + } + + write_schedulers_bind_change(cpudata, cpudata_size); + notify = 1; + + destroy_tmp_cpu_topology_copy(cpudata); + + res = bound_schedulers_term(old_cpu_bind_order); + } + + done: + + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + + if (notify) + erts_sched_notify_check_cpu_bind(); + + return res; +} + +int +erts_sched_bind_atthrcreate_prepare(void) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + return esdp != NULL && erts_is_scheduler_bound(esdp); +} + +int +erts_sched_bind_atthrcreate_child(int unbind) +{ + int res = 0; + if (unbind) { + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + res = erts_unbind_from_cpu(cpuinfo); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + } + return res; +} + +void +erts_sched_bind_atthrcreate_parent(int unbind) +{ + +} + +int +erts_sched_bind_atfork_prepare(void) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + int unbind = esdp != NULL && erts_is_scheduler_bound(esdp); + if (unbind) + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + return unbind; +} + +int +erts_sched_bind_atfork_child(int unbind) +{ + if (unbind) { + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx) + || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + return erts_unbind_from_cpu(cpuinfo); + } + return 0; +} + +char * +erts_sched_bind_atvfork_child(int unbind) +{ + if (unbind) { + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx) + || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + return erts_get_unbind_from_cpu_str(cpuinfo); + } + return "false"; +} + +void +erts_sched_bind_atfork_parent(int unbind) +{ + if (unbind) + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); +} + +Eterm +erts_fake_scheduler_bindings(Process *p, Eterm how) +{ + ErtsCpuBindOrder fake_cpu_bind_order; + erts_cpu_topology_t *cpudata; + int cpudata_size; + Eterm res; + + if (ERTS_IS_ATOM_STR("default_bind", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; + else if (ERTS_IS_ATOM_STR("spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD; + else if (ERTS_IS_ATOM_STR("processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; + else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; + else if (ERTS_IS_ATOM_STR("no_spread", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; + else if (ERTS_IS_ATOM_STR("unbound", how)) + fake_cpu_bind_order = ERTS_CPU_BIND_NONE; + else { + ERTS_BIF_PREP_ERROR(res, p, BADARG); + return res; + } + + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + + if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE) + ERTS_BIF_PREP_RET(res, am_false); + else { + int i; + Eterm *hp; + + cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1); + +#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA + + erts_fprintf(stderr, "node: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].node); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "processor: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].processor); + erts_fprintf(stderr, "\n"); + if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD + && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD + && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) { + erts_fprintf(stderr, "processor_node:"); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].processor_node); + erts_fprintf(stderr, "\n"); + } + erts_fprintf(stderr, "core: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].core); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "thread: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].thread); + erts_fprintf(stderr, "\n"); + erts_fprintf(stderr, "logical: "); + for (i = 0; i < cpudata_size; i++) + erts_fprintf(stderr, " %2d", cpudata[i].logical); + erts_fprintf(stderr, "\n"); +#endif + + hp = HAlloc(p, cpudata_size+1); + ERTS_BIF_PREP_RET(res, make_tuple(hp)); + *hp++ = make_arityval((Uint) cpudata_size); + for (i = 0; i < cpudata_size; i++) + *hp++ = make_small((Uint) cpudata[i].logical); + } + + destroy_tmp_cpu_topology_copy(cpudata); + + return res; +} + +Eterm +erts_get_schedulers_binds(Process *c_p) +{ + int ix; + ERTS_DECL_AM(unbound); + Eterm *hp = HAlloc(c_p, erts_no_schedulers+1); + Eterm res = make_tuple(hp); + + *(hp++) = make_arityval(erts_no_schedulers); + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + for (ix = 1; ix <= erts_no_schedulers; ix++) + *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0 + ? make_small(scheduler2cpu_map[ix].bound_id) + : AM_unbound); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + return res; +} + +/* + * CPU topology + */ + +typedef struct { + int *id; + int used; + int size; +} ErtsCpuTopIdSeq; + +typedef struct { + ErtsCpuTopIdSeq logical; + ErtsCpuTopIdSeq thread; + ErtsCpuTopIdSeq core; + ErtsCpuTopIdSeq processor_node; + ErtsCpuTopIdSeq processor; + ErtsCpuTopIdSeq node; +} ErtsCpuTopEntry; + +static void +init_cpu_top_entry(ErtsCpuTopEntry *cte) +{ + int size = 10; + cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->logical.size = size; + cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->thread.size = size; + cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->core.size = size; + cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->processor_node.size = size; + cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->processor.size = size; + cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, + sizeof(int)*size); + cte->node.size = size; +} + +static void +destroy_cpu_top_entry(ErtsCpuTopEntry *cte) +{ + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id); + erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id); +} + +static int +get_cput_value_or_range(int *v, int *vr, char **str) +{ + long l; + char *c = *str; + errno = 0; + if (!isdigit((unsigned char)*c)) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; + l = strtol(c, &c, 10); + if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; + *v = (int) l; + if (*c == '-') { + c++; + if (!isdigit((unsigned char)*c)) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + l = strtol(c, &c, 10); + if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + *vr = (int) l; + } + *str = c; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str) +{ + int ix = 0; + int need_size = 0; + char *c = *str; + + while (1) { + int res; + int val; + int nids; + int val_range = -1; + res = get_cput_value_or_range(&val, &val_range, &c); + if (res != ERTS_INIT_CPU_TOPOLOGY_OK) + return res; + if (val_range < 0 || val_range == val) + nids = 1; + else { + if (val_range > val) + nids = val_range - val + 1; + else + nids = val - val_range + 1; + } + need_size += nids; + if (need_size > idseq->size) { + idseq->size = need_size + 10; + idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS, + idseq->id, + sizeof(int)*idseq->size); + } + if (nids == 1) + idseq->id[ix++] = val; + else if (val_range > val) { + for (; val <= val_range; val++) + idseq->id[ix++] = val; + } + else { + for (; val >= val_range; val--) + idseq->id[ix++] = val; + } + if (*c != ',') + break; + c++; + } + *str = c; + idseq->used = ix; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +get_cput_entry(ErtsCpuTopEntry *cput, char **str) +{ + int h; + char *c = *str; + + cput->logical.used = 0; + cput->thread.id[0] = 0; + cput->thread.used = 1; + cput->core.id[0] = 0; + cput->core.used = 1; + cput->processor_node.id[0] = -1; + cput->processor_node.used = 1; + cput->processor.id[0] = 0; + cput->processor.used = 1; + cput->node.id[0] = -1; + cput->node.used = 1; + + h = ERTS_TOPOLOGY_MAX_DEPTH; + while (*c != ':' && *c != '\0') { + int res; + ErtsCpuTopIdSeq *idseqp; + switch (*c++) { + case 'L': + if (h <= ERTS_TOPOLOGY_LOGICAL) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->logical; + h = ERTS_TOPOLOGY_LOGICAL; + break; + case 't': + case 'T': + if (h <= ERTS_TOPOLOGY_THREAD) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->thread; + h = ERTS_TOPOLOGY_THREAD; + break; + case 'c': + case 'C': + if (h <= ERTS_TOPOLOGY_CORE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->core; + h = ERTS_TOPOLOGY_CORE; + break; + case 'p': + case 'P': + if (h <= ERTS_TOPOLOGY_PROCESSOR) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->processor; + h = ERTS_TOPOLOGY_PROCESSOR; + break; + case 'n': + case 'N': + if (h <= ERTS_TOPOLOGY_PROCESSOR) { + do_node: + if (h <= ERTS_TOPOLOGY_NODE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->node; + h = ERTS_TOPOLOGY_NODE; + } + else { + int p_node = 0; + char *p_chk = c; + while (*p_chk != '\0' && *p_chk != ':') { + if (*p_chk == 'p' || *p_chk == 'P') { + p_node = 1; + break; + } + p_chk++; + } + if (!p_node) + goto do_node; + if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; + idseqp = &cput->processor_node; + h = ERTS_TOPOLOGY_PROCESSOR_NODE; + } + break; + default: + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE; + } + res = get_cput_id_seq(idseqp, &c); + if (res != ERTS_INIT_CPU_TOPOLOGY_OK) + return res; + } + + if (cput->logical.used < 1) + return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID; + + if (*c == ':') { + c++; + } + + if (cput->thread.used != 1 + && cput->thread.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->core.used != 1 + && cput->core.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->processor_node.used != 1 + && cput->processor_node.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->processor.used != 1 + && cput->processor.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + if (cput->node.used != 1 + && cput->node.used != cput->logical.used) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; + + *str = c; + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +static int +verify_topology(erts_cpu_topology_t *cpudata, int size) +{ + if (size > 0) { + int *logical; + int node, processor, no_nodes, i; + + /* Verify logical ids */ + logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size); + + for (i = 0; i < size; i++) + logical[i] = cpudata[i].logical; + + qsort(logical, size, sizeof(int), int_cmp); + for (i = 0; i < size-1; i++) { + if (logical[i] == logical[i+1]) { + erts_free(ERTS_ALC_T_TMP, logical); + return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS; + } + } + + erts_free(ERTS_ALC_T_TMP, logical); + + qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp); + + /* Verify unique entities */ + + for (i = 1; i < size; i++) { + if (cpudata[i-1].processor == cpudata[i].processor + && cpudata[i-1].node == cpudata[i].node + && (cpudata[i-1].processor_node + == cpudata[i].processor_node) + && cpudata[i-1].core == cpudata[i].core + && cpudata[i-1].thread == cpudata[i].thread) { + return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES; + } + } + + /* Verify numa nodes */ + node = cpudata[0].node; + processor = cpudata[0].processor; + no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0; + for (i = 1; i < size; i++) { + if (no_nodes) { + if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + } + else { + if (cpudata[i].processor == processor && cpudata[i].node != node) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + node = cpudata[i].node; + processor = cpudata[i].processor; + if (node >= 0 && cpudata[i].processor_node >= 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + if (node < 0 && cpudata[i].processor_node < 0) + return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; + } + } + } + + return ERTS_INIT_CPU_TOPOLOGY_OK; +} + +int +erts_init_cpu_topology_string(char *topology_str) +{ + ErtsCpuTopEntry cput; + int need_size; + char *c; + int ix; + int error = ERTS_INIT_CPU_TOPOLOGY_OK; + + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata_size = 10; + + user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + + init_cpu_top_entry(&cput); + + ix = 0; + need_size = 0; + + c = topology_str; + if (*c == '\0') { + error = ERTS_INIT_CPU_TOPOLOGY_MISSING; + goto fail; + } + do { + int r; + error = get_cput_entry(&cput, &c); + if (error != ERTS_INIT_CPU_TOPOLOGY_OK) + goto fail; + need_size += cput.logical.used; + if (user_cpudata_size < need_size) { + user_cpudata_size = need_size + 10; + user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, + user_cpudata, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + } + + ASSERT(cput.thread.used == 1 + || cput.thread.used == cput.logical.used); + ASSERT(cput.core.used == 1 + || cput.core.used == cput.logical.used); + ASSERT(cput.processor_node.used == 1 + || cput.processor_node.used == cput.logical.used); + ASSERT(cput.processor.used == 1 + || cput.processor.used == cput.logical.used); + ASSERT(cput.node.used == 1 + || cput.node.used == cput.logical.used); + + for (r = 0; r < cput.logical.used; r++) { + user_cpudata[ix].logical = cput.logical.id[r]; + user_cpudata[ix].thread = + cput.thread.id[cput.thread.used == 1 ? 0 : r]; + user_cpudata[ix].core = + cput.core.id[cput.core.used == 1 ? 0 : r]; + user_cpudata[ix].processor_node = + cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r]; + user_cpudata[ix].processor = + cput.processor.id[cput.processor.used == 1 ? 0 : r]; + user_cpudata[ix].node = + cput.node.id[cput.node.used == 1 ? 0 : r]; + ix++; + } + } while (*c != '\0'); + + if (user_cpudata_size != ix) { + user_cpudata_size = ix; + user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, + user_cpudata, + (sizeof(erts_cpu_topology_t) + * user_cpudata_size)); + } + + error = verify_topology(user_cpudata, user_cpudata_size); + if (error == ERTS_INIT_CPU_TOPOLOGY_OK) { + destroy_cpu_top_entry(&cput); + return ERTS_INIT_CPU_TOPOLOGY_OK; + } + + fail: + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata_size = 0; + destroy_cpu_top_entry(&cput); + return error; +} + +#define ERTS_GET_CPU_TOPOLOGY_ERROR -1 +#define ERTS_GET_USED_CPU_TOPOLOGY 0 +#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1 +#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2 + +static Eterm get_cpu_topology_term(Process *c_p, int type); + +Eterm +erts_set_cpu_topology(Process *c_p, Eterm term) +{ + erts_cpu_topology_t *cpudata = NULL; + int cpudata_size = 0; + Eterm res; + + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY); + if (term == am_undefined) { + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata = NULL; + user_cpudata_size = 0; + + if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) { + cpudata_size = system_cpudata_size; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + + sys_memcpy((void *) cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*cpudata_size); + } + } + else if (is_not_list(term)) { + error: + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + res = THE_NON_VALUE; + goto done; + } + else { + Eterm list = term; + int ix = 0; + + cpudata_size = 100; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + + while (is_list(list)) { + Eterm *lp = list_val(list); + Eterm cpu = CAR(lp); + Eterm* tp; + Sint id; + + if (is_not_tuple(cpu)) + goto error; + + tp = tuple_val(cpu); + + if (arityval(tp[0]) != 7 || tp[1] != am_cpu) + goto error; + + if (ix >= cpudata_size) { + cpudata_size += 100; + cpudata = erts_realloc(ERTS_ALC_T_TMP, + cpudata, + (sizeof(erts_cpu_topology_t) + * cpudata_size)); + } + + id = signed_val(tp[2]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].node = (int) id; + + id = signed_val(tp[3]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].processor = (int) id; + + id = signed_val(tp[4]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].processor_node = (int) id; + + id = signed_val(tp[5]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].core = (int) id; + + id = signed_val(tp[6]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].thread = (int) id; + + id = signed_val(tp[7]); + if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) + goto error; + cpudata[ix].logical = (int) id; + + list = CDR(lp); + ix++; + } + + if (is_not_nil(list)) + goto error; + + cpudata_size = ix; + + if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size)) + goto error; + + if (user_cpudata_size != cpudata_size) { + if (user_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); + user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + sizeof(erts_cpu_topology_t)*cpudata_size); + user_cpudata_size = cpudata_size; + } + + sys_memcpy((void *) user_cpudata, + (void *) cpudata, + sizeof(erts_cpu_topology_t)*cpudata_size); + } + + update_cpu_groups_maps(); + + write_schedulers_bind_change(cpudata, cpudata_size); + + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + erts_sched_notify_check_cpu_bind(); + + done: + + if (cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); + + return res; +} + +static void +create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size) +{ + if (user_cpudata) { + *cpudata_size = user_cpudata_size; + *cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * (*cpudata_size))); + sys_memcpy((void *) *cpudata, + (void *) user_cpudata, + sizeof(erts_cpu_topology_t)*(*cpudata_size)); + } + else if (system_cpudata) { + *cpudata_size = system_cpudata_size; + *cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * (*cpudata_size))); + sys_memcpy((void *) *cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*(*cpudata_size)); + } + else { + *cpudata = NULL; + *cpudata_size = 0; + } +} + +static void +destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata) +{ + if (cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); +} + + +static Eterm +bld_topology_term(Eterm **hpp, + Uint *hszp, + erts_cpu_topology_t *cpudata, + int size) +{ + Eterm res = NIL; + int i; + + if (size == 0) + return am_undefined; + + for (i = size-1; i >= 0; i--) { + res = erts_bld_cons(hpp, + hszp, + erts_bld_tuple(hpp, + hszp, + 7, + am_cpu, + make_small(cpudata[i].node), + make_small(cpudata[i].processor), + make_small(cpudata[i].processor_node), + make_small(cpudata[i].core), + make_small(cpudata[i].thread), + make_small(cpudata[i].logical)), + res); + } + return res; +} + +static Eterm +get_cpu_topology_term(Process *c_p, int type) +{ +#ifdef DEBUG + Eterm *hp_end; +#endif + Eterm *hp; + Uint hsz; + Eterm res = THE_NON_VALUE; + erts_cpu_topology_t *cpudata = NULL; + int size = 0; + + switch (type) { + case ERTS_GET_USED_CPU_TOPOLOGY: + if (user_cpudata) + goto defined; + else + goto detected; + case ERTS_GET_DETECTED_CPU_TOPOLOGY: + detected: + if (!system_cpudata) + res = am_undefined; + else { + size = system_cpudata_size; + cpudata = erts_alloc(ERTS_ALC_T_TMP, + (sizeof(erts_cpu_topology_t) + * size)); + sys_memcpy((void *) cpudata, + (void *) system_cpudata, + sizeof(erts_cpu_topology_t)*size); + } + break; + case ERTS_GET_DEFINED_CPU_TOPOLOGY: + defined: + if (!user_cpudata) + res = am_undefined; + else { + size = user_cpudata_size; + cpudata = user_cpudata; + } + break; + default: + erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type); + break; + } + + if (res == am_undefined) { + ASSERT(!cpudata); + return res; + } + + hsz = 0; + + bld_topology_term(NULL, &hsz, + cpudata, size); + + hp = HAlloc(c_p, hsz); + +#ifdef DEBUG + hp_end = hp + hsz; +#endif + + res = bld_topology_term(&hp, NULL, + cpudata, size); + + ASSERT(hp_end == hp); + + if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata) + erts_free(ERTS_ALC_T_TMP, cpudata); + + return res; +} + +Eterm +erts_get_cpu_topology_term(Process *c_p, Eterm which) +{ + Eterm res; + int type; + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + if (ERTS_IS_ATOM_STR("used", which)) + type = ERTS_GET_USED_CPU_TOPOLOGY; + else if (ERTS_IS_ATOM_STR("detected", which)) + type = ERTS_GET_DETECTED_CPU_TOPOLOGY; + else if (ERTS_IS_ATOM_STR("defined", which)) + type = ERTS_GET_DEFINED_CPU_TOPOLOGY; + else + type = ERTS_GET_CPU_TOPOLOGY_ERROR; + if (type == ERTS_GET_CPU_TOPOLOGY_ERROR) + res = THE_NON_VALUE; + else + res = get_cpu_topology_term(c_p, type); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + return res; +} + +static void +get_logical_processors(int *conf, int *onln, int *avail) +{ + if (conf) + *conf = erts_get_cpu_configured(cpuinfo); + if (onln) + *onln = erts_get_cpu_online(cpuinfo); + if (avail) + *avail = erts_get_cpu_available(cpuinfo); +} + +void +erts_get_logical_processors(int *conf, int *onln, int *avail) +{ + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + get_logical_processors(conf, onln, avail); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); +} + +void +erts_pre_early_init_cpu_topology(int *max_rg_p, + int *conf_p, + int *onln_p, + int *avail_p) +{ + cpu_groups_maps = NULL; + no_cpu_groups_callbacks = 0; + *max_rg_p = ERTS_MAX_READER_GROUPS; + cpuinfo = erts_cpu_info_create(); + get_logical_processors(conf_p, onln_p, avail_p); +} + +void +erts_early_init_cpu_topology(int no_schedulers, + int *max_main_threads_p, + int max_reader_groups, + int *reader_groups_p) +{ + user_cpudata = NULL; + user_cpudata_size = 0; + + system_cpudata_size = erts_get_cpu_topology_size(cpuinfo); + system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(erts_cpu_topology_t) + * system_cpudata_size)); + + cpu_bind_order = ERTS_CPU_BIND_UNDEFINED; + + if (!erts_get_cpu_topology(cpuinfo, system_cpudata) + || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, + system_cpudata_size)) { + erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); + system_cpudata = NULL; + system_cpudata_size = 0; + } + + max_main_threads = erts_get_cpu_configured(cpuinfo); + if (max_main_threads > no_schedulers) + max_main_threads = no_schedulers; + *max_main_threads_p = max_main_threads; + + reader_groups = max_main_threads; + if (reader_groups <= 1 || max_reader_groups <= 1) + reader_groups = 0; + if (reader_groups > max_reader_groups) + reader_groups = max_reader_groups; + *reader_groups_p = reader_groups; +} + +void +erts_init_cpu_topology(void) +{ + int ix; + + erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info"); + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + + scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(ErtsCpuBindData) + * (erts_no_schedulers+1))); + for (ix = 1; ix <= erts_no_schedulers; ix++) { + scheduler2cpu_map[ix].bind_id = -1; + scheduler2cpu_map[ix].bound_id = -1; + } + + if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) { + int ncpus = erts_get_cpu_configured(cpuinfo); + if (ncpus < 1 || erts_no_schedulers < ncpus) + cpu_bind_order = ERTS_CPU_BIND_NONE; + else + cpu_bind_order = ((system_cpudata || user_cpudata) + && (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP) + ? ERTS_CPU_BIND_DEFAULT_BIND + : ERTS_CPU_BIND_NONE); + } + + reader_groups_map = add_cpu_groups(reader_groups, + reader_groups_callback, + NULL); + + if (cpu_bind_order == ERTS_CPU_BIND_NONE) + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + else { + erts_cpu_topology_t *cpudata; + int cpudata_size; + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + write_schedulers_bind_change(cpudata, cpudata_size); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + erts_sched_notify_check_cpu_bind(); + destroy_tmp_cpu_topology_copy(cpudata); + } +} + +int +erts_update_cpu_info(void) +{ + int changed; + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + changed = erts_cpu_info_update(cpuinfo); + if (changed) { + erts_cpu_topology_t *cpudata; + int cpudata_size; + + if (system_cpudata) + erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); + + system_cpudata_size = erts_get_cpu_topology_size(cpuinfo); + if (!system_cpudata_size) + system_cpudata = NULL; + else { + system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, + (sizeof(erts_cpu_topology_t) + * system_cpudata_size)); + + if (!erts_get_cpu_topology(cpuinfo, system_cpudata) + || (ERTS_INIT_CPU_TOPOLOGY_OK + != verify_topology(system_cpudata, + system_cpudata_size))) { + erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); + system_cpudata = NULL; + system_cpudata_size = 0; + } + } + + update_cpu_groups_maps(); + + create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); + write_schedulers_bind_change(cpudata, cpudata_size); + destroy_tmp_cpu_topology_copy(cpudata); + } + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + if (changed) + erts_sched_notify_check_cpu_bind(); + return changed; +} + +/* + * reader groups map + */ + +void +reader_groups_callback(int suspending, + ErtsSchedulerData *esdp, + int group, + void *unused) +{ + if (reader_groups && esdp->no <= max_main_threads) + erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1); +} + +static Eterm get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset); +Eterm +erts_debug_reader_groups_map(Process *c_p, int groups) +{ + Eterm res; + erts_cpu_groups_map_t test; + + test.array = NULL; + test.groups = groups; + make_cpu_groups_map(&test, 1); + if (!test.array) + res = NIL; + else { + res = get_cpu_groups_map(c_p, &test, 1); + erts_free(ERTS_ALC_T_TMP, test.array); + } + return res; +} + + +Eterm +erts_get_reader_groups_map(Process *c_p) +{ + Eterm res; + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + res = get_cpu_groups_map(c_p, reader_groups_map, 1); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + return res; +} + +/* + * CPU groups + */ + +static Eterm +get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset) +{ +#ifdef DEBUG + Eterm *endp; +#endif + Eterm res = NIL, tuple; + Eterm *hp; + int i; + + hp = HAlloc(c_p, map->logical_processors*(2+3)); +#ifdef DEBUG + endp = hp + map->logical_processors*(2+3); +#endif + for (i = map->size - 1; i >= 0; i--) { + if (map->array[i].logical >= 0) { + tuple = TUPLE2(hp, + make_small(map->array[i].logical), + make_small(map->array[i].cpu_group + offset)); + hp += 3; + res = CONS(hp, tuple, res); + hp += 2; + } + } + ASSERT(hp == endp); + return res; +} + +static void +make_available_cpu_topology(erts_avail_cput *no, + erts_avail_cput *avail, + erts_cpu_topology_t *cpudata, + int *size, + int test) +{ + int len = *size; + erts_cpu_topology_t last; + int a, i, j; + + no->level[ERTS_TOPOLOGY_NODE] = -1; + no->level[ERTS_TOPOLOGY_PROCESSOR] = -1; + no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1; + no->level[ERTS_TOPOLOGY_CORE] = -1; + no->level[ERTS_TOPOLOGY_THREAD] = -1; + no->level[ERTS_TOPOLOGY_LOGICAL] = -1; + + last.node = INT_MIN; + last.processor = INT_MIN; + last.processor_node = INT_MIN; + last.core = INT_MIN; + last.thread = INT_MIN; + last.logical = INT_MIN; + + a = 0; + + for (i = 0; i < len; i++) { + + if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical)) + continue; + + if (last.node != cpudata[i].node) + goto node; + if (last.processor != cpudata[i].processor) + goto processor; + if (last.processor_node != cpudata[i].processor_node) + goto processor_node; + if (last.core != cpudata[i].core) + goto core; + ASSERT(last.thread != cpudata[i].thread); + goto thread; + + node: + no->level[ERTS_TOPOLOGY_NODE]++; + processor: + no->level[ERTS_TOPOLOGY_PROCESSOR]++; + processor_node: + no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++; + core: + no->level[ERTS_TOPOLOGY_CORE]++; + thread: + no->level[ERTS_TOPOLOGY_THREAD]++; + + no->level[ERTS_TOPOLOGY_LOGICAL]++; + + for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++) + avail[a].level[j] = no->level[j]; + + avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical; + avail[a].level[ERTS_TOPOLOGY_CG] = 0; + + ASSERT(last.logical != cpudata[i].logical); + + last = cpudata[i]; + a++; + } + + no->level[ERTS_TOPOLOGY_NODE]++; + no->level[ERTS_TOPOLOGY_PROCESSOR]++; + no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++; + no->level[ERTS_TOPOLOGY_CORE]++; + no->level[ERTS_TOPOLOGY_THREAD]++; + no->level[ERTS_TOPOLOGY_LOGICAL]++; + + *size = a; +} + +static void +cpu_group_insert(erts_cpu_groups_map_t *map, + int logical, int cpu_group) +{ + int start = logical % map->size; + int ix = start; + + do { + if (map->array[ix].logical < 0) { + map->array[ix].logical = logical; + map->array[ix].cpu_group = cpu_group; + return; + } + ix++; + if (ix == map->size) + ix = 0; + } while (ix != start); + + erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n"); +} + + +static int +sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix, + int avail_sz, erts_avail_cput *avail) +{ + int sub_level = level+1; + int last = -1; + cgc->sub_levels = 0; + + do { + if (last != avail[aix].level[sub_level]) { + cgc->sub_levels++; + last = avail[aix].level[sub_level]; + } + aix++; + } + while (aix < avail_sz && cgc->id == avail[aix].level[level]); + cgc->cpu_groups = 0; + return aix; +} + +static int +write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp, + int level, int a, + int avail_sz, erts_avail_cput *avail) +{ + int cg = *cgp; + int sub_level = level+1; + int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups; + int xsl = cgcp->sub_levels % cgcp->cpu_groups; + int sls = 0; + int last = -1; + int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1; + + ASSERT(level < 0 || avail[a].level[level] == cgcp->id); + + do { + if (last != avail[a].level[sub_level]) { + if (!sls) { + sls = sl_per_gr; + cg++; + if (cg >= xsl_cg_lim) + sls++; + } + last = avail[a].level[sub_level]; + sls--; + } + avail[a].level[ERTS_TOPOLOGY_CG] = cg; + a++; + } while (a < avail_sz && (level < 0 + || avail[a].level[level] == cgcp->id)); + + ASSERT(cgcp->cpu_groups == cg - *cgp); + + *cgp = cg; + + return a; +} + +static int +cg_count_sub_levels_compare(const void *vx, const void *vy) +{ + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; + if (x->sub_levels != y->sub_levels) + return y->sub_levels - x->sub_levels; + return x->id - y->id; +} + +static int +cg_count_id_compare(const void *vx, const void *vy) +{ + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; + return x->id - y->id; +} + +static void +make_cpu_groups_map(erts_cpu_groups_map_t *map, int test) +{ + int i, spread_level, avail_sz; + erts_avail_cput no, *avail; + erts_cpu_topology_t *cpudata; + ErtsAlcType_t alc_type = (test + ? ERTS_ALC_T_TMP + : ERTS_ALC_T_CPU_GRPS_MAP); + + if (map->array) + erts_free(alc_type, map->array); + + map->array = NULL; + map->logical_processors = 0; + map->size = 0; + + if (!map->groups) + return; + + create_tmp_cpu_topology_copy(&cpudata, &avail_sz); + + if (!cpudata) + return; + + cpu_bind_order_sort(cpudata, + avail_sz, + ERTS_CPU_BIND_NO_SPREAD, + 1); + + avail = erts_alloc(ERTS_ALC_T_TMP, + sizeof(erts_avail_cput)*avail_sz); + + make_available_cpu_topology(&no, avail, cpudata, + &avail_sz, test); + + destroy_tmp_cpu_topology_copy(cpudata); + + map->size = avail_sz*2+1; + + map->array = erts_alloc(alc_type, + (sizeof(erts_cpu_groups_map_array_t) + * map->size));; + map->logical_processors = avail_sz; + + for (i = 0; i < map->size; i++) { + map->array[i].logical = -1; + map->array[i].cpu_group = -1; + } + + spread_level = ERTS_TOPOLOGY_CORE; + for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) { + if (no.level[i] > map->groups) { + spread_level = i; + break; + } + } + + if (no.level[spread_level] <= map->groups) { + int a, cg, last = -1; + cg = -1; + ASSERT(spread_level == ERTS_TOPOLOGY_CORE); + for (a = 0; a < avail_sz; a++) { + if (last != avail[a].level[spread_level]) { + cg++; + last = avail[a].level[spread_level]; + } + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + cg); + } + } + else { /* map->groups < no.level[spread_level] */ + erts_cpu_groups_count_t *cg_count; + int a, cg, tl, toplevels; + + tl = spread_level-1; + + if (spread_level == ERTS_TOPOLOGY_NODE) + toplevels = 1; + else + toplevels = no.level[tl]; + + cg_count = erts_alloc(ERTS_ALC_T_TMP, + toplevels*sizeof(erts_cpu_groups_count_t)); + + if (toplevels == 1) { + cg_count[0].id = 0; + cg_count[0].sub_levels = no.level[spread_level]; + cg_count[0].cpu_groups = map->groups; + } + else { + int cgs_per_tl, cgs; + cgs = map->groups; + cgs_per_tl = cgs / toplevels; + + a = 0; + for (i = 0; i < toplevels; i++) { + cg_count[i].id = avail[a].level[tl]; + a = sub_levels(&cg_count[i], tl, a, avail_sz, avail); + } + + qsort(cg_count, + toplevels, + sizeof(erts_cpu_groups_count_t), + cg_count_sub_levels_compare); + + for (i = 0; i < toplevels; i++) { + if (cg_count[i].sub_levels < cgs_per_tl) { + cg_count[i].cpu_groups = cg_count[i].sub_levels; + cgs -= cg_count[i].sub_levels; + } + else { + cg_count[i].cpu_groups = cgs_per_tl; + cgs -= cgs_per_tl; + } + } + + while (cgs > 0) { + for (i = 0; i < toplevels; i++) { + if (cg_count[i].sub_levels == cg_count[i].cpu_groups) + break; + else { + cg_count[i].cpu_groups++; + if (--cgs == 0) + break; + } + } + } + + qsort(cg_count, + toplevels, + sizeof(erts_cpu_groups_count_t), + cg_count_id_compare); + } + + a = i = 0; + cg = -1; + while (a < avail_sz) { + a = write_cpu_groups(&cg, &cg_count[i], tl, + a, avail_sz, avail); + i++; + } + + ASSERT(map->groups == cg + 1); + + for (a = 0; a < avail_sz; a++) + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + avail[a].level[ERTS_TOPOLOGY_CG]); + + erts_free(ERTS_ALC_T_TMP, cg_count); + } + + erts_free(ERTS_ALC_T_TMP, avail); +} + +static erts_cpu_groups_map_t * +add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + int use_groups = groups; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_map_t *cgm; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (use_groups > max_main_threads) + use_groups = max_main_threads; + + if (!use_groups) + return NULL; + + no_cpu_groups_callbacks++; + cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_callback_list_t)); + cgcl->callback = callback; + cgcl->arg = arg; + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + if (cgm->groups == use_groups) { + cgcl->next = cgm->callback_list; + cgm->callback_list = cgcl; + return cgm; + } + } + + + cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_map_t)); + cgm->next = cpu_groups_maps; + cgm->groups = use_groups; + cgm->array = NULL; + cgm->size = 0; + cgm->logical_processors = 0; + cgm->callback_list = cgcl; + + cgcl->next = NULL; + + make_cpu_groups_map(cgm, 0); + + cpu_groups_maps = cgm; + + return cgm; +} + +static void +remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg) +{ + erts_cpu_groups_map_t *prev_cgm, *cgm; + erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + no_cpu_groups_callbacks--; + + prev_cgm = NULL; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + prev_cgcl = NULL; + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + if (cgcl->callback == callback && cgcl->arg == arg) { + if (prev_cgcl) + prev_cgcl->next = cgcl->next; + else + cgm->callback_list = cgcl->next; + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl); + if (!cgm->callback_list) { + if (prev_cgm) + prev_cgm->next = cgm->next; + else + cpu_groups_maps = cgm->next; + if (cgm->array) + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array); + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm); + } + return; + } + prev_cgcl = cgcl; + } + prev_cgm = cgm; + } + + erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n"); +} + +static int +cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp) +{ + int start, logical, ix; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx) + || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (esdp->cpu_id < 0) + return (((int) esdp->no) - 1) % map->groups; + + logical = esdp->cpu_id; + start = logical % map->size; + ix = start; + + do { + if (map->array[ix].logical == logical) { + int group = map->array[ix].cpu_group; + ASSERT(0 <= group && group < map->groups); + return group; + } + ix++; + if (ix == map->size) + ix = 0; + } while (ix != start); + + erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); +} + +static void +update_cpu_groups_maps(void) +{ + erts_cpu_groups_map_t *cgm; + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) + make_cpu_groups_map(cgm, 0); +} + +void +erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + add_cpu_groups(groups, callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} + +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + remove_cpu_groups(callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h new file mode 100644 index 0000000000..c5a9520b61 --- /dev/null +++ b/erts/emulator/beam/erl_cpu_topology.h @@ -0,0 +1,105 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2010. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: CPU topology and related functionality + * + * Author: Rickard Green + */ + +#ifndef ERL_CPU_TOPOLOGY_H__ +#define ERL_CPU_TOPOLOGY_H__ + +void erts_pre_early_init_cpu_topology(int *max_rg_p, + int *conf_p, + int *onln_p, + int *avail_p); +void erts_early_init_cpu_topology(int no_schedulers, + int *max_main_threads_p, + int max_reader_groups, + int *reader_groups_p); +void erts_init_cpu_topology(void); + + +#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0 +#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1 +#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2 +#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3 + +int erts_init_scheduler_bind_type_string(char *how); + + +#define ERTS_INIT_CPU_TOPOLOGY_OK 0 +#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1 +#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2 +#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3 +#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4 +#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5 +#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6 +#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7 +#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8 +#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9 + +int erts_init_cpu_topology_string(char *topology_str); + +void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp); +#ifdef ERTS_SMP +void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp); +void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp); +void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp); +#endif + +int erts_update_cpu_info(void); + +Eterm erts_bind_schedulers(Process *c_p, Eterm how); +Eterm erts_get_schedulers_binds(Process *c_p); + +Eterm erts_get_reader_groups_map(Process *c_p); + +Eterm erts_set_cpu_topology(Process *c_p, Eterm term); +Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which); + +int erts_update_cpu_info(void); +void erts_get_logical_processors(int *conf, int *onln, int *avail); + +int erts_sched_bind_atthrcreate_prepare(void); +int erts_sched_bind_atthrcreate_child(int unbind); +void erts_sched_bind_atthrcreate_parent(int unbind); + +int erts_sched_bind_atfork_prepare(void); +int erts_sched_bind_atfork_child(int unbind); +char *erts_sched_bind_atvfork_child(int unbind); +void erts_sched_bind_atfork_parent(int unbind); + +Eterm erts_fake_scheduler_bindings(Process *p, Eterm how); +Eterm erts_debug_cpu_groups_map(Process *c_p, int groups); + + +typedef void (*erts_cpu_groups_callback_t)(int, + ErtsSchedulerData *, + int, + void *); + +void erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg); + +#endif diff --git a/erts/emulator/beam/erl_drv_thread.c b/erts/emulator/beam/erl_drv_thread.c index d42820ddf3..17b08a71d4 100644 --- a/erts/emulator/beam/erl_drv_thread.c +++ b/erts/emulator/beam/erl_drv_thread.c @@ -528,7 +528,7 @@ erl_drv_tsd_get(ErlDrvTSDKey key) if (!dtid) return NULL; #endif - if (ERL_DRV_TSD_LEN__ < key) + if (ERL_DRV_TSD_LEN__ <= key) return NULL; return ERL_DRV_TSD__[key]; } diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 4ae656a3ad..a2fd5921a2 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -41,6 +41,7 @@ #include "erl_printf_term.h" #include "erl_misc_utils.h" #include "packet_parser.h" +#include "erl_cpu_topology.h" #ifdef HIPE #include "hipe_mode_switch.h" /* for hipe_mode_switch_init() */ @@ -63,6 +64,8 @@ extern void ConNormalExit(void); extern void ConWaitForExit(void); #endif +static void erl_init(int ncpu); + #define ERTS_MIN_COMPAT_REL 7 #ifdef ERTS_SMP @@ -76,9 +79,6 @@ int erts_initialized = 0; static erts_tid_t main_thread; #endif -erts_cpu_info_t *erts_cpuinfo; - -int erts_reader_groups; int erts_use_sender_punish; /* @@ -111,7 +111,6 @@ int erts_compat_rel; static int use_multi_run_queue; static int no_schedulers; static int no_schedulers_online; -static int max_reader_groups; #ifdef DEBUG Uint32 verbose; /* See erl_debug.h for information about verbose */ @@ -230,18 +229,18 @@ void erl_error(char *fmt, va_list args) erts_vfprintf(stderr, fmt, args); } -static void early_init(int *argc, char **argv); +static int early_init(int *argc, char **argv); void erts_short_init(void) { - early_init(NULL, NULL); - erl_init(); + int ncpu = early_init(NULL, NULL); + erl_init(ncpu); erts_initialized = 1; } -void -erl_init(void) +static void +erl_init(int ncpu) { init_benchmarking(); @@ -252,11 +251,11 @@ erl_init(void) erts_init_monitors(); erts_init_gc(); init_time(); - erts_init_process(); + erts_init_process(ncpu); erts_init_scheduling(use_multi_run_queue, no_schedulers, no_schedulers_online); - + erts_init_cpu_topology(); /* Must be after init_scheduling */ H_MIN_SIZE = erts_next_heap_size(H_MIN_SIZE, 0); BIN_VH_MIN_SIZE = erts_next_heap_size(BIN_VH_MIN_SIZE, 0); @@ -535,7 +534,8 @@ void erts_usage(void) erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n"); erts_fprintf(stderr, " see error_logger documentation for details\n"); - + erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n"); + erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024); erts_fprintf(stderr, "\n"); erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n"); erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n"); @@ -587,7 +587,7 @@ static void ethr_ll_free(void *ptr) #endif -static void +static int early_init(int *argc, char **argv) /* * Only put things here which are * really important initialize @@ -600,6 +600,10 @@ early_init(int *argc, char **argv) /* int ncpuavail; int schdlrs; int schdlrs_onln; + int max_main_threads; + int max_reader_groups; + int reader_groups; + use_multi_run_queue = 1; erts_printf_eterm_func = erts_printf_term; erts_disable_tolerant_timeofday = 0; @@ -615,13 +619,11 @@ early_init(int *argc, char **argv) /* erts_use_sender_punish = 1; - erts_cpuinfo = erts_cpu_info_create(); - -#ifdef ERTS_SMP - ncpu = erts_get_cpu_configured(erts_cpuinfo); - ncpuonln = erts_get_cpu_online(erts_cpuinfo); - ncpuavail = erts_get_cpu_available(erts_cpuinfo); -#else + erts_pre_early_init_cpu_topology(&max_reader_groups, + &ncpu, + &ncpuonln, + &ncpuavail); +#ifndef ERTS_SMP ncpu = 1; ncpuonln = 1; ncpuavail = 1; @@ -664,15 +666,9 @@ early_init(int *argc, char **argv) /* ? ncpuavail : (ncpuonln > 0 ? ncpuonln : no_schedulers)); -#ifdef ERTS_SMP - erts_max_main_threads = no_schedulers_online; -#endif - schdlrs = no_schedulers; schdlrs_onln = no_schedulers_online; - max_reader_groups = ERTS_MAX_READER_GROUPS; - if (argc && argv) { int i = 1; while (i < *argc) { @@ -768,9 +764,13 @@ early_init(int *argc, char **argv) /* erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes) -M flags. */ - - erts_early_init_scheduling(); /* Require allocators */ - erts_init_utils(); /* Require allocators */ + /* Require allocators */ + erts_early_init_scheduling(); + erts_init_utils(); + erts_early_init_cpu_topology(no_schedulers, + &max_main_threads, + max_reader_groups, + &reader_groups); #ifdef USE_THREADS { @@ -784,24 +784,13 @@ early_init(int *argc, char **argv) /* elid.mem.ll.alloc = ethr_ll_alloc; elid.mem.ll.realloc = ethr_ll_realloc; elid.mem.ll.free = ethr_ll_free; - -#ifdef ERTS_SMP - elid.main_threads = erts_max_main_threads; -#else - elid.main_threads = 1; -#endif - elid.reader_groups = (elid.main_threads > 1 - ? elid.main_threads - : 0); - if (max_reader_groups <= 1) - elid.reader_groups = 0; - if (elid.reader_groups > max_reader_groups) - elid.reader_groups = max_reader_groups; - erts_reader_groups = elid.reader_groups; + elid.main_threads = max_main_threads; + elid.reader_groups = reader_groups; erts_thr_late_init(&elid); } #endif + #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_late_init(); #endif @@ -818,7 +807,9 @@ early_init(int *argc, char **argv) /* erl_sys_args(argc, argv); erts_ets_realloc_always_moves = 0; + erts_dist_buf_busy_limit = ERTS_DE_BUSY_LIMIT; + return ncpu; } #ifndef ERTS_SMP @@ -852,8 +843,7 @@ erl_start(int argc, char **argv) char envbuf[21]; /* enough for any 64-bit integer */ size_t envbufsz; int async_max_threads = erts_async_max_threads; - - early_init(&argc, argv); + int ncpu = early_init(&argc, argv); envbufsz = sizeof(envbuf); if (erts_sys_getenv(ERL_MAX_ETS_TABLES_ENV, envbuf, &envbufsz) == 0) @@ -1110,7 +1100,7 @@ erl_start(int argc, char **argv) char *sub_param = argv[i]+2; if (has_prefix("bt", sub_param)) { arg = get_arg(sub_param+2, argv[i+1], &i); - res = erts_init_scheduler_bind_type(arg); + res = erts_init_scheduler_bind_type_string(arg); if (res != ERTS_INIT_SCHED_BIND_TYPE_SUCCESS) { switch (res) { case ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED: @@ -1135,7 +1125,7 @@ erl_start(int argc, char **argv) } else if (has_prefix("ct", sub_param)) { arg = get_arg(sub_param+2, argv[i+1], &i); - res = erts_init_cpu_topology(arg); + res = erts_init_cpu_topology_string(arg); if (res != ERTS_INIT_CPU_TOPOLOGY_OK) { switch (res) { case ERTS_INIT_CPU_TOPOLOGY_INVALID_ID: @@ -1346,6 +1336,26 @@ erl_start(int argc, char **argv) } break; + case 'z': { + char *sub_param = argv[i]+2; + int new_limit; + + if (has_prefix("dbbl", sub_param)) { + arg = get_arg(sub_param+4, argv[i+1], &i); + new_limit = atoi(arg); + if (new_limit < 1 || INT_MAX/1024 < new_limit) { + erts_fprintf(stderr, "Invalid dbbl limit: %d\n", new_limit); + erts_usage(); + } else { + erts_dist_buf_busy_limit = new_limit*1024; + } + } else { + erts_fprintf(stderr, "bad -z option %s\n", argv[i]); + erts_usage(); + } + break; + } + default: erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]); erts_usage(); @@ -1386,7 +1396,7 @@ erl_start(int argc, char **argv) boot_argc = argc - i; /* Number of arguments to init */ boot_argv = &argv[i]; - erl_init(); + erl_init(ncpu); init_shared_memory(boot_argc, boot_argv); load_preloaded(); diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index d6138fa4e4..04c7dbd2ec 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -128,8 +128,8 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "removed_fd_pre_alloc_lock", NULL }, { "state_prealloc", NULL }, { "schdlr_sspnd", NULL }, - { "cpu_bind", NULL }, { "run_queue", "address" }, + { "cpu_info", NULL }, { "pollset", "address" }, #ifdef __WIN32__ { "pollwaiter", "address" }, diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c index d0b08bf72e..8cdda395df 100644 --- a/erts/emulator/beam/erl_node_tables.c +++ b/erts/emulator/beam/erl_node_tables.c @@ -107,7 +107,7 @@ dist_table_alloc(void *dep_tmpl) dep->nlinks = NULL; dep->monitors = NULL; - erts_smp_spinlock_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr); + erts_smp_mtx_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr); dep->qflgs = 0; dep->qsize = 0; dep->out_queue.first = NULL; @@ -172,7 +172,7 @@ dist_table_free(void *vdep) ASSERT(!dep->cache); erts_smp_rwmtx_destroy(&dep->rwmtx); erts_smp_mtx_destroy(&dep->lnk_mtx); - erts_smp_spinlock_destroy(&dep->qlock); + erts_smp_mtx_destroy(&dep->qlock); #ifdef DEBUG sys_memset(vdep, 0x77, sizeof(DistEntry)); @@ -755,9 +755,9 @@ void erts_init_node_tables(void) erts_this_dist_entry->nlinks = NULL; erts_this_dist_entry->monitors = NULL; - erts_smp_spinlock_init_x(&erts_this_dist_entry->qlock, - "dist_entry_out_queue", - make_small(ERST_INTERNAL_CHANNEL_NO)); + erts_smp_mtx_init_x(&erts_this_dist_entry->qlock, + "dist_entry_out_queue", + make_small(ERST_INTERNAL_CHANNEL_NO)); erts_this_dist_entry->qflgs = 0; erts_this_dist_entry->qsize = 0; erts_this_dist_entry->out_queue.first = NULL; diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h index eb759b87e9..b0a63ae035 100644 --- a/erts/emulator/beam/erl_node_tables.h +++ b/erts/emulator/beam/erl_node_tables.h @@ -131,7 +131,7 @@ typedef struct dist_entry_ { ErtsLink *nlinks; /* Link tree with subtrees */ ErtsMonitor *monitors; /* Monitor tree */ - erts_smp_spinlock_t qlock; /* Protects qflgs and out_queue */ + erts_smp_mtx_t qlock; /* Protects qflgs and out_queue */ Uint32 qflgs; Sint qsize; ErtsDistOutputQueue out_queue; diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index b47ce97c46..f252c2cbe2 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -24,7 +24,6 @@ #endif #include <stddef.h> /* offsetof() */ -#include <ctype.h> #include "sys.h" #include "erl_vm.h" #include "global.h" @@ -39,6 +38,7 @@ #include "erl_threads.h" #include "erl_binary.h" #include "beam_bp.h" +#include "erl_cpu_topology.h" #define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS) #define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \ @@ -63,8 +63,6 @@ #define ERTS_WAKEUP_OTHER_DEC 10 #define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10) -#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) - #if 0 || defined(DEBUG) #define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA #endif @@ -119,10 +117,6 @@ Uint erts_process_tab_index_mask; static int wakeup_other_limit; -#ifdef ERTS_SMP -Uint erts_max_main_threads; -#endif - int erts_sched_thread_suggested_stack_size = -1; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -195,48 +189,6 @@ do { \ #endif -/* - * Cpu topology hierarchy. - */ -#define ERTS_TOPOLOGY_NODE 0 -#define ERTS_TOPOLOGY_PROCESSOR 1 -#define ERTS_TOPOLOGY_PROCESSOR_NODE 2 -#define ERTS_TOPOLOGY_CORE 3 -#define ERTS_TOPOLOGY_THREAD 4 -#define ERTS_TOPOLOGY_LOGICAL 5 - -#define ERTS_TOPOLOGY_MAX_DEPTH 6 - -typedef struct { - int bind_id; - int bound_id; -} ErtsCpuBindData; - -static ErtsCpuBindData *scheduler2cpu_map; -erts_smp_rwmtx_t erts_cpu_bind_rwmtx; - -typedef enum { - ERTS_CPU_BIND_UNDEFINED, - ERTS_CPU_BIND_SPREAD, - ERTS_CPU_BIND_PROCESSOR_SPREAD, - ERTS_CPU_BIND_THREAD_SPREAD, - ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD, - ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD, - ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD, - ERTS_CPU_BIND_NO_SPREAD, - ERTS_CPU_BIND_NONE -} ErtsCpuBindOrder; - -#define ERTS_CPU_BIND_DEFAULT_BIND \ - ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD - -ErtsCpuBindOrder cpu_bind_order; - -static erts_cpu_topology_t *user_cpudata; -static int user_cpudata_size; -static erts_cpu_topology_t *system_cpudata; -static int system_cpudata_size; - erts_sched_stat_t erts_sched_stat; ErtsRunQueue *erts_common_run_queue; @@ -259,11 +211,6 @@ ErtsSchedulerData *erts_scheduler_data; ErtsAlignedRunQueue *erts_aligned_run_queues; Uint erts_no_run_queues; -typedef union { - ErtsSchedulerData esd; - char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))]; -} ErtsAlignedSchedulerData; - ErtsAlignedSchedulerData *erts_aligned_scheduler_data; #ifdef ERTS_SMP @@ -334,12 +281,6 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, 200, ERTS_ALC_T_PROC_LIST) -#define ERTS_RUNQ_IX(IX) \ - (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \ - &erts_aligned_run_queues[(IX)].runq) -#define ERTS_SCHEDULER_IX(IX) \ - (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \ - &erts_aligned_scheduler_data[(IX)].esd) #define ERTS_SCHED_SLEEP_INFO_IX(IX) \ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \ &aligned_sched_sleep_info[(IX)].ssi) @@ -398,22 +339,8 @@ static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, #ifdef ERTS_SMP static void handle_pending_exiters(ErtsProcList *); -static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, - int size, - ErtsCpuBindOrder bind_order, - int mk_seq); -static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); - #endif -static int reader_group_lookup(int logical); -static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, - int *cpudata_size); -static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata); - -static void early_cpu_bind_init(void); -static void late_cpu_bind_init(void); - #if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) int erts_smp_lc_runq_is_locked(ErtsRunQueue *runq) @@ -469,13 +396,13 @@ erts_pre_init_process(void) /* initialize the scheduler */ void -erts_init_process(void) +erts_init_process(int ncpu) { Uint proc_bits = ERTS_PROC_BITS; #ifdef ERTS_SMP erts_disable_proc_not_running_opt = 0; - erts_init_proc_lock(); + erts_init_proc_lock(ncpu); #endif init_proclist_alloc(); @@ -1060,6 +987,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) sys_poll_aux_work: + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + erl_sys_schedule(1); /* Might give us something to do */ dt = do_time_read_and_reset(); @@ -1155,6 +1084,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) erts_smp_runq_unlock(rq); + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + erl_sys_schedule(0); dt = do_time_read_and_reset(); @@ -1242,7 +1173,7 @@ wake_scheduler(ErtsRunQueue *rq, int incq, int one) do { ErtsSchedulerSleepInfo *wake_ssi = ssi; ssi = ssi->next; - erts_sched_finish_poke(ssi, ssi_flags_set_wake(wake_ssi)); + erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi)); } while (ssi); } } @@ -1335,6 +1266,31 @@ erts_smp_notify_inc_runq(ErtsRunQueue *runq) smp_notify_inc_runq(runq); } +void +erts_sched_notify_check_cpu_bind(void) +{ +#ifdef ERTS_SMP + int ix; + if (erts_common_run_queue) { + for (ix = 0; ix < erts_no_schedulers; ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1); + wake_all_schedulers(); + } + else { + for (ix = 0; ix < erts_no_run_queues; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; + erts_smp_runq_unlock(rq); + wake_scheduler(rq, 0, 1); + }; + } +#else + erts_sched_check_cpu_bind(erts_get_scheduler_data()); +#endif +} + + #ifdef ERTS_SMP ErtsRunQueue * @@ -2379,7 +2335,6 @@ erts_debug_nbalance(void) void erts_early_init_scheduling(void) { - early_cpu_bind_init(); wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM; } @@ -2656,8 +2611,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) /* init port tasks */ erts_port_task_init(); - - late_cpu_bind_init(); } ErtsRunQueue * @@ -2883,12 +2836,10 @@ suspend_scheduler(ErtsSchedulerData *esdp) long flgs; int changing; long no = (long) esdp->no; - ErtsRunQueue *rq = esdp->run_queue; ErtsSchedulerSleepInfo *ssi = esdp->ssi; long active_schedulers; int curr_online = 1; int wake = 0; - int reset_read_group = 0; #if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \ || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) long aux_work; @@ -2909,20 +2860,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) erts_smp_runq_unlock(esdp->run_queue); - /* Unbind from cpu */ - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - if (scheduler2cpu_map[esdp->no].bound_id >= 0 - && erts_unbind_from_cpu(erts_cpuinfo) == 0) { - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - reset_read_group = 1; - } - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - if (reset_read_group) - erts_smp_rwmtx_set_reader_group(0); - - if (esdp->no <= erts_max_main_threads) - erts_thr_set_main_status(0, 0); + erts_sched_check_cpu_bind_prep_suspend(esdp); if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_inactive); @@ -3056,17 +2994,10 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_active); - if (esdp->no <= erts_max_main_threads) - erts_thr_set_main_status(1, (int) esdp->no); - erts_smp_runq_lock(esdp->run_queue); non_empty_runq(esdp->run_queue); - /* Make sure we check if we should bind to a cpu or not... */ - if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 1); - else - rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; + erts_sched_check_cpu_bind_post_suspend(esdp); } #define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \ @@ -3583,15 +3514,7 @@ sched_thread_func(void *vesdp) erts_tsd_set(sched_data_key, vesdp); #ifdef ERTS_SMP - if (no <= erts_max_main_threads) { - erts_thr_set_main_status(1, (int) no); - if (erts_reader_groups) { - int rg = (int) no; - if (rg > erts_reader_groups) - rg = (((int) no) - 1) % erts_reader_groups + 1; - erts_smp_rwmtx_set_reader_group(rg); - } - } + erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp); erts_proc_lock_prepare_proc_lock_waiter(); ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch(); @@ -3693,1907 +3616,6 @@ erts_start_schedulers(void) #endif /* ERTS_SMP */ -static int -int_cmp(const void *vx, const void *vy) -{ - return *((int *) vx) - *((int *) vy); -} - -static int -cpu_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->core != y->core) - return x->core - y->core; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->node != y->node) - return x->node - y->node; - return 0; -} - -static int -cpu_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_thread_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - return 0; -} - -static int -cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->node != y->node) - return x->node - y->node; - if (x->core != y->core) - return x->core - y->core; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->core != y->core) - return x->core - y->core; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->core != y->core) - return x->core - y->core; - return 0; -} - -static int -cpu_no_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->thread != y->thread) - return x->thread - y->thread; - return 0; -} - -static ERTS_INLINE void -make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node) -{ - int ix; - int node = -1; - int processor = -1; - int processor_node = -1; - int processor_node_node = -1; - int core = -1; - int thread = -1; - int old_node = -1; - int old_processor = -1; - int old_processor_node = -1; - int old_core = -1; - int old_thread = -1; - - for (ix = 0; ix < size; ix++) { - if (!no_node || cpudata[ix].node >= 0) { - if (old_node == cpudata[ix].node) - cpudata[ix].node = node; - else { - old_node = cpudata[ix].node; - old_processor = processor = -1; - if (!no_node) - old_processor_node = processor_node = -1; - old_core = core = -1; - old_thread = thread = -1; - if (no_node || cpudata[ix].node >= 0) - cpudata[ix].node = ++node; - } - } - if (old_processor == cpudata[ix].processor) - cpudata[ix].processor = processor; - else { - old_processor = cpudata[ix].processor; - if (!no_node) - processor_node_node = old_processor_node = processor_node = -1; - old_core = core = -1; - old_thread = thread = -1; - cpudata[ix].processor = ++processor; - } - if (no_node && cpudata[ix].processor_node < 0) - old_processor_node = -1; - else { - if (old_processor_node == cpudata[ix].processor_node) { - if (no_node) - cpudata[ix].node = cpudata[ix].processor_node = node; - else { - if (processor_node_node >= 0) - cpudata[ix].node = processor_node_node; - cpudata[ix].processor_node = processor_node; - } - } - else { - old_processor_node = cpudata[ix].processor_node; - old_core = core = -1; - old_thread = thread = -1; - if (no_node) - cpudata[ix].node = cpudata[ix].processor_node = ++node; - else { - cpudata[ix].node = processor_node_node = ++node; - cpudata[ix].processor_node = ++processor_node; - } - } - } - if (!no_node && cpudata[ix].processor_node < 0) - cpudata[ix].processor_node = 0; - if (old_core == cpudata[ix].core) - cpudata[ix].core = core; - else { - old_core = cpudata[ix].core; - old_thread = thread = -1; - cpudata[ix].core = ++core; - } - if (old_thread == cpudata[ix].thread) - cpudata[ix].thread = thread; - else - old_thread = cpudata[ix].thread = ++thread; - } -} - -static void -cpu_bind_order_sort(erts_cpu_topology_t *cpudata, - int size, - ErtsCpuBindOrder bind_order, - int mk_seq) -{ - if (size > 1) { - int no_node = 0; - int (*cmp_func)(const void *, const void *); - switch (bind_order) { - case ERTS_CPU_BIND_SPREAD: - cmp_func = cpu_spread_order_cmp; - break; - case ERTS_CPU_BIND_PROCESSOR_SPREAD: - cmp_func = cpu_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_THREAD_SPREAD: - cmp_func = cpu_thread_spread_order_cmp; - break; - case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: - no_node = 1; - cmp_func = cpu_thread_no_node_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: - no_node = 1; - cmp_func = cpu_no_node_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: - no_node = 1; - cmp_func = cpu_no_node_thread_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_SPREAD: - cmp_func = cpu_no_spread_order_cmp; - break; - default: - cmp_func = NULL; - erl_exit(ERTS_ABORT_EXIT, - "Bad cpu bind type: %d\n", - (int) cpu_bind_order); - break; - } - - if (mk_seq) - make_cpudata_id_seq(cpudata, size, no_node); - - qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func); - } -} - -static int -processor_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->node != y->node) - return x->node - y->node; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->thread != y->thread) - return x->thread - y->thread; - return 0; -} - -static void -check_cpu_bind(ErtsSchedulerData *esdp) -{ - int rg = 0; - int res; - int cpu_id; - erts_smp_runq_unlock(esdp->run_queue); - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - cpu_id = scheduler2cpu_map[esdp->no].bind_id; - if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) { - res = erts_bind_to_cpu(erts_cpuinfo, cpu_id); - if (res == 0) - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id; - else { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n", - (int) esdp->no, cpu_id, erl_errno_id(-res)); - erts_send_error_to_logger_nogl(dsbufp); - if (scheduler2cpu_map[esdp->no].bound_id >= 0) - goto unbind; - } - } - else if (cpu_id < 0) { - unbind: - /* Get rid of old binding */ - res = erts_unbind_from_cpu(erts_cpuinfo); - if (res == 0) - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - else if (res != -ENOTSUP) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n", - (int) esdp->no, cpu_id, erl_errno_id(-res)); - erts_send_error_to_logger_nogl(dsbufp); - } - } - if (erts_reader_groups) { - if (esdp->cpu_id >= 0) - rg = reader_group_lookup(esdp->cpu_id); - else - rg = (((int) esdp->no) - 1) % erts_reader_groups + 1; - } - erts_smp_runq_lock(esdp->run_queue); -#ifdef ERTS_SMP - if (erts_common_run_queue) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); - else { - esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; - } -#endif - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - if (erts_reader_groups) - erts_smp_rwmtx_set_reader_group(rg); -} - -static void -signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size) -{ - int s_ix = 1; - int cpu_ix; - - if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) { - - cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1); - - for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++) - if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical)) - scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical; - } - - if (s_ix <= erts_no_schedulers) - for (; s_ix <= erts_no_schedulers; s_ix++) - scheduler2cpu_map[s_ix].bind_id = -1; - -#ifdef ERTS_SMP - if (erts_common_run_queue) { - for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1); - wake_all_schedulers(); - } - else { - for (s_ix = 0; s_ix < erts_no_run_queues; s_ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(s_ix); - erts_smp_runq_lock(rq); - rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; - erts_smp_runq_unlock(rq); - wake_scheduler(rq, 0, 1); - }; - } -#else - check_cpu_bind(erts_get_scheduler_data()); -#endif -} - -int -erts_init_scheduler_bind_type(char *how) -{ - if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) - return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED; - - if (!system_cpudata && !user_cpudata) - return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY; - - if (sys_strcmp(how, "db") == 0) - cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; - else if (sys_strcmp(how, "s") == 0) - cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (sys_strcmp(how, "ps") == 0) - cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "ts") == 0) - cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (sys_strcmp(how, "tnnps") == 0) - cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "nnps") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "nnts") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (sys_strcmp(how, "ns") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (sys_strcmp(how, "u") == 0) - cpu_bind_order = ERTS_CPU_BIND_NONE; - else - return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE; - - return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS; -} - -/* - * reader groups map - */ - -typedef struct { - int level[ERTS_TOPOLOGY_MAX_DEPTH+1]; -} erts_avail_cput; - -typedef struct { - int *map; - int size; - int groups; -} erts_reader_groups_map_test; - -typedef struct { - int id; - int sub_levels; - int reader_groups; -} erts_rg_count_t; - -typedef struct { - int logical; - int reader_group; -} erts_reader_groups_map_t; - -typedef struct { - erts_reader_groups_map_t *map; - int map_size; - int logical_processors; - int groups; -} erts_make_reader_groups_map_test; - -static int reader_groups_available_cpu_check; -static int reader_groups_logical_processors; -static int reader_groups_map_size; -static erts_reader_groups_map_t *reader_groups_map; - -#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH - -static void -make_reader_groups_map(erts_make_reader_groups_map_test *test); - -static Eterm -get_reader_groups_map(Process *c_p, - erts_reader_groups_map_t *map, - int map_size, - int logical_processors) -{ -#ifdef DEBUG - Eterm *endp; -#endif - Eterm res = NIL, tuple; - Eterm *hp; - int i; - - hp = HAlloc(c_p, logical_processors*(2+3)); -#ifdef DEBUG - endp = hp + logical_processors*(2+3); -#endif - for (i = map_size - 1; i >= 0; i--) { - if (map[i].logical >= 0) { - tuple = TUPLE2(hp, - make_small(map[i].logical), - make_small(map[i].reader_group)); - hp += 3; - res = CONS(hp, tuple, res); - hp += 2; - } - } - ASSERT(hp == endp); - return res; -} - -Eterm -erts_debug_reader_groups_map(Process *c_p, int groups) -{ - Eterm res; - erts_make_reader_groups_map_test test; - - test.groups = groups; - make_reader_groups_map(&test); - if (!test.map) - res = NIL; - else { - res = get_reader_groups_map(c_p, - test.map, - test.map_size, - test.logical_processors); - erts_free(ERTS_ALC_T_TMP, test.map); - } - return res; -} - - -Eterm -erts_get_reader_groups_map(Process *c_p) -{ - Eterm res; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - res = get_reader_groups_map(c_p, - reader_groups_map, - reader_groups_map_size, - reader_groups_logical_processors); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return res; -} - -static void -make_available_cpu_topology(erts_avail_cput *no, - erts_avail_cput *avail, - erts_cpu_topology_t *cpudata, - int *size, - int test) -{ - int len = *size; - erts_cpu_topology_t last; - int a, i, j; - - no->level[ERTS_TOPOLOGY_NODE] = -1; - no->level[ERTS_TOPOLOGY_PROCESSOR] = -1; - no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1; - no->level[ERTS_TOPOLOGY_CORE] = -1; - no->level[ERTS_TOPOLOGY_THREAD] = -1; - no->level[ERTS_TOPOLOGY_LOGICAL] = -1; - - last.node = INT_MIN; - last.processor = INT_MIN; - last.processor_node = INT_MIN; - last.core = INT_MIN; - last.thread = INT_MIN; - last.logical = INT_MIN; - - a = 0; - - for (i = 0; i < len; i++) { - - if (!test && !erts_is_cpu_available(erts_cpuinfo, cpudata[i].logical)) - continue; - - if (last.node != cpudata[i].node) - goto node; - if (last.processor != cpudata[i].processor) - goto processor; - if (last.processor_node != cpudata[i].processor_node) - goto processor_node; - if (last.core != cpudata[i].core) - goto core; - ASSERT(last.thread != cpudata[i].thread); - goto thread; - - node: - no->level[ERTS_TOPOLOGY_NODE]++; - processor: - no->level[ERTS_TOPOLOGY_PROCESSOR]++; - processor_node: - no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++; - core: - no->level[ERTS_TOPOLOGY_CORE]++; - thread: - no->level[ERTS_TOPOLOGY_THREAD]++; - - no->level[ERTS_TOPOLOGY_LOGICAL]++; - - for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++) - avail[a].level[j] = no->level[j]; - - avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical; - avail[a].level[ERTS_TOPOLOGY_RG] = 0; - - ASSERT(last.logical != cpudata[a].logical); - - last = cpudata[i]; - a++; - } - - no->level[ERTS_TOPOLOGY_NODE]++; - no->level[ERTS_TOPOLOGY_PROCESSOR]++; - no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++; - no->level[ERTS_TOPOLOGY_CORE]++; - no->level[ERTS_TOPOLOGY_THREAD]++; - no->level[ERTS_TOPOLOGY_LOGICAL]++; - - *size = a; -} - -static int -reader_group_lookup(int logical) -{ - int start = logical % reader_groups_map_size; - int ix = start; - - do { - if (reader_groups_map[ix].logical == logical) { - ASSERT(reader_groups_map[ix].reader_group > 0); - return reader_groups_map[ix].reader_group; - } - ix++; - if (ix == reader_groups_map_size) - ix = 0; - } while (ix != start); - - erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); -} - -static void -reader_group_insert(erts_reader_groups_map_t *map, int map_size, - int logical, int reader_group) -{ - int start = logical % map_size; - int ix = start; - - do { - if (map[ix].logical < 0) { - map[ix].logical = logical; - map[ix].reader_group = reader_group; - return; - } - ix++; - if (ix == map_size) - ix = 0; - } while (ix != start); - - erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n"); -} - - -static int -sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail) -{ - int sub_level = level+1; - int last = -1; - rgc->sub_levels = 0; - - do { - if (last != avail[aix].level[sub_level]) { - rgc->sub_levels++; - last = avail[aix].level[sub_level]; - } - aix++; - } - while (aix < avail_sz && rgc->id == avail[aix].level[level]); - rgc->reader_groups = 0; - return aix; -} - -static int -write_reader_groups(int *rgp, erts_rg_count_t *rgcp, - int level, int a, - int avail_sz, erts_avail_cput *avail) -{ - int rg = *rgp; - int sub_level = level+1; - int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups; - int xsl = rgcp->sub_levels % rgcp->reader_groups; - int sls = 0; - int last = -1; - int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1; - - ASSERT(level < 0 || avail[a].level[level] == rgcp->id) - - do { - if (last != avail[a].level[sub_level]) { - if (!sls) { - sls = sl_per_gr; - rg++; - if (rg >= xsl_rg_lim) - sls++; - } - last = avail[a].level[sub_level]; - sls--; - } - avail[a].level[ERTS_TOPOLOGY_RG] = rg; - a++; - } while (a < avail_sz && (level < 0 - || avail[a].level[level] == rgcp->id)); - - ASSERT(rgcp->reader_groups == rg - *rgp); - - *rgp = rg; - - return a; -} - -static int -rg_count_sub_levels_compare(const void *vx, const void *vy) -{ - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; - if (x->sub_levels != y->sub_levels) - return y->sub_levels - x->sub_levels; - return x->id - y->id; -} - -static int -rg_count_id_compare(const void *vx, const void *vy) -{ - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; - return x->id - y->id; -} - -static void -make_reader_groups_map(erts_make_reader_groups_map_test *test) -{ - int i, spread_level, avail_sz; - erts_avail_cput no, *avail; - erts_cpu_topology_t *cpudata; - erts_reader_groups_map_t *map; - int map_sz; - int groups = erts_reader_groups; - - if (test) { - test->map = NULL; - test->map_size = 0; - groups = test->groups; - } - - if (!groups) - return; - - if (!test) { - if (reader_groups_map) - erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map); - - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - } - - create_tmp_cpu_topology_copy(&cpudata, &avail_sz); - - if (!cpudata) - return; - - cpu_bind_order_sort(cpudata, - avail_sz, - ERTS_CPU_BIND_NO_SPREAD, - 1); - - avail = erts_alloc(ERTS_ALC_T_TMP, - sizeof(erts_avail_cput)*avail_sz); - - make_available_cpu_topology(&no, avail, cpudata, - &avail_sz, test != NULL); - - destroy_tmp_cpu_topology_copy(cpudata); - - map_sz = avail_sz*2+1; - - if (test) { - map = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - test->map = map; - test->map_size = map_sz; - test->logical_processors = avail_sz; - } - else { - map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - reader_groups_map = map; - reader_groups_logical_processors = avail_sz; - reader_groups_map_size = map_sz; - - } - - for (i = 0; i < map_sz; i++) { - map[i].logical = -1; - map[i].reader_group = 0; - } - - spread_level = ERTS_TOPOLOGY_CORE; - for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) { - if (no.level[i] > groups) { - spread_level = i; - break; - } - } - - if (no.level[spread_level] <= groups) { - int a, rg, last = -1; - rg = 0; - ASSERT(spread_level == ERTS_TOPOLOGY_CORE); - for (a = 0; a < avail_sz; a++) { - if (last != avail[a].level[spread_level]) { - rg++; - last = avail[a].level[spread_level]; - } - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - rg); - } - } - else { /* groups < no.level[spread_level] */ - erts_rg_count_t *rg_count; - int a, rg, tl, toplevels; - - tl = spread_level-1; - - if (spread_level == ERTS_TOPOLOGY_NODE) - toplevels = 1; - else - toplevels = no.level[tl]; - - rg_count = erts_alloc(ERTS_ALC_T_TMP, - toplevels*sizeof(erts_rg_count_t)); - - if (toplevels == 1) { - rg_count[0].id = 0; - rg_count[0].sub_levels = no.level[spread_level]; - rg_count[0].reader_groups = groups; - } - else { - int rgs_per_tl, rgs; - rgs = groups; - rgs_per_tl = rgs / toplevels; - - a = 0; - for (i = 0; i < toplevels; i++) { - rg_count[i].id = avail[a].level[tl]; - a = sub_levels(&rg_count[i], tl, a, avail_sz, avail); - } - - qsort(rg_count, - toplevels, - sizeof(erts_rg_count_t), - rg_count_sub_levels_compare); - - for (i = 0; i < toplevels; i++) { - if (rg_count[i].sub_levels < rgs_per_tl) { - rg_count[i].reader_groups = rg_count[i].sub_levels; - rgs -= rg_count[i].sub_levels; - } - else { - rg_count[i].reader_groups = rgs_per_tl; - rgs -= rgs_per_tl; - } - } - - while (rgs > 0) { - for (i = 0; i < toplevels; i++) { - if (rg_count[i].sub_levels == rg_count[i].reader_groups) - break; - else { - rg_count[i].reader_groups++; - if (--rgs == 0) - break; - } - } - } - - qsort(rg_count, - toplevels, - sizeof(erts_rg_count_t), - rg_count_id_compare); - } - - a = i = rg = 0; - while (a < avail_sz) { - a = write_reader_groups(&rg, &rg_count[i], tl, - a, avail_sz, avail); - i++; - } - - ASSERT(groups == rg); - - for (a = 0; a < avail_sz; a++) - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - avail[a].level[ERTS_TOPOLOGY_RG]); - - erts_free(ERTS_ALC_T_TMP, rg_count); - } - - erts_free(ERTS_ALC_T_TMP, avail); -} - -/* - * CPU topology - */ - -typedef struct { - int *id; - int used; - int size; -} ErtsCpuTopIdSeq; - -typedef struct { - ErtsCpuTopIdSeq logical; - ErtsCpuTopIdSeq thread; - ErtsCpuTopIdSeq core; - ErtsCpuTopIdSeq processor_node; - ErtsCpuTopIdSeq processor; - ErtsCpuTopIdSeq node; -} ErtsCpuTopEntry; - -static void -init_cpu_top_entry(ErtsCpuTopEntry *cte) -{ - int size = 10; - cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->logical.size = size; - cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->thread.size = size; - cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->core.size = size; - cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->processor_node.size = size; - cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->processor.size = size; - cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->node.size = size; -} - -static void -destroy_cpu_top_entry(ErtsCpuTopEntry *cte) -{ - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id); -} - -static int -get_cput_value_or_range(int *v, int *vr, char **str) -{ - long l; - char *c = *str; - errno = 0; - if (!isdigit((unsigned char)*c)) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; - l = strtol(c, &c, 10); - if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; - *v = (int) l; - if (*c == '-') { - c++; - if (!isdigit((unsigned char)*c)) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - l = strtol(c, &c, 10); - if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - *vr = (int) l; - } - *str = c; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str) -{ - int ix = 0; - int need_size = 0; - char *c = *str; - - while (1) { - int res; - int val; - int nids; - int val_range = -1; - res = get_cput_value_or_range(&val, &val_range, &c); - if (res != ERTS_INIT_CPU_TOPOLOGY_OK) - return res; - if (val_range < 0 || val_range == val) - nids = 1; - else { - if (val_range > val) - nids = val_range - val + 1; - else - nids = val - val_range + 1; - } - need_size += nids; - if (need_size > idseq->size) { - idseq->size = need_size + 10; - idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS, - idseq->id, - sizeof(int)*idseq->size); - } - if (nids == 1) - idseq->id[ix++] = val; - else if (val_range > val) { - for (; val <= val_range; val++) - idseq->id[ix++] = val; - } - else { - for (; val >= val_range; val--) - idseq->id[ix++] = val; - } - if (*c != ',') - break; - c++; - } - *str = c; - idseq->used = ix; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -get_cput_entry(ErtsCpuTopEntry *cput, char **str) -{ - int h; - char *c = *str; - - cput->logical.used = 0; - cput->thread.id[0] = 0; - cput->thread.used = 1; - cput->core.id[0] = 0; - cput->core.used = 1; - cput->processor_node.id[0] = -1; - cput->processor_node.used = 1; - cput->processor.id[0] = 0; - cput->processor.used = 1; - cput->node.id[0] = -1; - cput->node.used = 1; - - h = ERTS_TOPOLOGY_MAX_DEPTH; - while (*c != ':' && *c != '\0') { - int res; - ErtsCpuTopIdSeq *idseqp; - switch (*c++) { - case 'L': - if (h <= ERTS_TOPOLOGY_LOGICAL) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->logical; - h = ERTS_TOPOLOGY_LOGICAL; - break; - case 't': - case 'T': - if (h <= ERTS_TOPOLOGY_THREAD) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->thread; - h = ERTS_TOPOLOGY_THREAD; - break; - case 'c': - case 'C': - if (h <= ERTS_TOPOLOGY_CORE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->core; - h = ERTS_TOPOLOGY_CORE; - break; - case 'p': - case 'P': - if (h <= ERTS_TOPOLOGY_PROCESSOR) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->processor; - h = ERTS_TOPOLOGY_PROCESSOR; - break; - case 'n': - case 'N': - if (h <= ERTS_TOPOLOGY_PROCESSOR) { - do_node: - if (h <= ERTS_TOPOLOGY_NODE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->node; - h = ERTS_TOPOLOGY_NODE; - } - else { - int p_node = 0; - char *p_chk = c; - while (*p_chk != '\0' && *p_chk != ':') { - if (*p_chk == 'p' || *p_chk == 'P') { - p_node = 1; - break; - } - p_chk++; - } - if (!p_node) - goto do_node; - if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->processor_node; - h = ERTS_TOPOLOGY_PROCESSOR_NODE; - } - break; - default: - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE; - } - res = get_cput_id_seq(idseqp, &c); - if (res != ERTS_INIT_CPU_TOPOLOGY_OK) - return res; - } - - if (cput->logical.used < 1) - return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID; - - if (*c == ':') { - c++; - } - - if (cput->thread.used != 1 - && cput->thread.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->core.used != 1 - && cput->core.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->processor_node.used != 1 - && cput->processor_node.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->processor.used != 1 - && cput->processor.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->node.used != 1 - && cput->node.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - - *str = c; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -verify_topology(erts_cpu_topology_t *cpudata, int size) -{ - if (size > 0) { - int *logical; - int node, processor, no_nodes, i; - - /* Verify logical ids */ - logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size); - - for (i = 0; i < size; i++) - logical[i] = cpudata[i].logical; - - qsort(logical, size, sizeof(int), int_cmp); - for (i = 0; i < size-1; i++) { - if (logical[i] == logical[i+1]) { - erts_free(ERTS_ALC_T_TMP, logical); - return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS; - } - } - - erts_free(ERTS_ALC_T_TMP, logical); - - qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp); - - /* Verify unique entities */ - - for (i = 1; i < size; i++) { - if (cpudata[i-1].processor == cpudata[i].processor - && cpudata[i-1].node == cpudata[i].node - && (cpudata[i-1].processor_node - == cpudata[i].processor_node) - && cpudata[i-1].core == cpudata[i].core - && cpudata[i-1].thread == cpudata[i].thread) { - return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES; - } - } - - /* Verify numa nodes */ - node = cpudata[0].node; - processor = cpudata[0].processor; - no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0; - for (i = 1; i < size; i++) { - if (no_nodes) { - if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - } - else { - if (cpudata[i].processor == processor && cpudata[i].node != node) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - node = cpudata[i].node; - processor = cpudata[i].processor; - if (node >= 0 && cpudata[i].processor_node >= 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - if (node < 0 && cpudata[i].processor_node < 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - } - } - } - - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -int -erts_init_cpu_topology(char *topology_str) -{ - ErtsCpuTopEntry cput; - int need_size; - char *c; - int ix; - int error = ERTS_INIT_CPU_TOPOLOGY_OK; - - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata_size = 10; - - user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - - init_cpu_top_entry(&cput); - - ix = 0; - need_size = 0; - - c = topology_str; - if (*c == '\0') { - error = ERTS_INIT_CPU_TOPOLOGY_MISSING; - goto fail; - } - do { - int r; - error = get_cput_entry(&cput, &c); - if (error != ERTS_INIT_CPU_TOPOLOGY_OK) - goto fail; - need_size += cput.logical.used; - if (user_cpudata_size < need_size) { - user_cpudata_size = need_size + 10; - user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, - user_cpudata, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - } - - ASSERT(cput.thread.used == 1 - || cput.thread.used == cput.logical.used); - ASSERT(cput.core.used == 1 - || cput.core.used == cput.logical.used); - ASSERT(cput.processor_node.used == 1 - || cput.processor_node.used == cput.logical.used); - ASSERT(cput.processor.used == 1 - || cput.processor.used == cput.logical.used); - ASSERT(cput.node.used == 1 - || cput.node.used == cput.logical.used); - - for (r = 0; r < cput.logical.used; r++) { - user_cpudata[ix].logical = cput.logical.id[r]; - user_cpudata[ix].thread = - cput.thread.id[cput.thread.used == 1 ? 0 : r]; - user_cpudata[ix].core = - cput.core.id[cput.core.used == 1 ? 0 : r]; - user_cpudata[ix].processor_node = - cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r]; - user_cpudata[ix].processor = - cput.processor.id[cput.processor.used == 1 ? 0 : r]; - user_cpudata[ix].node = - cput.node.id[cput.node.used == 1 ? 0 : r]; - ix++; - } - } while (*c != '\0'); - - if (user_cpudata_size != ix) { - user_cpudata_size = ix; - user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, - user_cpudata, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - } - - error = verify_topology(user_cpudata, user_cpudata_size); - if (error == ERTS_INIT_CPU_TOPOLOGY_OK) { - destroy_cpu_top_entry(&cput); - return ERTS_INIT_CPU_TOPOLOGY_OK; - } - - fail: - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata_size = 0; - destroy_cpu_top_entry(&cput); - return error; -} - -#define ERTS_GET_CPU_TOPOLOGY_ERROR -1 -#define ERTS_GET_USED_CPU_TOPOLOGY 0 -#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1 -#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2 - -static Eterm get_cpu_topology_term(Process *c_p, int type); - -Eterm -erts_set_cpu_topology(Process *c_p, Eterm term) -{ - erts_cpu_topology_t *cpudata = NULL; - int cpudata_size = 0; - Eterm res; - - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY); - if (term == am_undefined) { - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata = NULL; - user_cpudata_size = 0; - - if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) { - cpudata_size = system_cpudata_size; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - - sys_memcpy((void *) cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*cpudata_size); - } - } - else if (is_not_list(term)) { - error: - res = THE_NON_VALUE; - goto done; - } - else { - Eterm list = term; - int ix = 0; - - cpudata_size = 100; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - - while (is_list(list)) { - Eterm *lp = list_val(list); - Eterm cpu = CAR(lp); - Eterm* tp; - Sint id; - - if (is_not_tuple(cpu)) - goto error; - - tp = tuple_val(cpu); - - if (arityval(tp[0]) != 7 || tp[1] != am_cpu) - goto error; - - if (ix >= cpudata_size) { - cpudata_size += 100; - cpudata = erts_realloc(ERTS_ALC_T_TMP, - cpudata, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - } - - id = signed_val(tp[2]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].node = (int) id; - - id = signed_val(tp[3]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].processor = (int) id; - - id = signed_val(tp[4]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].processor_node = (int) id; - - id = signed_val(tp[5]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].core = (int) id; - - id = signed_val(tp[6]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].thread = (int) id; - - id = signed_val(tp[7]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].logical = (int) id; - - list = CDR(lp); - ix++; - } - - if (is_not_nil(list)) - goto error; - - cpudata_size = ix; - - if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size)) - goto error; - - if (user_cpudata_size != cpudata_size) { - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - sizeof(erts_cpu_topology_t)*cpudata_size); - user_cpudata_size = cpudata_size; - } - - sys_memcpy((void *) user_cpudata, - (void *) cpudata, - sizeof(erts_cpu_topology_t)*cpudata_size); - } - - make_reader_groups_map(NULL); - - signal_schedulers_bind_change(cpudata, cpudata_size); - - done: - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - if (cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); - - return res; -} - -static Eterm -bound_schedulers_term(ErtsCpuBindOrder order) -{ - switch (order) { - case ERTS_CPU_BIND_SPREAD: { - ERTS_DECL_AM(spread); - return AM_spread; - } - case ERTS_CPU_BIND_PROCESSOR_SPREAD: { - ERTS_DECL_AM(processor_spread); - return AM_processor_spread; - } - case ERTS_CPU_BIND_THREAD_SPREAD: { - ERTS_DECL_AM(thread_spread); - return AM_thread_spread; - } - case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: { - ERTS_DECL_AM(thread_no_node_processor_spread); - return AM_thread_no_node_processor_spread; - } - case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: { - ERTS_DECL_AM(no_node_processor_spread); - return AM_no_node_processor_spread; - } - case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: { - ERTS_DECL_AM(no_node_thread_spread); - return AM_no_node_thread_spread; - } - case ERTS_CPU_BIND_NO_SPREAD: { - ERTS_DECL_AM(no_spread); - return AM_no_spread; - } - case ERTS_CPU_BIND_NONE: { - ERTS_DECL_AM(unbound); - return AM_unbound; - } - default: - ASSERT(0); - return THE_NON_VALUE; - } -} - -Eterm -erts_bound_schedulers_term(Process *c_p) -{ - ErtsCpuBindOrder order; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - order = cpu_bind_order; - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return bound_schedulers_term(order); -} - -static void -create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size) -{ - if (user_cpudata) { - *cpudata_size = user_cpudata_size; - *cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * (*cpudata_size))); - sys_memcpy((void *) *cpudata, - (void *) user_cpudata, - sizeof(erts_cpu_topology_t)*(*cpudata_size)); - } - else if (system_cpudata) { - *cpudata_size = system_cpudata_size; - *cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * (*cpudata_size))); - sys_memcpy((void *) *cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*(*cpudata_size)); - } - else { - *cpudata = NULL; - *cpudata_size = 0; - } -} - -static void -destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata) -{ - if (cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); -} - -Eterm -erts_bind_schedulers(Process *c_p, Eterm how) -{ - Eterm res; - erts_cpu_topology_t *cpudata; - int cpudata_size; - ErtsCpuBindOrder old_cpu_bind_order; - - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - - if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) { - ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP); - } - else { - - old_cpu_bind_order = cpu_bind_order; - - if (ERTS_IS_ATOM_STR("default_bind", how)) - cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; - else if (ERTS_IS_ATOM_STR("spread", how)) - cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (ERTS_IS_ATOM_STR("processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("no_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (ERTS_IS_ATOM_STR("unbound", how)) - cpu_bind_order = ERTS_CPU_BIND_NONE; - else { - cpu_bind_order = old_cpu_bind_order; - ERTS_BIF_PREP_ERROR(res, c_p, BADARG); - goto done; - } - - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - - if (!cpudata) { - cpu_bind_order = old_cpu_bind_order; - ERTS_BIF_PREP_ERROR(res, c_p, BADARG); - goto done; - } - - signal_schedulers_bind_change(cpudata, cpudata_size); - - destroy_tmp_cpu_topology_copy(cpudata); - - res = bound_schedulers_term(old_cpu_bind_order); - } - - done: - - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - return res; -} - -Eterm -erts_fake_scheduler_bindings(Process *p, Eterm how) -{ - ErtsCpuBindOrder fake_cpu_bind_order; - erts_cpu_topology_t *cpudata; - int cpudata_size; - Eterm res; - - if (ERTS_IS_ATOM_STR("default_bind", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND; - else if (ERTS_IS_ATOM_STR("spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (ERTS_IS_ATOM_STR("processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("no_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (ERTS_IS_ATOM_STR("unbound", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NONE; - else { - ERTS_BIF_PREP_ERROR(res, p, BADARG); - return res; - } - - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - - if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE) - ERTS_BIF_PREP_RET(res, am_false); - else { - int i; - Eterm *hp; - - cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1); - -#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA - - erts_fprintf(stderr, "node: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].node); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "processor: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].processor); - erts_fprintf(stderr, "\n"); - if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD - && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD - && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) { - erts_fprintf(stderr, "processor_node:"); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].processor_node); - erts_fprintf(stderr, "\n"); - } - erts_fprintf(stderr, "core: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].core); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "thread: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].thread); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "logical: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].logical); - erts_fprintf(stderr, "\n"); -#endif - - hp = HAlloc(p, cpudata_size+1); - ERTS_BIF_PREP_RET(res, make_tuple(hp)); - *hp++ = make_arityval((Uint) cpudata_size); - for (i = 0; i < cpudata_size; i++) - *hp++ = make_small((Uint) cpudata[i].logical); - } - - destroy_tmp_cpu_topology_copy(cpudata); - - return res; -} - -Eterm -erts_get_schedulers_binds(Process *c_p) -{ - int ix; - ERTS_DECL_AM(unbound); - Eterm *hp = HAlloc(c_p, erts_no_schedulers+1); - Eterm res = make_tuple(hp); - - *(hp++) = make_arityval(erts_no_schedulers); - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - for (ix = 1; ix <= erts_no_schedulers; ix++) - *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0 - ? make_small(scheduler2cpu_map[ix].bound_id) - : AM_unbound); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return res; -} - -static Eterm -bld_topology_term(Eterm **hpp, - Uint *hszp, - erts_cpu_topology_t *cpudata, - int size) -{ - Eterm res = NIL; - int i; - - if (size == 0) - return am_undefined; - - for (i = size-1; i >= 0; i--) { - res = erts_bld_cons(hpp, - hszp, - erts_bld_tuple(hpp, - hszp, - 7, - am_cpu, - make_small(cpudata[i].node), - make_small(cpudata[i].processor), - make_small(cpudata[i].processor_node), - make_small(cpudata[i].core), - make_small(cpudata[i].thread), - make_small(cpudata[i].logical)), - res); - } - return res; -} - -static Eterm -get_cpu_topology_term(Process *c_p, int type) -{ -#ifdef DEBUG - Eterm *hp_end; -#endif - Eterm *hp; - Uint hsz; - Eterm res = THE_NON_VALUE; - erts_cpu_topology_t *cpudata = NULL; - int size = 0; - - switch (type) { - case ERTS_GET_USED_CPU_TOPOLOGY: - if (user_cpudata) - goto defined; - else - goto detected; - case ERTS_GET_DETECTED_CPU_TOPOLOGY: - detected: - if (!system_cpudata) - res = am_undefined; - else { - size = system_cpudata_size; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * size)); - sys_memcpy((void *) cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*size); - } - break; - case ERTS_GET_DEFINED_CPU_TOPOLOGY: - defined: - if (!user_cpudata) - res = am_undefined; - else { - size = user_cpudata_size; - cpudata = user_cpudata; - } - break; - default: - erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type); - break; - } - - if (res == am_undefined) { - ASSERT(!cpudata); - return res; - } - - hsz = 0; - - bld_topology_term(NULL, &hsz, - cpudata, size); - - hp = HAlloc(c_p, hsz); - -#ifdef DEBUG - hp_end = hp + hsz; -#endif - - res = bld_topology_term(&hp, NULL, - cpudata, size); - - ASSERT(hp_end == hp); - - if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); - - return res; -} - -Eterm -erts_get_cpu_topology_term(Process *c_p, Eterm which) -{ - Eterm res; - int type; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - if (ERTS_IS_ATOM_STR("used", which)) - type = ERTS_GET_USED_CPU_TOPOLOGY; - else if (ERTS_IS_ATOM_STR("detected", which)) - type = ERTS_GET_DETECTED_CPU_TOPOLOGY; - else if (ERTS_IS_ATOM_STR("defined", which)) - type = ERTS_GET_DEFINED_CPU_TOPOLOGY; - else - type = ERTS_GET_CPU_TOPOLOGY_ERROR; - if (type == ERTS_GET_CPU_TOPOLOGY_ERROR) - res = THE_NON_VALUE; - else - res = get_cpu_topology_term(c_p, type); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return res; -} - -static void -early_cpu_bind_init(void) -{ - user_cpudata = NULL; - user_cpudata_size = 0; - - system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo); - system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(erts_cpu_topology_t) - * system_cpudata_size)); - - cpu_bind_order = ERTS_CPU_BIND_UNDEFINED; - - reader_groups_available_cpu_check = 1; - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - - if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata) - || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, - system_cpudata_size)) { - erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); - system_cpudata = NULL; - system_cpudata_size = 0; - } -} - -static void -late_cpu_bind_init(void) -{ - int ix; - - erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind"); - - scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(ErtsCpuBindData) - * (erts_no_schedulers+1))); - for (ix = 1; ix <= erts_no_schedulers; ix++) { - scheduler2cpu_map[ix].bind_id = -1; - scheduler2cpu_map[ix].bound_id = -1; - } - - if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) { - int ncpus = erts_get_cpu_configured(erts_cpuinfo); - if (ncpus < 1 || erts_no_schedulers < ncpus) - cpu_bind_order = ERTS_CPU_BIND_NONE; - else - cpu_bind_order = ((system_cpudata || user_cpudata) - && (erts_bind_to_cpu(erts_cpuinfo, -1) != -ENOTSUP) - ? ERTS_CPU_BIND_DEFAULT_BIND - : ERTS_CPU_BIND_NONE); - } - - make_reader_groups_map(NULL); - - if (cpu_bind_order != ERTS_CPU_BIND_NONE) { - erts_cpu_topology_t *cpudata; - int cpudata_size; - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - signal_schedulers_bind_change(cpudata, cpudata_size); - destroy_tmp_cpu_topology_copy(cpudata); - } -} - -int -erts_update_cpu_info(void) -{ - int changed; - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - changed = erts_cpu_info_update(erts_cpuinfo); - if (changed) { - erts_cpu_topology_t *cpudata; - int cpudata_size; - - if (system_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); - - system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo); - if (!system_cpudata_size) - system_cpudata = NULL; - else { - system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(erts_cpu_topology_t) - * system_cpudata_size)); - - if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata) - || (ERTS_INIT_CPU_TOPOLOGY_OK - != verify_topology(system_cpudata, - system_cpudata_size))) { - erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); - system_cpudata = NULL; - system_cpudata_size = 0; - } - } - - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - signal_schedulers_bind_change(cpudata, cpudata_size); - destroy_tmp_cpu_topology_copy(cpudata); - } - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - return changed; -} - #ifdef ERTS_SMP static void @@ -7069,7 +5091,7 @@ Process *schedule(Process *p, int calls) } if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) || erts_smp_atomic_read(&esdp->chk_cpu_bind)) { - check_cpu_bind(esdp); + erts_sched_check_cpu_bind(esdp); } } @@ -7165,7 +5187,9 @@ Process *schedule(Process *p, int calls) erts_smp_atomic_set(&function_calls, 0); fcalls = 0; + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + #ifdef ERTS_SMP /* erts_sys_schedule_interrupt(0); */ #endif @@ -7497,6 +5521,15 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) ErtsRunQueue *rq = erts_get_runq_current(NULL); ErtsMiscOpList *molp = misc_op_list_alloc(); + if (!rq) { + /* + * This can only happen when the sys msg dispatcher + * thread schedules misc ops (this happens *very* + * seldom; only when trace drivers are unloaded). + */ + rq = ERTS_RUNQ_IX(0); + } + erts_smp_runq_lock(rq); while (rq->misc.evac_runq) { diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 4365e409e5..c038e57b65 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -89,7 +89,6 @@ extern int erts_sched_thread_suggested_stack_size; #define ERTS_SCHED_THREAD_MAX_STACK_SIZE 8192 /* Kilo words */ #ifdef ERTS_SMP -extern Uint erts_max_main_threads; #include "erl_bits.h" #endif @@ -426,6 +425,13 @@ struct ErtsSchedulerData_ { #endif }; +typedef union { + ErtsSchedulerData esd; + char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))]; +} ErtsAlignedSchedulerData; + +extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data; + #ifndef ERTS_SMP extern ErtsSchedulerData *erts_scheduler_data; #endif @@ -1007,27 +1013,12 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags; (p)->flags &= ~F_TIMO; \ } while (0) - -#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0 -#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1 -#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2 -#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3 - -int erts_init_scheduler_bind_type(char *how); - -#define ERTS_INIT_CPU_TOPOLOGY_OK 0 -#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1 -#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2 -#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3 -#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4 -#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5 -#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6 -#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7 -#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8 -#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9 - -int erts_init_cpu_topology(char *topology_str); -int erts_update_cpu_info(void); +#define ERTS_RUNQ_IX(IX) \ + (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \ + &erts_aligned_run_queues[(IX)].runq) +#define ERTS_SCHEDULER_IX(IX) \ + (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \ + &erts_aligned_scheduler_data[(IX)].esd) void erts_pre_init_process(void); void erts_late_init_process(void); @@ -1058,8 +1049,9 @@ Eterm erts_multi_scheduling_blockers(Process *); void erts_start_schedulers(void); void erts_smp_notify_check_children_needed(void); #endif +void erts_sched_notify_check_cpu_bind(void); Uint erts_active_schedulers(void); -void erts_init_process(void); +void erts_init_process(int); Eterm erts_process_status(Process *, ErtsProcLocks, Process *, Eterm); Uint erts_run_queues_len(Uint *); void erts_add_to_runq(Process *); diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c index a4d12139e9..1bebcdb911 100644 --- a/erts/emulator/beam/erl_process_lock.c +++ b/erts/emulator/beam/erl_process_lock.c @@ -117,10 +117,9 @@ static int aux_thr_proc_lock_spin_count; static void cleanup_tse(void); void -erts_init_proc_lock(void) +erts_init_proc_lock(int cpus) { int i; - int cpus; erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc"); for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) { #ifdef ERTS_ENABLE_LOCK_COUNT @@ -138,7 +137,6 @@ erts_init_proc_lock(void) lc_id.proc_lock_msgq = erts_lc_get_lock_order_id("proc_msgq"); lc_id.proc_lock_status = erts_lc_get_lock_order_id("proc_status"); #endif - cpus = erts_get_cpu_configured(erts_cpuinfo); if (cpus > 1) { proc_lock_spin_count = ERTS_PROC_LOCK_SPIN_COUNT_BASE; proc_lock_spin_count += (ERTS_PROC_LOCK_SPIN_COUNT_SCHED_INC diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h index 7cfc9893fa..4fe30c7209 100644 --- a/erts/emulator/beam/erl_process_lock.h +++ b/erts/emulator/beam/erl_process_lock.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2007-2009. All Rights Reserved. + * Copyright Ericsson AB 2007-2010. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -334,7 +334,7 @@ erts_proc_lock_flags_cmpxchg(erts_proc_lock_t *lck, ErtsProcLocks new, extern erts_pix_lock_t erts_pix_locks[ERTS_NO_OF_PIX_LOCKS]; -void erts_init_proc_lock(void); +void erts_init_proc_lock(int cpus); void erts_proc_lock_prepare_proc_lock_waiter(void); void erts_proc_lock_failed(Process *, erts_pix_lock_t *, diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h index 0b7269262e..a74cf79b8c 100644 --- a/erts/emulator/beam/erl_threads.h +++ b/erts/emulator/beam/erl_threads.h @@ -27,9 +27,6 @@ #define ERTS_SPIN_BODY ETHR_SPIN_BODY -#define ERTS_MAX_READER_GROUPS 8 -extern int erts_reader_groups; - #include "sys.h" #ifdef USE_THREADS diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index ecd3c8f68a..12536f6cde 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1728,11 +1728,6 @@ Uint erts_current_reductions(Process* current, Process *p); int erts_print_system_version(int to, void *arg, Process *c_p); -/* - * Interface to erl_init - */ -void erl_init(void); - #define seq_trace_output(token, msg, type, receiver, process) \ seq_trace_output_generic((token), (msg), (type), (receiver), (process), NIL) #define seq_trace_output_exit(token, msg, type, receiver, exitfrom) \ diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index 79022d5dd7..9ed92bbe03 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -2802,17 +2802,25 @@ driver_deliver_term(ErlDrvPort port, break; case ERL_DRV_INT: /* signed int argument */ ERTS_DDT_CHK_ENOUGH_ARGS(1); +#if HALFWORD_HEAP + erts_bld_sint64(NULL, &need, (Sint64)ptr[0]); +#else /* check for bignum */ if (!IS_SSMALL((Sint)ptr[0])) need += BIG_UINT_HEAP_SIZE; /* use small_to_big */ +#endif ptr++; depth++; break; case ERL_DRV_UINT: /* unsigned int argument */ ERTS_DDT_CHK_ENOUGH_ARGS(1); +#if HALFWORD_HEAP + erts_bld_uint64(NULL, &need, (Uint64)ptr[0]); +#else /* check for bignum */ if (!IS_USMALL(0, (Uint)ptr[0])) need += BIG_UINT_HEAP_SIZE; /* use small_to_big */ +#endif ptr++; depth++; break; @@ -2979,22 +2987,30 @@ driver_deliver_term(ErlDrvPort port, break; case ERL_DRV_INT: /* signed int argument */ +#if HALFWORD_HEAP + mess = erts_bld_sint64(&hp, NULL, (Sint64)ptr[0]); +#else if (IS_SSMALL((Sint)ptr[0])) mess = make_small((Sint)ptr[0]); else { mess = small_to_big((Sint)ptr[0], hp); hp += BIG_UINT_HEAP_SIZE; } +#endif ptr++; break; case ERL_DRV_UINT: /* unsigned int argument */ +#if HALFWORD_HEAP + mess = erts_bld_uint64(&hp, NULL, (Uint64)ptr[0]); +#else if (IS_USMALL(0, (Uint)ptr[0])) mess = make_small((Uint)ptr[0]); else { mess = uint_to_big((Uint)ptr[0], hp); hp += BIG_UINT_HEAP_SIZE; } +#endif ptr++; break; diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 0031568af6..93203a08a9 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -466,8 +466,6 @@ static const int zero_value = 0, one_value = 1; # endif /* !__WIN32__ */ #endif /* WANT_NONBLOCKING */ -extern erts_cpu_info_t *erts_cpuinfo; /* erl_init.c */ - __decl_noreturn void __noreturn erl_exit(int n, char*, ...); /* Some special erl_exit() codes: */ diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 3de48194fb..f07e4793d2 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -54,6 +54,9 @@ #ifdef HAVE_IFADDRS_H #include <ifaddrs.h> #endif +#ifdef HAVE_NETPACKET_PACKET_H +#include <netpacket/packet.h> +#endif /* All platforms fail on malloc errors. */ #define FATAL_MALLOC @@ -85,6 +88,7 @@ #include <winsock2.h> #endif #include <windows.h> +#include <iphlpapi.h> #include <Ws2tcpip.h> /* NEED VC 6.0 !!! */ @@ -467,6 +471,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_REQ_IFGET 22 #define INET_REQ_IFSET 23 #define INET_REQ_SUBSCRIBE 24 +#define INET_REQ_GETIFADDRS 25 /* TCP requests */ #define TCP_REQ_ACCEPT 40 #define TCP_REQ_LISTEN 41 @@ -3824,39 +3829,81 @@ do { if ((end)-(ptr) < (n)) goto error; } while(0) static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end) { if (addr->sa_family == AF_INET || addr->sa_family == 0) { - struct in_addr a; - buf_check(ptr,end,sizeof(struct in_addr)); - a = ((struct sockaddr_in*) addr)->sin_addr; - sys_memcpy(ptr, (char*)&a, sizeof(struct in_addr)); - return ptr + sizeof(struct in_addr); + struct in_addr *p = &(((struct sockaddr_in*) addr)->sin_addr); + buf_check(ptr, end, 1 + sizeof(struct in_addr)); + *ptr = INET_AF_INET; + sys_memcpy(ptr+1, (char*)p, sizeof(struct in_addr)); + return ptr + 1 + sizeof(struct in_addr); } #if defined(HAVE_IN6) && defined(AF_INET6) else if (addr->sa_family == AF_INET6) { - struct in6_addr a; - buf_check(ptr,end,sizeof(struct in6_addr)); - a = ((struct sockaddr_in6*) addr)->sin6_addr; - sys_memcpy(ptr, (char*)&a, sizeof(struct in6_addr)); - return ptr + sizeof(struct in6_addr); + struct in6_addr *p = &(((struct sockaddr_in6*) addr)->sin6_addr); + buf_check(ptr, end, 1 + sizeof(struct in6_addr)); + *ptr = INET_AF_INET6; + sys_memcpy(ptr+1, (char*)p, sizeof(struct in6_addr)); + return ptr + 1 + sizeof(struct in6_addr); + } +#endif +#if defined(AF_LINK) + else if (addr->sa_family == AF_LINK) { + struct sockaddr_dl *sdl_p = (struct sockaddr_dl*) addr; + buf_check(ptr, end, 2 + sdl_p->sdl_alen); + put_int16(sdl_p->sdl_alen, ptr); ptr += 2; + sys_memcpy(ptr, sdl_p->sdl_data + sdl_p->sdl_nlen, sdl_p->sdl_alen); + return ptr + sdl_p->sdl_alen; + } +#endif +#if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H) + else if(addr->sa_family == AF_PACKET) { + struct sockaddr_ll *sll_p = (struct sockaddr_ll*) addr; + buf_check(ptr, end, 2 + sll_p->sll_halen); + put_int16(sll_p->sll_halen, ptr); ptr += 2; + sys_memcpy(ptr, sll_p->sll_addr, sll_p->sll_halen); + return ptr + sll_p->sll_halen; } #endif + return ptr; error: return NULL; - } static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr) { - buf_check(ptr,end,sizeof(struct in_addr)); - sys_memcpy((char*) &((struct sockaddr_in*)addr)->sin_addr, ptr, - sizeof(struct in_addr)); - addr->sa_family = AF_INET; - return ptr + sizeof(struct in_addr); - + buf_check(ptr,end,1); + switch (*ptr++) { + case INET_AF_INET: { + struct in_addr *p = &((struct sockaddr_in*)addr)->sin_addr; + buf_check(ptr,end,sizeof(struct in_addr)); + sys_memcpy((char*) p, ptr, sizeof(struct in_addr)); + addr->sa_family = AF_INET; + return ptr + sizeof(struct in_addr); + } + case INET_AF_INET6: { + struct in6_addr *p = &((struct sockaddr_in6*)addr)->sin6_addr; + buf_check(ptr,end,sizeof(struct in6_addr)); + sys_memcpy((char*) p, ptr, sizeof(struct in6_addr)); + addr->sa_family = AF_INET6; + return ptr + sizeof(struct in6_addr); + } + } error: return NULL; } +#if defined (IFF_POINTOPOINT) +#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTOPOINT) +#elif defined IFF_POINTTOPOINT +#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTTOPOINT) +#endif + +#define IFGET_FLAGS_P2P(cflags, iff_ptp) \ + ((((cflags) & IFF_UP) ? INET_IFF_UP : 0) | \ + (((cflags) & IFF_BROADCAST) ? INET_IFF_BROADCAST : 0) | \ + (((cflags) & IFF_LOOPBACK) ? INET_IFF_LOOPBACK : 0) | \ + (((cflags) & iff_ptp) ? INET_IFF_POINTTOPOINT : 0) | \ + (((cflags) & IFF_UP) ? INET_IFF_RUNNING : 0) | /* emulate running ? */ \ + (((cflags) & IFF_MULTICAST) ? INET_IFF_MULTICAST : 0)) #if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST) @@ -3894,7 +3941,6 @@ static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize); } - /* input is an ip-address in string format i.e A.B.C.D ** scan the INTERFACE_LIST to get the options */ @@ -3980,27 +4026,12 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, break; case INET_IFOPT_FLAGS: { - long eflags = 0; int flags = ifp->iiFlags; /* just enumerate the interfaces (no names) */ - /* translate flags */ - if (flags & IFF_UP) - eflags |= INET_IFF_UP; - if (flags & IFF_BROADCAST) - eflags |= INET_IFF_BROADCAST; - if (flags & IFF_LOOPBACK) - eflags |= INET_IFF_LOOPBACK; - if (flags & IFF_POINTTOPOINT) - eflags |= INET_IFF_POINTTOPOINT; - if (flags & IFF_UP) /* emulate runnign ? */ - eflags |= INET_IFF_RUNNING; - if (flags & IFF_MULTICAST) - eflags |= INET_IFF_MULTICAST; - buf_check(sptr, s_end, 5); *sptr++ = INET_IFOPT_FLAGS; - put_int32(eflags, sptr); + put_int32(IFGET_FLAGS(flags), sptr); sptr += 4; break; } @@ -4021,7 +4052,6 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); } - #elif defined(SIOCGIFCONF) && defined(SIOCSIFFLAGS) /* cygwin has SIOCGIFCONF but not SIOCSIFFLAGS (Nov 2002) */ @@ -4032,69 +4062,77 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, #define SIZEA(p) (sizeof (p)) #endif - -static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) -{ - struct ifconf ifc; - struct ifreq *ifr; - char *buf; - int buflen, ifc_len, i; - char *sbuf, *sp; - - /* Courtesy of Per Bergqvist and W. Richard Stevens */ - - ifc_len = 0; - buflen = 100 * sizeof(struct ifreq); - buf = ALLOC(buflen); +static int get_ifconf(SOCKET s, struct ifconf *ifcp) { + int ifc_len = 0; + int buflen = 100 * sizeof(struct ifreq); + char *buf = ALLOC(buflen); for (;;) { - ifc.ifc_len = buflen; - ifc.ifc_buf = buf; - if (ioctl(desc->s, SIOCGIFCONF, (char *)&ifc) < 0) { + ifcp->ifc_len = buflen; + ifcp->ifc_buf = buf; + if (ioctl(s, SIOCGIFCONF, (char *)ifcp) < 0) { int res = sock_errno(); if (res != EINVAL || ifc_len) { FREE(buf); - return ctl_error(res, rbuf, rsize); + return -1; } } else { - if (ifc.ifc_len == ifc_len) break; /* buf large enough */ - ifc_len = ifc.ifc_len; + if (ifcp->ifc_len == ifc_len) break; /* buf large enough */ + ifc_len = ifcp->ifc_len; } buflen += 10 * sizeof(struct ifreq); buf = (char *)REALLOC(buf, buflen); } - - sp = sbuf = ALLOC(ifc_len+1); + return 0; +} + +static void free_ifconf(struct ifconf *ifcp) { + FREE(ifcp->ifc_buf); +} + +static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize) +{ + struct ifconf ifc; + struct ifreq *ifrp; + char *sbuf, *sp; + int i; + + /* Courtesy of Per Bergqvist and W. Richard Stevens */ + + if (get_ifconf(desc->s, &ifc) < 0) { + return ctl_error(sock_errno(), rbuf, rsize); + } + + sp = sbuf = ALLOC(ifc.ifc_len+1); *sp++ = INET_REP_OK; i = 0; for (;;) { int n; - - ifr = (struct ifreq *) VOIDP(buf + i); - n = sizeof(ifr->ifr_name) + SIZEA(ifr->ifr_addr); - if (n < sizeof(*ifr)) n = sizeof(*ifr); - if (i+n > ifc_len) break; + + ifrp = (struct ifreq *) VOIDP(ifc.ifc_buf + i); + n = sizeof(ifrp->ifr_name) + SIZEA(ifrp->ifr_addr); + if (n < sizeof(*ifrp)) n = sizeof(*ifrp); + if (i+n > ifc.ifc_len) break; i += n; - - switch (ifr->ifr_addr.sa_family) { + + switch (ifrp->ifr_addr.sa_family) { #if defined(HAVE_IN6) && defined(AF_INET6) case AF_INET6: #endif case AF_INET: - ASSERT(sp+IFNAMSIZ+1 < sbuf+buflen+1) - strncpy(sp, ifr->ifr_name, IFNAMSIZ); + ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1) + strncpy(sp, ifrp->ifr_name, IFNAMSIZ); sp[IFNAMSIZ] = '\0'; sp += strlen(sp), ++sp; } - - if (i >= ifc_len) break; + + if (i >= ifc.ifc_len) break; } - FREE(buf); + free_ifconf(&ifc); *rbuf = sbuf; return sp - sbuf; } - /* FIXME: temporary hack */ #ifndef IFHWADDRLEN #define IFHWADDRLEN 6 @@ -4133,37 +4171,52 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, #ifdef SIOCGIFHWADDR if (ioctl(desc->s, SIOCGIFHWADDR, (char *)&ifreq) < 0) break; - buf_check(sptr, s_end, 1+IFHWADDRLEN); + buf_check(sptr, s_end, 1+2+IFHWADDRLEN); *sptr++ = INET_IFOPT_HWADDR; + put_int16(IFHWADDRLEN, sptr); sptr += 2; /* raw memcpy (fix include autoconf later) */ sys_memcpy(sptr, (char*)(&ifreq.ifr_hwaddr.sa_data), IFHWADDRLEN); sptr += IFHWADDRLEN; -#elif defined(HAVE_GETIFADDRS) - struct ifaddrs *ifa, *ifp; - int found = 0; - - if (getifaddrs(&ifa) == -1) - goto error; +#elif defined(SIOCGENADDR) + if (ioctl(desc->s, SIOCGENADDR, (char *)&ifreq) < 0) + break; + buf_check(sptr, s_end, 1+2+sizeof(ifreq.ifr_enaddr)); + *sptr++ = INET_IFOPT_HWADDR; + put_int16(sizeof(ifreq.ifr_enaddr), sptr); sptr += 2; + /* raw memcpy (fix include autoconf later) */ + sys_memcpy(sptr, (char*)(&ifreq.ifr_enaddr), + sizeof(ifreq.ifr_enaddr)); + sptr += sizeof(ifreq.ifr_enaddr); +#elif defined(HAVE_GETIFADDRS) && defined(AF_LINK) + struct ifaddrs *ifa, *ifp; + struct sockaddr_dl *sdlp; + int found = 0; + + if (getifaddrs(&ifa) == -1) + goto error; - for (ifp = ifa; ifp; ifp = ifp->ifa_next) { - if ((ifp->ifa_addr->sa_family == AF_LINK) && - (sys_strcmp(ifp->ifa_name, ifreq.ifr_name) == 0)) { - found = 1; - break; - } - } + for (ifp = ifa; ifp; ifp = ifp->ifa_next) { + if ((ifp->ifa_addr->sa_family == AF_LINK) && + (sys_strcmp(ifp->ifa_name, ifreq.ifr_name) == 0)) { + found = 1; + break; + } + } - if (found == 0) { - freeifaddrs(ifa); - break; - } + if (found == 0) { + freeifaddrs(ifa); + break; + } + sdlp = (struct sockaddr_dl *)ifp->ifa_addr; - buf_check(sptr, s_end, 1+IFHWADDRLEN); - *sptr++ = INET_IFOPT_HWADDR; - sys_memcpy(sptr, ((struct sockaddr_dl *)ifp->ifa_addr)->sdl_data + - ((struct sockaddr_dl *)ifp->ifa_addr)->sdl_nlen, IFHWADDRLEN); - freeifaddrs(ifa); - sptr += IFHWADDRLEN; + buf_check(sptr, s_end, 1+2+sdlp->sdl_alen); + *sptr++ = INET_IFOPT_HWADDR; + put_int16(sdlp->sdl_alen, sptr); sptr += 2; + sys_memcpy(sptr, + sdlp->sdl_data + sdlp->sdl_nlen, + sdlp->sdl_alen); + freeifaddrs(ifa); + sptr += sdlp->sdl_alen; #endif break; } @@ -4240,29 +4293,15 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len, case INET_IFOPT_FLAGS: { int flags; - int eflags = 0; if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0) flags = 0; else flags = ifreq.ifr_flags; - /* translate flags */ - if (flags & IFF_UP) - eflags |= INET_IFF_UP; - if (flags & IFF_BROADCAST) - eflags |= INET_IFF_BROADCAST; - if (flags & IFF_LOOPBACK) - eflags |= INET_IFF_LOOPBACK; - if (flags & IFF_POINTOPOINT) - eflags |= INET_IFF_POINTTOPOINT; - if (flags & IFF_RUNNING) - eflags |= INET_IFF_RUNNING; - if (flags & IFF_MULTICAST) - eflags |= INET_IFF_MULTICAST; buf_check(sptr, s_end, 5); *sptr++ = INET_IFOPT_FLAGS; - put_int32(eflags, sptr); + put_int32(IFGET_FLAGS(flags), sptr); sptr += 4; break; } @@ -4300,17 +4339,22 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, (void) ioctl(desc->s, SIOCSIFADDR, (char*)&ifreq); break; - case INET_IFOPT_HWADDR: - buf_check(buf, b_end, IFHWADDRLEN); + case INET_IFOPT_HWADDR: { + unsigned int len; + buf_check(buf, b_end, 2); + len = get_int16(buf); buf += 2; + buf_check(buf, b_end, len); #ifdef SIOCSIFHWADDR /* raw memcpy (fix include autoconf later) */ - sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, IFHWADDRLEN); + sys_memset((char*)(&ifreq.ifr_hwaddr.sa_data), + '\0', sizeof(ifreq.ifr_hwaddr.sa_data)); + sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, len); (void) ioctl(desc->s, SIOCSIFHWADDR, (char *)&ifreq); #endif - buf += IFHWADDRLEN; + buf += len; break; - + } case INET_IFOPT_BROADADDR: #ifdef SIOCSIFBRDADDR @@ -4415,6 +4459,551 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len, #endif + + +/* Latin-1 to utf8 */ + +static int utf8_len(const char *c, int m) { + int l; + for (l = 0; m; c++, l++, m--) { + if (*c == '\0') break; + if ((*c & 0x7f) != *c) l++; + } + return l; +} + +static void utf8_encode(const char *c, int m, char *p) { + for (; m; c++, m--) { + if (*c == '\0') break; + if ((*c & 0x7f) != *c) { + *p++ = (char) (0xC0 | (0x03 & (*c >> 6))); + *p++ = (char) (0x80 | (0x3F & *c)); + } else { + *p++ = (char) *c; + } + } +} + +#if defined(__WIN32__) + +static void set_netmask_bytes(char *c, int len, int pref_len) { + int i, m; + for (i = 0, m = pref_len >> 3; i < m && i < len; i++) c[i] = '\xFF'; + if (i < len) c[i++] = 0xFF << (8 - (pref_len & 7)); + for (; i < len; i++) c[i] = '\0'; +} + + +int eq_masked_bytes(char *a, char *b, int pref_len) { + int i, m; + for (i = 0, m = pref_len >> 3; i < m; i++) { + if (a[i] != b[i]) return 0; + } + m = pref_len & 7; + if (m) { + m = 0xFF & (0xFF << (8 - m)); + if ((a[i] & m) != (b[i] & m)) return 0; + } + return !0; +} + +static int inet_ctl_getifaddrs(inet_descriptor* desc_p, + char **rbuf_pp, int rsize) +{ + int i; + DWORD ret, n; + IP_INTERFACE_INFO *info_p; + MIB_IPADDRTABLE *ip_addrs_p; + IP_ADAPTER_ADDRESSES *ip_adaddrs_p, *ia_p; + + char *buf_p; + char *buf_alloc_p; + int buf_size =512; +# define BUF_ENSURE(Size) \ + do { \ + int NEED_, GOT_ = buf_p - buf_alloc_p; \ + NEED_ = GOT_ + (Size); \ + if (NEED_ > buf_size) { \ + buf_size = NEED_ + 512; \ + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ + buf_p = buf_alloc_p + GOT_; \ + } \ + } while(0) +# define SOCKADDR_TO_BUF(opt, sa) \ + do { \ + if (sa) { \ + char *P_; \ + *buf_p++ = (opt); \ + while (! (P_ = sockaddr_to_buf((sa), buf_p, \ + buf_alloc_p+buf_size))) { \ + int GOT_ = buf_p - buf_alloc_p; \ + buf_size += 512; \ + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ + buf_p = buf_alloc_p + GOT_; \ + } \ + if (P_ == buf_p) { \ + buf_p--; \ + } else { \ + buf_p = P_; \ + } \ + } \ + } while (0) + + { + /* Try GetAdaptersAddresses, if it is available */ + unsigned long ip_adaddrs_size = 16 * 1024; + ULONG family = AF_UNSPEC; + ULONG flags = + GAA_FLAG_INCLUDE_PREFIX | GAA_FLAG_SKIP_ANYCAST | + GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_SKIP_FRIENDLY_NAME | + GAA_FLAG_SKIP_MULTICAST; + ULONG (WINAPI *fpGetAdaptersAddresses) + (ULONG, ULONG, PVOID, PIP_ADAPTER_ADDRESSES, PULONG); + HMODULE iphlpapi = GetModuleHandle("iphlpapi"); + fpGetAdaptersAddresses = (void *) + (iphlpapi ? + GetProcAddress(iphlpapi, "GetAdaptersAddresses") : + NULL); + if (fpGetAdaptersAddresses) { + ip_adaddrs_p = ALLOC(ip_adaddrs_size); + for (i = 17; i; i--) { + ret = fpGetAdaptersAddresses( + family, flags, NULL, ip_adaddrs_p, &ip_adaddrs_size); + ip_adaddrs_p = REALLOC(ip_adaddrs_p, ip_adaddrs_size); + if (ret == NO_ERROR) break; + if (ret == ERROR_BUFFER_OVERFLOW) continue; + i = 0; + } + if (! i) { + FREE(ip_adaddrs_p); + ip_adaddrs_p = NULL; + } + } else ip_adaddrs_p = NULL; + } + + { + /* Load the IP_INTERFACE_INFO table (only IPv4 interfaces), + * reliable source of interface names on XP + */ + unsigned long info_size = 4 * 1024; + info_p = ALLOC(info_size); + for (i = 17; i; i--) { + ret = GetInterfaceInfo(info_p, &info_size); + info_p = REALLOC(info_p, info_size); + if (ret == NO_ERROR) break; + if (ret == ERROR_INSUFFICIENT_BUFFER) continue; + i = 0; + } + if (! i) { + FREE(info_p); + info_p = NULL; + } + } + + if (! ip_adaddrs_p) { + /* If GetAdaptersAddresses gave nothing we fall back to + * MIB_IPADDRTABLE (only IPv4 interfaces) + */ + unsigned long ip_addrs_size = 16 * sizeof(*ip_addrs_p); + ip_addrs_p = ALLOC(ip_addrs_size); + for (i = 17; i; i--) { + ret = GetIpAddrTable(ip_addrs_p, &ip_addrs_size, FALSE); + ip_addrs_p = REALLOC(ip_addrs_p, ip_addrs_size); + if (ret == NO_ERROR) break; + if (ret == ERROR_INSUFFICIENT_BUFFER) continue; + i = 0; + } + if (! i) { + if (info_p) FREE(info_p); + FREE(ip_addrs_p); + return ctl_reply(INET_REP_OK, NULL, 0, rbuf_pp, rsize); + } + } else ip_addrs_p = NULL; + + buf_p = buf_alloc_p = ALLOC(buf_size); + *buf_p++ = INET_REP_OK; + + /* Iterate over MIB_IPADDRTABLE or IP_ADAPTER_ADDRESSES */ + for (ia_p = NULL, ip_addrs_p ? ((void *)(i = 0)) : (ia_p = ip_adaddrs_p); + ip_addrs_p ? (i < ip_addrs_p->dwNumEntries) : (ia_p != NULL); + ip_addrs_p ? ((void *)(i++)) : (ia_p = ia_p->Next)) { + MIB_IPADDRROW *ipaddrrow_p = NULL; + DWORD flags = INET_IFF_MULTICAST; + DWORD index = 0; + WCHAR *wname_p = NULL; + MIB_IFROW ifrow; + + if (ip_addrs_p) { + ipaddrrow_p = ip_addrs_p->table + i; + index = ipaddrrow_p->dwIndex; + } else { + index = ia_p->IfIndex; + if (ia_p->Flags & IP_ADAPTER_NO_MULTICAST) { + flags &= ~INET_IFF_MULTICAST; + } + } +index: + if (! index) goto done; + sys_memzero(&ifrow, sizeof(ifrow)); + ifrow.dwIndex = index; + if (GetIfEntry(&ifrow) != NO_ERROR) break; + /* Find the interface name - first try MIB_IFROW.wzname */ + if (ifrow.wszName[0] != 0) { + wname_p = ifrow.wszName; + } else { + /* Then try IP_ADAPTER_INDEX_MAP.Name (only IPv4 adapters) */ + int j; + for (j = 0; j < info_p->NumAdapters; j++) { + if (info_p->Adapter[j].Index == (ULONG) ifrow.dwIndex) { + if (info_p->Adapter[j].Name[0] != 0) { + wname_p = info_p->Adapter[j].Name; + } + break; + } + } + } + if (wname_p) { + int len; + /* Convert interface name to UTF-8 */ + len = + WideCharToMultiByte( + CP_UTF8, 0, wname_p, -1, NULL, 0, NULL, NULL); + if (! len) break; + BUF_ENSURE(len); + WideCharToMultiByte( + CP_UTF8, 0, wname_p, -1, buf_p, len, NULL, NULL); + buf_p += len; + } else { + /* Found no name - + * use "MIB_IFROW.dwIndex: MIB_IFROW.bDescr" as name instead */ + int l; + l = utf8_len(ifrow.bDescr, ifrow.dwDescrLen); + BUF_ENSURE(9 + l+1); + buf_p += + erts_sprintf( + buf_p, "%lu: ", (unsigned long) ifrow.dwIndex); + utf8_encode(ifrow.bDescr, ifrow.dwDescrLen, buf_p); + buf_p += l; + *buf_p++ = '\0'; + } + /* Interface flags, often make up broadcast and multicast flags */ + switch (ifrow.dwType) { + case IF_TYPE_ETHERNET_CSMACD: + flags |= INET_IFF_BROADCAST; + break; + case IF_TYPE_SOFTWARE_LOOPBACK: + flags |= INET_IFF_LOOPBACK; + flags &= ~INET_IFF_MULTICAST; + break; + default: + flags &= ~INET_IFF_MULTICAST; + break; + } + if (ifrow.dwAdminStatus) { + flags |= INET_IFF_UP; + switch (ifrow.dwOperStatus) { + case IF_OPER_STATUS_CONNECTING: + flags |= INET_IFF_POINTTOPOINT; + break; + case IF_OPER_STATUS_CONNECTED: + flags |= INET_IFF_RUNNING | INET_IFF_POINTTOPOINT; + break; + case IF_OPER_STATUS_OPERATIONAL: + flags |= INET_IFF_RUNNING; + break; + } + } + BUF_ENSURE(1 + 4); + *buf_p++ = INET_IFOPT_FLAGS; + put_int32(flags, buf_p); buf_p += 4; + if (ipaddrrow_p) { + /* Legacy implementation through GetIpAddrTable */ + struct sockaddr_in sin; + /* IP Address */ + sys_memzero(&sin, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ipaddrrow_p->dwAddr; + BUF_ENSURE(1); + /* Netmask */ + SOCKADDR_TO_BUF(INET_IFOPT_ADDR, (struct sockaddr *) &sin); + sin.sin_addr.s_addr = ipaddrrow_p->dwMask; + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, (struct sockaddr *) &sin); + if (flags & INET_IFF_BROADCAST) { + /* Broadcast address - fake it*/ + sin.sin_addr.s_addr = ipaddrrow_p->dwAddr; + sin.sin_addr.s_addr |= ~ipaddrrow_p->dwMask; + BUF_ENSURE(1); + SOCKADDR_TO_BUF( + INET_IFOPT_BROADADDR, (struct sockaddr *) &sin); + } + } else { + IP_ADAPTER_UNICAST_ADDRESS *p; + /* IP Address(es) */ + for (p = ia_p->FirstUnicastAddress; + p; + p = p->Next) + { + IP_ADAPTER_PREFIX *q; + ULONG shortest_length; + struct sockaddr *shortest_p, *sa_p = p->Address.lpSockaddr; + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_ADDR, sa_p); + shortest_p = NULL; + shortest_length = 0; + for (q = ia_p->FirstPrefix; + q; + q = q->Next) { + struct sockaddr *sp_p = q->Address.lpSockaddr; + if (sa_p->sa_family != sp_p->sa_family) continue; + switch (sa_p->sa_family) { + case AF_INET: { + struct sockaddr_in sin; + DWORD sa, sp, mask; + sa = ntohl((DWORD) + ((struct sockaddr_in *) + sa_p)->sin_addr.s_addr); + sp = ntohl((DWORD) + ((struct sockaddr_in *) + sp_p)->sin_addr.s_addr); + mask = 0xFFFFFFFF << (32 - q->PrefixLength); + if ((sa & mask) != (sp & mask)) continue; + if ((! shortest_p) + || q->PrefixLength < shortest_length) { + shortest_p = sp_p; + shortest_length = q->PrefixLength; + } + } break; + case AF_INET6: { + struct sockaddr_in6 sin6; + if (!eq_masked_bytes((char *) + &((struct sockaddr_in6 *) + sa_p)->sin6_addr, + (char *) + &((struct sockaddr_in6 *) + sp_p)->sin6_addr, + q->PrefixLength)) { + continue; + } + if ((! shortest_p) + || q->PrefixLength < shortest_length) { + shortest_p = sp_p; + shortest_length = q->PrefixLength; + } + } break; + } + } + if (! shortest_p) { + /* Found no shortest prefix */ + shortest_p = sa_p; + switch (shortest_p->sa_family) { + case AF_INET: { + /* Fall back to old classfull network addresses */ + DWORD addr = ntohl(((struct sockaddr_in *)shortest_p) + ->sin_addr.s_addr); + if (! (addr & 0x800000)) { + /* Class A */ + shortest_length = 8; + } else if (! (addr & 0x400000)) { + /* Class B */ + shortest_length = 16; + } else if (! (addr & 0x200000)) { + /* Class C */ + shortest_length = 24; + } else { + shortest_length = 32; + } + } break; + case AF_INET6: { + /* Just play it safe */ + shortest_length = 128; + } break; + } + } + switch (shortest_p->sa_family) { + case AF_INET: { + struct sockaddr_in sin; + DWORD mask = 0xFFFFFFFF << (32 - shortest_length); + sys_memzero(&sin, sizeof(sin)); + sin.sin_family = shortest_p->sa_family; + sin.sin_addr.s_addr = htonl(mask); + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, + (struct sockaddr *) &sin); + if (flags & INET_IFF_BROADCAST) { + DWORD sp = + ntohl((DWORD) + ((struct sockaddr_in *)shortest_p) + -> sin_addr.s_addr); + sin.sin_addr.s_addr = htonl(sp | ~mask); + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR, + (struct sockaddr *) &sin); + } + } break; + case AF_INET6: { + struct sockaddr_in6 sin6; + sys_memzero(&sin6, sizeof(sin6)); + sin6.sin6_family = shortest_p->sa_family; + set_netmask_bytes((char *) &sin6.sin6_addr, + 16, + shortest_length); + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, + (struct sockaddr *) &sin6); + } break; + } + } + } + if (ifrow.dwPhysAddrLen) { + /* Hardware Address */ + BUF_ENSURE(1 + 2 + ifrow.dwPhysAddrLen); + *buf_p++ = INET_IFOPT_HWADDR; + put_int16(ifrow.dwPhysAddrLen, buf_p); buf_p += 2; + sys_memcpy(buf_p, ifrow.bPhysAddr, ifrow.dwPhysAddrLen); + buf_p += ifrow.dwPhysAddrLen; + } + +done: + /* That is all for this interface */ + BUF_ENSURE(1); + *buf_p++ = '\0'; + if (ia_p && + ia_p->Ipv6IfIndex && + ia_p->Ipv6IfIndex != index) + { + /* Oops, there was an other interface for IPv6. Possible? XXX */ + index = ia_p->Ipv6IfIndex; + goto index; + } + } + + if (ip_adaddrs_p) FREE(ip_adaddrs_p); + if (info_p) FREE(info_p); + if (ip_addrs_p) FREE(ip_addrs_p); + + buf_size = buf_p - buf_alloc_p; + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); + /* buf_p is now unreliable */ + *rbuf_pp = buf_alloc_p; + return buf_size; +# undef BUF_ENSURE +} + +#elif defined(HAVE_GETIFADDRS) + +static int inet_ctl_getifaddrs(inet_descriptor* desc_p, + char **rbuf_pp, int rsize) +{ + struct ifaddrs *ifa_p, *ifa_free_p; + + int buf_size; + char *buf_p; + char *buf_alloc_p; + + buf_size = 512; + buf_alloc_p = ALLOC(buf_size); + buf_p = buf_alloc_p; +# define BUF_ENSURE(Size) \ + do { \ + int NEED_, GOT_ = buf_p - buf_alloc_p; \ + NEED_ = GOT_ + (Size); \ + if (NEED_ > buf_size) { \ + buf_size = NEED_ + 512; \ + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ + buf_p = buf_alloc_p + GOT_; \ + } \ + } while (0) +# define SOCKADDR_TO_BUF(opt, sa) \ + do { \ + if (sa) { \ + char *P_; \ + *buf_p++ = (opt); \ + while (! (P_ = sockaddr_to_buf((sa), buf_p, \ + buf_alloc_p+buf_size))) { \ + int GOT_ = buf_p - buf_alloc_p; \ + buf_size += 512; \ + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \ + buf_p = buf_alloc_p + GOT_; \ + } \ + if (P_ == buf_p) { \ + buf_p--; \ + } else { \ + buf_p = P_; \ + } \ + } \ + } while (0) + + if (getifaddrs(&ifa_p) < 0) { + return ctl_error(sock_errno(), rbuf_pp, rsize); + } + ifa_free_p = ifa_p; + *buf_p++ = INET_REP_OK; + for (; ifa_p; ifa_p = ifa_p->ifa_next) { + int len = utf8_len(ifa_p->ifa_name, -1); + BUF_ENSURE(len+1 + 1+4 + 1); + utf8_encode(ifa_p->ifa_name, -1, buf_p); + buf_p += len; + *buf_p++ = '\0'; + *buf_p++ = INET_IFOPT_FLAGS; + put_int32(IFGET_FLAGS(ifa_p->ifa_flags), buf_p); buf_p += 4; + if (ifa_p->ifa_addr->sa_family == AF_INET +#if defined(AF_INET6) + || ifa_p->ifa_addr->sa_family == AF_INET6 +#endif + ) { + SOCKADDR_TO_BUF(INET_IFOPT_ADDR, ifa_p->ifa_addr); + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, ifa_p->ifa_netmask); + if (ifa_p->ifa_flags & IFF_POINTOPOINT) { + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_DSTADDR, ifa_p->ifa_dstaddr); + } else if (ifa_p->ifa_flags & IFF_BROADCAST) { + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR, ifa_p->ifa_broadaddr); + } + } +#if defined(AF_LINK) || defined(AF_PACKET) + else if ( +#if defined(AF_LINK) + ifa_p->ifa_addr->sa_family == AF_LINK +#else + 0 +#endif +#if defined(AF_PACKET) + || ifa_p->ifa_addr->sa_family == AF_PACKET +#endif + ) { + char *bp = buf_p; + BUF_ENSURE(1); + SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr); + if (buf_p - bp < 4) buf_p = bp; /* Empty hwaddr */ + } +#endif + BUF_ENSURE(1); + *buf_p++ = '\0'; + } + buf_size = buf_p - buf_alloc_p; + buf_alloc_p = REALLOC(buf_alloc_p, buf_size); + /* buf_p is now unreliable */ + freeifaddrs(ifa_free_p); + *rbuf_pp = buf_alloc_p; + return buf_size; +# undef BUF_ENSURE +} + +#else + +static int inet_ctl_getifaddrs(inet_descriptor* desc_p, + char **rbuf_pp, int rsize) +{ + return ctl_error(ENOTSUP, rbuf_pp, rsize); +} + +#endif + + + #ifdef VXWORKS /* ** THIS is a terrible creature, a bug in the TCP part @@ -5293,12 +5882,15 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len) if (pmtud_enable) cflags |= SPP_PMTUD_ENABLE; if (pmtud_disable) cflags |= SPP_PMTUD_DISABLE; +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY + /* The followings are missing in FreeBSD 7.1 */ sackdelay_enable =eflags& SCTP_FLAG_SACDELAY_ENABLE; sackdelay_disable=eflags& SCTP_FLAG_SACDELAY_DISABLE; if (sackdelay_enable && sackdelay_disable) return -1; if (sackdelay_enable) cflags |= SPP_SACKDELAY_ENABLE; if (sackdelay_disable) cflags |= SPP_SACKDELAY_DISABLE; +# endif arg.pap.spp_flags = cflags; # endif @@ -6199,13 +6791,15 @@ static int sctp_fill_opts(inet_descriptor* desc, char* buf, int buflen, if (ap.spp_flags & SPP_PMTUD_DISABLE) { i = LOAD_ATOM (spec, i, am_pmtud_disable); n++; } - +# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY + /* SPP_SACKDELAY_* not in FreeBSD 7.1 */ if (ap.spp_flags & SPP_SACKDELAY_ENABLE) { i = LOAD_ATOM (spec, i, am_sackdelay_enable); n++; } if (ap.spp_flags & SPP_SACKDELAY_DISABLE) { i = LOAD_ATOM (spec, i, am_sackdelay_disable); n++; } # endif +# endif PLACE_FOR(spec, i, LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT); @@ -6676,6 +7270,13 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len, return inet_ctl_getiflist(desc, rbuf, rsize); } + case INET_REQ_GETIFADDRS: { + DEBUGF(("inet_ctl(%ld): GETIFADDRS\r\n", (long)desc->port)); + if (!IS_OPEN(desc)) + return ctl_xerror(EXBADPORT, rbuf, rsize); + return inet_ctl_getifaddrs(desc, rbuf, rsize); + } + case INET_REQ_IFGET: { DEBUGF(("inet_ctl(%ld): IFGET\r\n", (long)desc->port)); if (!IS_OPEN(desc)) diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index af4ab693dc..01ba773688 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -75,6 +75,7 @@ static erts_smp_rwmtx_t environ_rwmtx; #include "erl_sys_driver.h" #include "erl_check_io.h" +#include "erl_cpu_topology.h" #ifndef DISABLE_VFORK #define DISABLE_VFORK 0 @@ -399,7 +400,7 @@ typedef struct { #ifdef ERTS_THR_HAVE_SIG_FUNCS sigset_t saved_sigmask; #endif - int unbind_child; + int sched_bind_data; } erts_thr_create_data_t; /* @@ -410,15 +411,13 @@ static void * thr_create_prepare(void) { erts_thr_create_data_t *tcdp; - ErtsSchedulerData *esdp; tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t)); #ifdef ERTS_THR_HAVE_SIG_FUNCS erts_thr_sigmask(SIG_BLOCK, &thr_create_sigmask, &tcdp->saved_sigmask); #endif - esdp = erts_get_scheduler_data(); - tcdp->unbind_child = esdp && erts_is_scheduler_bound(esdp); + tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare(); return (void *) tcdp; } @@ -430,6 +429,8 @@ thr_create_cleanup(void *vtcdp) { erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; + erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data); + #ifdef ERTS_THR_HAVE_SIG_FUNCS /* Restore signalmask... */ erts_thr_sigmask(SIG_SETMASK, &tcdp->saved_sigmask, NULL); @@ -456,12 +457,7 @@ thr_create_prepare_child(void *vtcdp) erts_thread_disable_fpe(); #endif - if (tcdp->unbind_child) { - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - erts_unbind_from_cpu(erts_cpuinfo); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - } - + erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); } #endif /* #ifdef USE_THREADS */ @@ -1461,9 +1457,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op CHLD_STAT_LOCK; - unbind = erts_is_scheduler_bound(NULL); - if (unbind) - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); + unbind = erts_sched_bind_atfork_prepare(); #if !DISABLE_VFORK /* See fork/vfork discussion before this function. */ @@ -1476,7 +1470,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op if (pid == 0) { /* The child! Setup child... */ - if (unbind && erts_unbind_from_cpu(erts_cpuinfo) != 0) + if (erts_sched_bind_atfork_child(unbind) != 0) goto child_error; /* OBSERVE! @@ -1577,8 +1571,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog; cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : "."; - cs_argv[CS_ARGV_UNBIND_IX] - = (unbind ? erts_get_unbind_from_cpu_str(erts_cpuinfo) : "false"); + cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind); cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range; for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0]; @@ -1627,8 +1620,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op } #endif - if (unbind) - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); + erts_sched_bind_atfork_parent(unbind); if (pid == -1) { saved_errno = errno; diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index 15d4cd7361..d24347b3aa 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -31,7 +31,7 @@ #include "global.h" #include "erl_threads.h" #include "../../drivers/win32/win_con.h" - +#include "erl_cpu_topology.h" void erts_sys_init_float(void); @@ -2973,13 +2973,50 @@ check_supported_os_version(void) } #ifdef USE_THREADS -#ifdef ERTS_ENABLE_LOCK_COUNT + +typedef struct { + int sched_bind_data; +} erts_thr_create_data_t; + +/* + * thr_create_prepare() is called in parent thread before thread creation. + * Returned value is passed as argument to thr_create_cleanup(). + */ +static void * +thr_create_prepare(void) +{ + erts_thr_create_data_t *tcdp; + + tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t)); + tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare(); + + return (void *) tcdp; +} + + +/* thr_create_cleanup() is called in parent thread after thread creation. */ +static void +thr_create_cleanup(void *vtcdp) +{ + erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; + + erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data); + + erts_free(ERTS_ALC_T_TMP, tcdp); +} + static void thr_create_prepare_child(void *vtcdp) { + erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; + +#ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_thread_setup(); -} #endif /* ERTS_ENABLE_LOCK_COUNT */ + + erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); +} + #endif /* USE_THREADS */ void @@ -2991,9 +3028,13 @@ erts_sys_pre_init(void) #ifdef USE_THREADS { erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER; -#ifdef ERTS_ENABLE_LOCK_COUNT + eid.thread_create_child_func = thr_create_prepare_child; -#endif + /* Before creation in parent */ + eid.thread_create_prepare_func = thr_create_prepare; + /* After creation in parent */ + eid.thread_create_parent_func = thr_create_cleanup, + erts_thr_init(&eid); #ifdef ERTS_ENABLE_LOCK_COUNT erts_lcnt_init(); diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 7c19274696..79252d0593 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -27,6 +27,7 @@ -export([all/1, ping/1, bulk_send/1, bulk_send_small/1, bulk_send_big/1, + bulk_send_bigbig/1, local_send/1, local_send_small/1, local_send_big/1, local_send_legal/1, link_to_busy/1, exit_to_busy/1, lost_exit/1, link_to_dead/1, link_to_dead_new_node/1, @@ -50,7 +51,8 @@ -export([sender/3, receiver2/2, dummy_waiter/0, dead_process/0, roundtrip/1, bounce/1, do_dist_auto_connect/1, inet_rpc_server/1, dist_parallel_sender/3, dist_parallel_receiver/0, - dist_evil_parallel_receiver/0]). + dist_evil_parallel_receiver/0, + sendersender/4, sendersender2/4]). all(suite) -> [ ping, bulk_send, local_send, link_to_busy, exit_to_busy, @@ -121,7 +123,7 @@ bulk_send(doc) -> "the time. This tests that a process that is suspended on a ", "busy port will eventually be resumed."]; bulk_send(suite) -> - [bulk_send_small, bulk_send_big]. + [bulk_send_small, bulk_send_big, bulk_send_bigbig]. bulk_send_small(Config) when is_list(Config) -> ?line bulk_send(64, 32). @@ -129,6 +131,9 @@ bulk_send_small(Config) when is_list(Config) -> bulk_send_big(Config) when is_list(Config) -> ?line bulk_send(32, 64). +bulk_send_bigbig(Config) when is_list(Config) -> + ?line bulk_sendsend(32*5, 4). + bulk_send(Terms, BinSize) -> ?line Dog = test_server:timetrap(test_server:seconds(30)), @@ -145,6 +150,53 @@ bulk_send(Terms, BinSize) -> ?line test_server:timetrap_cancel(Dog), {comment, integer_to_list(trunc(Size/1024/Elapsed+0.5)) ++ " K/s"}. +bulk_sendsend(Terms, BinSize) -> + {Rate1, MonitorCount1} = bulk_sendsend2(Terms, BinSize, 5), + {Rate2, MonitorCount2} = bulk_sendsend2(Terms, BinSize, 995), + Ratio = if MonitorCount2 == 0 -> MonitorCount1 / 1.0; + true -> MonitorCount1 / MonitorCount2 + end, + %% A somewhat arbitrary ratio, but hopefully one that will accomodate + %% a wide range of CPU speeds. + true = (Ratio > 8.0), + {comment, + integer_to_list(Rate1) ++ " K/s, " ++ + integer_to_list(Rate2) ++ " K/s, " ++ + integer_to_list(MonitorCount1) ++ " monitor msgs, " ++ + integer_to_list(MonitorCount2) ++ " monitor msgs, " ++ + float_to_list(Ratio) ++ " monitor ratio"}. + +bulk_sendsend2(Terms, BinSize, BusyBufSize) -> + ?line Dog = test_server:timetrap(test_server:seconds(30)), + + ?line io:format("Sending ~w binaries, each of size ~w K", + [Terms, BinSize]), + ?line {ok, NodeRecv} = start_node(bulk_receiver), + ?line Recv = spawn(NodeRecv, erlang, apply, [fun receiver/2, [0, 0]]), + ?line Bin = list_to_binary(lists:duplicate(BinSize*1024, 253)), + ?line Size = Terms*size(Bin), + + %% SLF LEFT OFF HERE. + %% When the caller uses small hunks, like 4k via + %% bulk_sendsend(32*5, 4), then (on my laptop at least), we get + %% zero monitor messages. But if we use "+zdbbl 5", then we + %% get a lot of monitor messages. So, if we can count up the + %% total number of monitor messages that we get when running both + %% default busy size and "+zdbbl 5", and if the 5 case gets + %% "many many more" monitor messages, then we know we're working. + + ?line {ok, NodeSend} = start_node(bulk_sender, "+zdbbl " ++ integer_to_list(BusyBufSize)), + ?line _Send = spawn(NodeSend, erlang, apply, [fun sendersender/4, [self(), Recv, Bin, Terms]]), + ?line {Elapsed, {TermsN, SizeN}, MonitorCount} = + receive {sendersender, BigRes} -> + BigRes + end, + ?line stop_node(NodeRecv), + ?line stop_node(NodeSend), + + ?line test_server:timetrap_cancel(Dog), + {trunc(SizeN/1024/Elapsed+0.5), MonitorCount}. + sender(To, _Bin, 0) -> To ! {done, self()}, receive @@ -155,6 +207,43 @@ sender(To, Bin, Left) -> To ! {term, Bin}, sender(To, Bin, Left-1). +%% Sender process to be run on a slave node + +sendersender(Parent, To, Bin, Left) -> + erlang:system_monitor(self(), [busy_dist_port]), + [spawn(fun() -> sendersender2(To, Bin, Left, false) end) || + _ <- lists:seq(1,1)], + {USec, {Res, MonitorCount}} = + timer:tc(?MODULE, sendersender2, [To, Bin, Left, true]), + Parent ! {sendersender, {USec/1000000, Res, MonitorCount}}. + +sendersender2(To, Bin, Left, SendDone) -> + sendersender3(To, Bin, Left, SendDone, 0). + +sendersender3(To, _Bin, 0, SendDone, MonitorCount) -> + if SendDone -> + To ! {done, self()}; + true -> + ok + end, + receive + {monitor, _Pid, _Type, _Info} = M -> + sendersender3(To, _Bin, 0, SendDone, MonitorCount + 1) + after 0 -> + if SendDone -> + receive + Any when is_tuple(Any), size(Any) == 2 -> + {Any, MonitorCount} + end; + true -> + exit(normal) + end + end; +sendersender3(To, Bin, Left, SendDone, MonitorCount) -> + To ! {term, Bin}, + %%timer:sleep(50), + sendersender3(To, Bin, Left-1, SendDone, MonitorCount). + %% Receiver process to be run on a slave node. receiver(Terms, Size) -> diff --git a/erts/emulator/test/send_term_SUITE.erl b/erts/emulator/test/send_term_SUITE.erl index 5fd01a9ac5..819aa34886 100644 --- a/erts/emulator/test/send_term_SUITE.erl +++ b/erts/emulator/test/send_term_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2005-2009. All Rights Reserved. +%% Copyright Ericsson AB 2005-2010. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -61,7 +61,7 @@ basic(Config) when is_list(Config) -> ?line ExpectExt2Term = term(P, 5), %% ERL_DRV_INT, ERL_DRV_UINT - ?line case erlang:system_info(wordsize) of + ?line case erlang:system_info({wordsize, external}) of 4 -> ?line {-1, 4294967295} = term(P, 6); 8 -> diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl index ba433d4e11..cd940f3ddf 100644 --- a/erts/emulator/test/system_info_SUITE.erl +++ b/erts/emulator/test/system_info_SUITE.erl @@ -132,6 +132,7 @@ misc_smoke_tests(Config) when is_list(Config) -> ?line true = is_binary(erlang:system_info(procs)), ?line true = is_binary(erlang:system_info(loaded)), ?line true = is_binary(erlang:system_info(dist)), + ?line ok = try erlang:system_info({cpu_topology,erts_get_cpu_topology_error_case}), fail catch error:badarg -> ok end, ?line ok. diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 6ec2c8b52c..60b3af7db7 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -138,6 +138,12 @@ static char *plusr_val_switches[] = { NULL }; +/* +z arguments with values */ +static char *plusz_val_switches[] = { + "dbbl", + NULL +}; + /* * Define sleep(seconds) in terms of Sleep() on Windows. @@ -909,6 +915,20 @@ int main(int argc, char **argv) i++; } break; + case 'z': + if (!is_one_of_strings(&argv[i][2], plusz_val_switches)) { + goto the_default; + } else { + if (i+1 >= argc + || argv[i+1][0] == '-' + || argv[i+1][0] == '+') + usage(argv[i]); + argv[i][0] = '-'; + add_Eargs(argv[i]); + add_Eargs(argv[i+1]); + i++; + } + break; default: the_default: argv[i][0] = '-'; /* Change +option to -option. */ @@ -1096,7 +1116,7 @@ usage_aux(void) "[+l] [+M<SUBSWITCH> <ARGUMENT>] [+P MAX_PROCS] [+R COMPAT_REL] " "[+r] [+rg READER_GROUPS_LIMIT] [+s SCHEDULER_OPTION] " "[+S NO_SCHEDULERS:NO_SCHEDULERS_ONLINE] [+T LEVEL] [+V] [+v] " - "[+W<i|w>] [args ...]\n"); + "[+W<i|w>] [+z MISC_OPTION] [args ...]\n"); exit(1); } diff --git a/erts/etc/common/inet_gethost.c b/erts/etc/common/inet_gethost.c index d3ff4874ac..e095836258 100644 --- a/erts/etc/common/inet_gethost.c +++ b/erts/etc/common/inet_gethost.c @@ -59,6 +59,7 @@ #define WIN32_LEAN_AND_MEAN #include <winsock2.h> #include <windows.h> +#include <ws2tcpip.h> #include <process.h> #include <stdio.h> #include <stdlib.h> diff --git a/erts/etc/win32/cygwin_tools/vc/ld.sh b/erts/etc/win32/cygwin_tools/vc/ld.sh index b04935ed9b..9a38c10748 100755 --- a/erts/etc/win32/cygwin_tools/vc/ld.sh +++ b/erts/etc/win32/cygwin_tools/vc/ld.sh @@ -53,7 +53,7 @@ while test -n "$1" ; do STDLIB_FORCED=true; STDLIB=LIBCMTD.LIB;; -lsocket) - DEFAULT_LIBRARIES="$DEFAULT_LIBRARIES WS2_32.LIB";; + DEFAULT_LIBRARIES="$DEFAULT_LIBRARIES WS2_32.LIB IPHLPAPI.LIB";; -l*) y=`echo $x | sed 's,^-l\(.*\),\1,g'`; MPATH=`cygpath -m $y`; @@ -168,6 +168,7 @@ RES=$? CMANIFEST=`cygpath $MANIFEST` if [ "$RES" = "0" -a -f "$CMANIFEST" ]; then # Add stuff to manifest to turn off "virtualization" + sed -n -i '1h;1!H;${;g;s,<trustInfo.*</trustInfo>.,,g;p;}' $CMANIFEST sed -i "s/<\/assembly>/ <ms_asmv2:trustInfo xmlns:ms_asmv2=\"urn:schemas-microsoft-com:asm.v2\">\n <ms_asmv2:security>\n <ms_asmv2:requestedPrivileges>\n <ms_asmv2:requestedExecutionLevel level=\"AsInvoker\" uiAccess=\"false\"\/>\n <\/ms_asmv2:requestedPrivileges>\n <\/ms_asmv2:security>\n <\/ms_asmv2:trustInfo>\n<\/assembly>/" $CMANIFEST eval mt.exe -nologo -manifest "$MANIFEST" -outputresource:"$OUTPUTRES" >>/tmp/link.exe.${p}.1 2>>/tmp/link.exe.${p}.2 diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h index 4a205699bd..931d692908 100644 --- a/erts/include/internal/ethread.h +++ b/erts/include/internal/ethread.h @@ -239,6 +239,8 @@ typedef DWORD ethr_tsd_key; # include "gcc/ethread.h" # include "libatomic_ops/ethread.h" # endif +# elif defined(ETHR_HAVE_LIBATOMIC_OPS) +# include "libatomic_ops/ethread.h" # elif defined(ETHR_WIN32_THREADS) # include "win/ethread.h" # endif diff --git a/erts/lib_src/common/erl_misc_utils.c b/erts/lib_src/common/erl_misc_utils.c index 498ce6837a..4c881993a5 100644 --- a/erts/lib_src/common/erl_misc_utils.c +++ b/erts/lib_src/common/erl_misc_utils.c @@ -71,6 +71,19 @@ (CPUSET)) != 0 ? -errno : 0) #define ERTS_MU_SET_THR_AFFINITY__(SETP) \ (sched_setaffinity(0, sizeof(cpu_set_t), (SETP)) != 0 ? -errno : 0) +#elif defined(HAVE_CPUSET_xETAFFINITY) +# include <sys/param.h> +# include <sys/cpuset.h> +# define ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__ +#define ERTS_MU_GET_PROC_AFFINITY__(CPUINFOP, CPUSET) \ + (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, \ + sizeof(cpuset_t), \ + (CPUSET)) != 0 ? -errno : 0) +#define ERTS_MU_SET_THR_AFFINITY__(CPUSETP) \ + (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, \ + sizeof(cpuset_t), \ + (CPUSETP)) != 0 ? -errno : 0) +# define cpu_set_t cpuset_t #elif defined(__WIN32__) # define ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__ # define cpu_set_t DWORD @@ -100,6 +113,11 @@ # define ERTS_SYS_CPU_PATH "/sys/devices/system/cpu" #endif +#ifdef __FreeBSD__ +#include <sys/types.h> +#include <sys/sysctl.h> +#endif + static int read_topology(erts_cpu_info_t *cpuinfo); #if defined(ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__) @@ -1380,6 +1398,245 @@ read_topology(erts_cpu_info_t *cpuinfo) return res; } +#elif defined(__FreeBSD__) + +/** + * FreeBSD topology detection is based on kern.sched.topology_spec XML as + * exposed by the ULE scheduler and described in SMP(4). It is available in + * 8.0 and higher. + * + * Threads are identified in this XML chunk with a THREAD flag. The function + * (simplistically) distinguishes cores and processors by the amount of cache + * they share (0 => processor, otherwise => core). Nodes are not identified + * (ULE doesn't handle NUMA yet, I believe). + */ + +/** + * Recursively parse a topology_spec <group> tag. + */ +static +const char* parse_topology_spec_group(erts_cpu_info_t *cpuinfo, const char* xml, int parentCacheLevel, int* processor_p, int* core_p, int* index_procs_p) { + int error = 0; + int cacheLevel = parentCacheLevel; + const char* next_group_start = strstr(xml + 1, "<group"); + int is_thread_group = 0; + const char* next_cache_level; + const char* next_thread_flag; + const char* next_group_end; + const char* next_children; + const char* next_children_end; + + /* parse the cache level */ + next_cache_level = strstr(xml, "cache-level=\""); + if (next_cache_level && (next_group_start == NULL || next_cache_level < next_group_start)) { + sscanf(next_cache_level, "cache-level=\"%i\"", &cacheLevel); + } + + /* parse the threads flag */ + next_thread_flag = strstr(xml, "THREAD"); + if (next_thread_flag && (next_group_start == NULL || next_thread_flag < next_group_start)) + is_thread_group = 1; + + /* Determine if it's a leaf with the position of the next children tag */ + next_group_end = strstr(xml, "</group>"); + next_children = strstr(xml, "<children>"); + next_children_end = strstr(xml, "</children>"); + if (next_children == NULL || next_group_end < next_children) { + do { + const char* next_cpu_start; + const char* next_cpu_cdata; + const char* next_cpu_end; + int cpu_str_size; + char* cpu_str; + char* cpu_crsr; + char* brkb; + int thread = 0; + int index_procs = *index_procs_p; + + next_cpu_start = strstr(xml, "<cpu"); + if (!next_cpu_start) { + error = 1; + break; + } + next_cpu_cdata = strstr(next_cpu_start, ">") + 1; + if (!next_cpu_cdata) { + error = 1; + break; + } + next_cpu_end = strstr(next_cpu_cdata, "</cpu>"); + if (!next_cpu_end) { + error = 1; + break; + } + cpu_str_size = next_cpu_end - next_cpu_cdata; + cpu_str = (char*) malloc(cpu_str_size + 1); + memcpy(cpu_str, (const char*) next_cpu_cdata, cpu_str_size); + cpu_str[cpu_str_size] = 0; + for (cpu_crsr = strtok_r(cpu_str, " \t,", &brkb); cpu_crsr; cpu_crsr = strtok_r(NULL, " \t,", &brkb)) { + int cpu_id; + if (index_procs >= cpuinfo->configured) { + void* t = realloc(cpuinfo->topology, (sizeof(erts_cpu_topology_t) * (index_procs + 1))); + if (t) { + cpuinfo->topology = t; + } else { + error = 1; + break; + } + } + cpu_id = atoi(cpu_crsr); + cpuinfo->topology[index_procs].node = -1; + cpuinfo->topology[index_procs].processor = *processor_p; + cpuinfo->topology[index_procs].processor_node = -1; + cpuinfo->topology[index_procs].core = *core_p; + cpuinfo->topology[index_procs].thread = thread; + cpuinfo->topology[index_procs].logical = cpu_id; + if (is_thread_group) { + thread++; + } else { + *core_p = (*core_p)++; + } + index_procs++; + } + *index_procs_p = index_procs; + free(cpu_str); + } while (0); + xml = next_group_end; + } else { + while (next_group_start != NULL && next_group_start < next_children_end) { + xml = parse_topology_spec_group(cpuinfo, next_group_start, cacheLevel, processor_p, core_p, index_procs_p); + if (!xml) + break; + next_group_start = strstr(xml, "<group"); + next_children_end = strstr(xml, "</children>"); + } + } + + if (cacheLevel == 0) { + *core_p = 0; + *processor_p = (*processor_p)++; + } else { + *core_p = (*core_p)++; + } + + if (error) + xml = NULL; + + return xml; +} + +/** + * Parse the topology_spec. Return the number of CPUs or 0 if parsing failed. + */ +static +int parse_topology_spec(erts_cpu_info_t *cpuinfo, const char* xml) { + int res = 1; + int index_procs = 0; + int core = 0; + int processor = 0; + xml = strstr(xml, "<groups"); + if (!xml) + return -1; + + xml += 7; + xml = strstr(xml, "<group"); + while (xml) { + xml = parse_topology_spec_group(cpuinfo, xml, 0, &processor, &core, &index_procs); + if (!xml) { + res = 0; + break; + } + xml = strstr(xml, "<group"); + } + + if (res) + res = index_procs; + + return res; +} + +static int +read_topology(erts_cpu_info_t *cpuinfo) +{ + int ix; + int res = 0; + size_t topology_spec_size = 0; + void* topology_spec = NULL; + + errno = 0; + + if (cpuinfo->configured < 1) + goto error; + + cpuinfo->topology_size = cpuinfo->configured; + cpuinfo->topology = malloc(sizeof(erts_cpu_topology_t) + * cpuinfo->configured); + if (!cpuinfo->topology) { + res = -ENOMEM; + goto error; + } + + for (ix = 0; ix < cpuinfo->configured; ix++) { + cpuinfo->topology[ix].node = -1; + cpuinfo->topology[ix].processor = -1; + cpuinfo->topology[ix].processor_node = -1; + cpuinfo->topology[ix].core = -1; + cpuinfo->topology[ix].thread = -1; + cpuinfo->topology[ix].logical = -1; + } + + if (!sysctlbyname("kern.sched.topology_spec", NULL, &topology_spec_size, NULL, 0)) { + topology_spec = malloc(topology_spec_size); + if (!topology_spec) { + res = -ENOMEM; + goto error; + } + + if (sysctlbyname("kern.sched.topology_spec", topology_spec, &topology_spec_size, NULL, 0)) { + goto error; + } + + res = parse_topology_spec(cpuinfo, topology_spec); + if (!res || res < cpuinfo->online) + res = 0; + else { + cpuinfo->topology_size = res; + + if (cpuinfo->topology_size != cpuinfo->configured) { + void *t = realloc(cpuinfo->topology, (sizeof(erts_cpu_topology_t) + * cpuinfo->topology_size)); + if (t) + cpuinfo->topology = t; + } + + adjust_processor_nodes(cpuinfo, 1); + + qsort(cpuinfo->topology, + cpuinfo->topology_size, + sizeof(erts_cpu_topology_t), + cpu_cmp); + } + } + +error: + + if (res == 0) { + cpuinfo->topology_size = 0; + if (cpuinfo->topology) { + free(cpuinfo->topology); + cpuinfo->topology = NULL; + } + if (errno) + res = -errno; + else + res = -EINVAL; + } + + if (topology_spec) + free(topology_spec); + + return res; +} + #else static int diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam Binary files differindex a777971b32..3f53f35273 100644 --- a/erts/preloaded/ebin/prim_inet.beam +++ b/erts/preloaded/ebin/prim_inet.beam diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 91d39c6a73..446656e45f 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -37,7 +37,7 @@ -export([setopt/3, setopts/2, getopt/2, getopts/2, is_sockopt_val/2]). -export([chgopt/3, chgopts/2]). -export([getstat/2, getfd/1, getindex/1, getstatus/1, gettype/1, - getiflist/1, ifget/3, ifset/3, + getifaddrs/1, getiflist/1, ifget/3, ifset/3, gethostname/1]). -export([getservbyname/3, getservbyport/3]). -export([peername/1, setpeername/2]). @@ -216,9 +216,10 @@ bindx(S, AddFlag, Addrs) -> sctp -> %% Really multi-homed "bindx". Stringified args: %% [AddFlag, (Port, IP)+]: - Args = ?int8(AddFlag) ++ - lists:concat([?int16(Port)++ip_to_bytes(IP) || - {IP, Port} <- Addrs]), + Args = + [?int8(AddFlag)| + [[?int16(Port)|ip_to_bytes(IP)] || + {IP, Port} <- Addrs]], case ctl_cmd(S, ?SCTP_REQ_BINDX, Args) of {ok,_} -> {ok, S}; Error -> Error @@ -623,7 +624,7 @@ chgopt(S, Opt, Value) when is_port(S) -> chgopts(S, [{Opt,Value}]). chgopts(S, Opts) when is_port(S), is_list(Opts) -> - case inet:getopts(S, need_template(Opts)) of + case getopts(S, need_template(Opts)) of {ok,Templates} -> try merge_options(Opts, Templates) of NewOpts -> @@ -636,7 +637,94 @@ chgopts(S, Opts) when is_port(S), is_list(Opts) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% -%% IFLIST(insock()) -> {ok,IfNameList} | {error, Reason} +%% getifaddrs(insock()) -> {ok,IfAddrsList} | {error, Reason} +%% +%% IfAddrsList = [{Name,[Opts]}] +%% Name = string() +%% Opts = {flags,[Flag]} | {addr,Addr} | {netmask,Addr} | {broadaddr,Addr} +%% | {dstaddr,Addr} | {hwaddr,HwAddr} | {mtu,integer()} +%% Flag = up | broadcast | loopback | running | multicast +%% Addr = ipv4addr() | ipv6addr() +%% HwAddr = ethernet_addr() +%% +%% get interface name and addresses list +%% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +getifaddrs(S) when is_port(S) -> + case ctl_cmd(S, ?INET_REQ_GETIFADDRS, []) of + {ok, Data} -> + {ok, comp_ifaddrs(build_ifaddrs(Data), ktree_empty())}; + {error,enotsup} -> + case getiflist(S) of + {ok, IFs} -> + {ok, getifaddrs_ifget(S, IFs)}; + Err1 -> Err1 + end; + Err2 -> Err2 + end. + +%% Restructure interface properties per interface and remove duplicates + +comp_ifaddrs([{If,Opts}|IfOpts], T) -> + case ktree_is_defined(If, T) of + true -> + OptSet = comp_ifaddrs_add(ktree_get(If, T), Opts), + comp_ifaddrs(IfOpts, ktree_update(If, OptSet, T)); + false -> + OptSet = comp_ifaddrs_add(ktree_empty(), Opts), + comp_ifaddrs(IfOpts, ktree_insert(If, OptSet, T)) + end; +comp_ifaddrs([], T) -> + [{If,ktree_keys(ktree_get(If, T))} || If <- ktree_keys(T)]. + +comp_ifaddrs_add(OptSet, [Opt|Opts]) -> + case ktree_is_defined(Opt, OptSet) of + true + when element(1, Opt) =:= flags; + element(1, Opt) =:= hwaddr -> + comp_ifaddrs_add(OptSet, Opts); + _ -> + comp_ifaddrs_add(ktree_insert(Opt, undefined, OptSet), Opts) + end; +comp_ifaddrs_add(OptSet, []) -> OptSet. + +%% Legacy emulation of getifaddrs + +getifaddrs_ifget(_, []) -> []; +getifaddrs_ifget(S, [IF|IFs]) -> + case ifget(S, IF, [flags]) of + {ok,[{flags,Flags}]=FlagsVals} -> + BroadOpts = + case member(broadcast, Flags) of + true -> + [broadaddr,hwaddr]; + false -> + [hwaddr] + end, + P2POpts = + case member(pointtopoint, Flags) of + true -> + [dstaddr|BroadOpts]; + false -> + BroadOpts + end, + getifaddrs_ifget(S, IFs, IF, FlagsVals, [addr,netmask|P2POpts]); + _ -> + getifaddrs_ifget(S, IFs, IF, [], [addr,netmask,hwaddr]) + end. + +getifaddrs_ifget(S, IFs, IF, FlagsVals, Opts) -> + OptVals = + case ifget(S, IF, Opts) of + {ok,OVs} -> OVs; + _ -> [] + end, + [{IF,FlagsVals++OptVals}|getifaddrs_ifget(S, IFs)]. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% +%% getiflist(insock()) -> {ok,IfNameList} | {error, Reason} %% %% get interface name list %% @@ -1325,6 +1413,19 @@ type_value_2({enum,List}, Enum) -> {value,_} -> true; false -> false end; +type_value_2(sockaddr, Addr) -> + case Addr of + any -> true; + loopback -> true; + {A,B,C,D} when ?ip(A,B,C,D) -> true; + {A,B,C,D,E,F,G,H} when ?ip6(A,B,C,D,E,F,G,H) -> true; + _ -> false + end; +type_value_2(linkaddr, Addr) when is_list(Addr) -> + case len(Addr, 32768) of + undefined -> false; + _ -> true + end; type_value_2({bitenumlist,List}, EnumList) -> case enum_vals(EnumList, List) of Ls when is_list(Ls) -> true; @@ -1413,14 +1514,21 @@ enc_value_2(addr, {any,Port}) -> [?INET_AF_ANY|?int16(Port)]; enc_value_2(addr, {loopback,Port}) -> [?INET_AF_LOOPBACK|?int16(Port)]; -enc_value_2(addr, {IP,Port}) -> - case tuple_size(IP) of - 4 -> - [?INET_AF_INET,?int16(Port)|ip4_to_bytes(IP)]; - 8 -> - [?INET_AF_INET6,?int16(Port)|ip6_to_bytes(IP)] - end; +enc_value_2(addr, {IP,Port}) when tuple_size(IP) =:= 4 -> + [?INET_AF_INET,?int16(Port)|ip4_to_bytes(IP)]; +enc_value_2(addr, {IP,Port}) when tuple_size(IP) =:= 8 -> + [?INET_AF_INET6,?int16(Port)|ip6_to_bytes(IP)]; enc_value_2(ether, [X1,X2,X3,X4,X5,X6]) -> [X1,X2,X3,X4,X5,X6]; +enc_value_2(sockaddr, any) -> + [?INET_AF_ANY]; +enc_value_2(sockaddr, loopback) -> + [?INET_AF_LOOPBACK]; +enc_value_2(sockaddr, IP) when tuple_size(IP) =:= 4 -> + [?INET_AF_INET|ip4_to_bytes(IP)]; +enc_value_2(sockaddr, IP) when tuple_size(IP) =:= 8 -> + [?INET_AF_INET6|ip6_to_bytes(IP)]; +enc_value_2(linkaddr, Linkaddr) -> + [?int16(length(Linkaddr)),Linkaddr]; enc_value_2(sctp_assoc_id, Val) -> ?int32(Val); %% enc_value_2(sctp_assoc_id, Bin) -> [byte_size(Bin),Bin]; enc_value_2({enum,List}, Enum) -> @@ -1465,6 +1573,10 @@ dec_value(time, [X3,X2,X1,X0|T]) -> end; dec_value(ip, [A,B,C,D|T]) -> {{A,B,C,D}, T}; dec_value(ether,[X1,X2,X3,X4,X5,X6|T]) -> {[X1,X2,X3,X4,X5,X6],T}; +dec_value(sockaddr, [X|T]) -> + get_ip(X, T); +dec_value(linkaddr, [X1,X0|T]) -> + split(?i16(X1,X0), T); dec_value({enum,List}, [X3,X2,X1,X0|T]) -> Val = ?i32(X3,X2,X1,X0), case enum_name(Val, List) of @@ -1480,7 +1592,7 @@ dec_value({bitenumlist,List}, [X3,X2,X1,X0|T]) -> %% {enum_names(Val, List), T}; dec_value(binary,[L0,L1,L2,L3|List]) -> Len = ?i32(L0,L1,L2,L3), - {X,T}=lists:split(Len,List), + {X,T}=split(Len,List), {list_to_binary(X),T}; dec_value(Types, List) when is_tuple(Types) -> {L,T} = dec_value_tuple(Types, List, 1, []), @@ -1495,7 +1607,7 @@ dec_value_tuple(Types, List, N, Acc) {Term,Tail} = dec_value(element(N, Types), List), dec_value_tuple(Types, Tail, N+1, [Term|Acc]); dec_value_tuple(_, List, _, Acc) -> - {lists:reverse(Acc),List}. + {rev(Acc),List}. borlist([V|Vs], Value) -> borlist(Vs, V bor Value); @@ -1702,11 +1814,11 @@ merge_fields(_, _, _) -> []. %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -type_ifopt(addr) -> ip; -type_ifopt(broadaddr) -> ip; -type_ifopt(dstaddr) -> ip; +type_ifopt(addr) -> sockaddr; +type_ifopt(broadaddr) -> sockaddr; +type_ifopt(dstaddr) -> sockaddr; type_ifopt(mtu) -> int; -type_ifopt(netmask) -> ip; +type_ifopt(netmask) -> sockaddr; type_ifopt(flags) -> {bitenumlist, [{up, ?INET_IFF_UP}, @@ -1718,7 +1830,7 @@ type_ifopt(flags) -> {no_pointtopoint, ?INET_IFF_NPOINTTOPOINT}, {running, ?INET_IFF_RUNNING}, {multicast, ?INET_IFF_MULTICAST}]}; -type_ifopt(hwaddr) -> ether; +type_ifopt(hwaddr) -> linkaddr; type_ifopt(Opt) when is_atom(Opt) -> undefined. enc_ifopt(addr) -> ?INET_IFOPT_ADDR; @@ -1903,6 +2015,30 @@ encode_ifname(Name) -> if N > 255 -> {error, einval}; true -> {ok,[N | Name]} end. + +build_ifaddrs(Cs) -> + build_ifaddrs(Cs, []). +%% +build_ifaddrs([], []) -> + []; +build_ifaddrs([0|Cs], Acc) -> + Name = utf8_to_characters(rev(Acc)), + {Opts,Rest} = build_ifaddrs_opts(Cs, []), + [{Name,Opts}|build_ifaddrs(Rest)]; +build_ifaddrs([C|Cs], Acc) -> + build_ifaddrs(Cs, [C|Acc]). + +build_ifaddrs_opts([0|Cs], Acc) -> + {rev(Acc),Cs}; +build_ifaddrs_opts([C|Cs]=CCs, Acc) -> + case dec_ifopt(C) of + undefined -> + erlang:error(badarg, [CCs,Acc]); + Opt -> + Type = type_ifopt(Opt), + {Val,Rest} = dec_value(Type, Cs), + build_ifaddrs_opts(Rest, [{Opt,Val}|Acc]) + end. build_iflist(Cs) -> build_iflist(Cs, [], []). @@ -1927,6 +2063,80 @@ rev(L) -> rev(L,[]). rev([C|L],Acc) -> rev(L,[C|Acc]); rev([],Acc) -> Acc. +split(N, L) -> split(N, L, []). +split(0, L, R) when is_list(L) -> {rev(R),L}; +split(N, [H|T], R) when is_integer(N), N > 0 -> split(N-1, T, [H|R]). + +len(L, N) -> len(L, N, 0). +len([], N, C) when is_integer(N), N >= 0 -> C; +len(L, 0, _) when is_list(L) -> undefined; +len([_|L], N, C) when is_integer(N), N >= 0 -> len(L, N-1, C+1). + +member(X, [X|_]) -> true; +member(X, [_|Xs]) -> member(X, Xs); +member(_, []) -> false. + + + +%% Lookup tree that keeps key insert order + +ktree_empty() -> {[],tree()}. +ktree_is_defined(Key, {_,T}) -> tree(T, Key, is_defined). +ktree_get(Key, {_,T}) -> tree(T, Key, get). +ktree_insert(Key, V, {Keys,T}) -> {[Key|Keys],tree(T, Key, {insert,V})}. +ktree_update(Key, V, {Keys,T}) -> {Keys,tree(T, Key, {update,V})}. +ktree_keys({Keys,_}) -> rev(Keys). + +%% Simple lookup tree. Hash the key to get statistical balance. +%% Key is matched equal, not compared equal. + +tree() -> nil. +tree(T, Key, Op) -> tree(T, Key, Op, erlang:phash2(Key)). + +tree(nil, _, is_defined, _) -> false; +tree(nil, K, {insert,V}, _) -> {K,V,nil,nil}; +tree({K,_,_,_}, K, is_defined, _) -> true; +tree({K,V,_,_}, K, get, _) -> V; +tree({K,_,L,R}, K, {update,V}, _) -> {K,V,L,R}; +tree({K0,V0,L,R}, K, Op, H) -> + H0 = erlang:phash2(K0), + if H0 < H; H0 =:= H, K0 < K -> + if is_tuple(Op) -> + {K0,V0,tree(L, K, Op, H),R}; + true -> + tree(L, K, Op, H) + end; + true -> + if is_tuple(Op) -> + {K0,V0,L,tree(R, K, Op, H)}; + true -> + tree(R, K, Op, H) + end + end. + + + +utf8_to_characters([]) -> []; +utf8_to_characters([B|Bs]=Arg) when (B band 16#FF) =:= B -> + if 16#F8 =< B -> + erlang:error(badarg, [Arg]); + 16#F0 =< B -> + utf8_to_characters(Bs, B band 16#07, 3); + 16#E0 =< B -> + utf8_to_characters(Bs, B band 16#0F, 2); + 16#C0 =< B -> + utf8_to_characters(Bs, B band 16#1F, 1); + 16#80 =< B -> + erlang:error(badarg, [Arg]); + true -> + [B|utf8_to_characters(Bs)] + end. +%% +utf8_to_characters(Bs, U, 0) -> + [U|utf8_to_characters(Bs)]; +utf8_to_characters([B|Bs], U, N) when ((B band 16#3F) bor 16#80) =:= B -> + utf8_to_characters(Bs, (U bsl 6) bor (B band 16#3F), N-1). + ip_to_bytes(IP) when tuple_size(IP) =:= 4 -> ip4_to_bytes(IP); ip_to_bytes(IP) when tuple_size(IP) =:= 8 -> ip6_to_bytes(IP). diff --git a/erts/test/erlexec_SUITE.erl b/erts/test/erlexec_SUITE.erl index 164ce9faaf..6adb865f6d 100644 --- a/erts/test/erlexec_SUITE.erl +++ b/erts/test/erlexec_SUITE.erl @@ -33,7 +33,7 @@ -export([all/1, init_per_testcase/2, fin_per_testcase/2]). --export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1]). +-export([args_file/1, evil_args_file/1, env/1, args_file_env/1, otp_7461/1, otp_7461_remote/1, otp_8209/1, zdbbl_dist_buf_busy_limit/1]). -include_lib("test_server/include/test_server.hrl"). @@ -53,7 +53,8 @@ fin_per_testcase(_Case, Config) -> all(doc) -> []; all(suite) -> - [args_file, evil_args_file, env, args_file_env, otp_7461, otp_8209]. + [args_file, evil_args_file, env, args_file_env, otp_7461, otp_8209, + zdbbl_dist_buf_busy_limit]. otp_8209(doc) -> @@ -330,6 +331,25 @@ otp_7461_remote([halt, Pid]) -> io:format("halt order from ~p to node ~p\n",[Pid,node()]), halt(). +zdbbl_dist_buf_busy_limit(doc) -> + ["Check +zdbbl flag"]; +zdbbl_dist_buf_busy_limit(suite) -> + []; +zdbbl_dist_buf_busy_limit(Config) when is_list(Config) -> + LimKB = 1122233, + LimB = LimKB*1024, + ?line {ok,[[PName]]} = init:get_argument(progname), + ?line SNameS = "erlexec_test_02", + ?line SName = list_to_atom(SNameS++"@"++ + hd(tl(string:tokens(atom_to_list(node()),"@")))), + ?line Cmd = PName ++ " -sname "++SNameS++" -setcookie "++ + atom_to_list(erlang:get_cookie()) ++ + " +zdbbl " ++ integer_to_list(LimKB), + ?line open_port({spawn,Cmd},[]), + ?line pong = loop_ping(SName,40), + ?line LimB = rpc:call(SName,erlang,system_info,[dist_buf_busy_limit]), + ?line ok = cleanup_node(SNameS, 10), + ok. %% |