diff options
Diffstat (limited to 'erts')
86 files changed, 5407 insertions, 2755 deletions
diff --git a/erts/configure.in b/erts/configure.in index 3b4c46d4a5..8d245252b5 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -101,7 +101,7 @@ ERL_XCOMP_SYSROOT_INIT AC_ISC_POSIX -AC_CONFIG_HEADER($host/config.h:config.h.in include/internal/$host/ethread_header_config.h:include/internal/ethread_header_config.h.in include/$host/erl_int_sizes_config.h:include/erl_int_sizes_config.h.in) +AC_CONFIG_HEADER($host/config.h:config.h.in include/internal/$host/ethread_header_config.h:include/internal/ethread_header_config.h.in include/$host/erl_int_sizes_config.h:include/erl_int_sizes_config.h.in include/$host/erl_native_features_config.h:include/erl_native_features_config.h.in) dnl ---------------------------------------------------------------------- dnl Optional features. dnl ---------------------------------------------------------------------- @@ -123,6 +123,7 @@ AS_HELP_STRING([--enable-bootstrap-only], with_ssl_zlib=no enable_hipe=no enable_sctp=no + enable_dirty_schedulers=no fi ]) @@ -134,6 +135,13 @@ AS_HELP_STRING([--disable-threads], [disable async thread support]), *) enable_threads=yes ;; esac ], enable_threads=unknown) +AC_ARG_ENABLE(dirty-schedulers, +AS_HELP_STRING([--enable-dirty-schedulers], [enable dirty scheduler support]), +[ case "$enableval" in + no) enable_dirty_schedulers=no ;; + *) enable_dirty_schedulers=yes ;; + esac ], enable_dirty_schedulers=no) + AC_ARG_ENABLE(halfword-emulator, AS_HELP_STRING([--enable-halfword-emulator], [enable halfword emulator (only for 64bit builds)]), @@ -1222,6 +1230,13 @@ esac if test $emu_threads != yes; then enable_lock_check=no enable_lock_count=no + AC_MSG_CHECKING(whether dirty schedulers should be enabled) + if test "x$enable_dirty_schedulers" != "xno"; then + AC_DEFINE(ERL_NIF_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi else # Threads enabled for emulator EMU_THR_LIB_NAME=$ETHR_LIB_NAME @@ -1240,7 +1255,17 @@ else if test "x$enable_lock_count" != "xno"; then EMU_THR_DEFS="$EMU_THR_DEFS -DERTS_ENABLE_LOCK_COUNT" fi - + + AC_MSG_CHECKING(whether dirty schedulers should be enabled) + if test "x$enable_dirty_schedulers" != "xno"; then + EMU_THR_DEFS="$EMU_THR_DEFS -DERTS_DIRTY_SCHEDULERS" + AC_DEFINE(ERTS_DIRTY_SCHEDULERS, 1, [Define if the emulator supports dirty schedulers]) + AC_DEFINE(ERL_NIF_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi + disable_child_waiter_thread=no case $host_os in solaris*) @@ -1336,7 +1361,7 @@ TERMCAP_LIB= if test "x$with_termcap" != "xno" && test "X$host" != "Xwin32"; then # try these libs - termcap_libs="ncurses curses termcap termlib" + termcap_libs="tinfo ncurses curses termcap termlib" for termcap_lib in $termcap_libs; do AC_CHECK_LIB($termcap_lib, tgetent, TERMCAP_LIB="-l$termcap_lib") @@ -1372,24 +1397,44 @@ dnl ------------- dnl zlib dnl ------------- -AC_ARG_ENABLE(shared-zlib, -AS_HELP_STRING([--enable-shared-zlib], [enable using shared zlib library]), -[ case "$enableval" in - no) enable_shared_zlib=no ;; - *) enable_shared_zlib=yes ;; - esac ], enable_shared_zlib=no) +AC_ARG_ENABLE(builtin-zlib, + AS_HELP_STRING([--enable-builtin-zlib], + [force use of our own built-in zlib]), + [ case "$enableval" in + no) enable_builtin_zlib=no ;; + *) enable_builtin_zlib=yes ;; + esac ], enable_builtin_zlib=no) Z_LIB= -if test "x$enable_shared_zlib" = "xyes" ; then - AC_CHECK_LIB(z, adler32_combine, - [Z_LIB="-lz" - AC_DEFINE(HAVE_LIBZ, 1, [Define to 1 if you have the `z' library (-lz).])], - [AC_MSG_ERROR([cannot find any shared zlib])]) +if test "x$enable_builtin_zlib" = "xyes"; then + AC_MSG_NOTICE([Using our own built-in zlib source]) else - AC_MSG_NOTICE([Using own zlib source]) +AC_MSG_CHECKING(for zlib 1.2.5 or higher) +zlib_save_LIBS=$LIBS +LIBS="-lz $LIBS" +AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[ +#include "zlib.h" +]],[[ +#if ZLIB_VERNUM >= 0x1250 + Bytef s[1]; + s[0] = 0; + (void) adler32((uLong)0, s, 1); +#else +#error "No zlib 1.2.5 or higher found" +error +#endif +]])], +[ + Z_LIB="-lz" + AC_DEFINE(HAVE_LIBZ, 1, [Define to 1 if you have the `z' library (-lz).]) + AC_MSG_RESULT(yes) +],[ + AC_MSG_RESULT(no) +]) +LIBS=$zlib_save_LIBS fi - AC_SUBST(Z_LIB) dnl @@ -3807,10 +3852,16 @@ if test "$enable_dtrace_test" = "yes" ; then if ! dtrace -h $DTRACE_CPP -Iemulator/beam -o ./foo-dtrace.h -s emulator/beam/erlang_dtrace.d; then AC_MSG_ERROR([Could not precompile erlang_dtrace.d: dtrace -h failed]) fi - rm -f foo-dtrace.h + + $RM -f dtest.{o,c} + cat > dtest.c <<_DTEST + #include "foo-dtrace.h" + int main(void) { ERLANG_DIST_PORT_BUSY_ENABLED(); return 0; } +_DTEST + $CC $CFLAGS -c -o dtest.o dtest.c $RM -f $DTRACE_2STEP_TEST - if dtrace -G $DTRACE_CPP $DTRACE_BITS_FLAG -Iemulator/beam -o $DTRACE_2STEP_TEST -s emulator/beam/erlang_dtrace.d 2> /dev/null && \ + if dtrace -G $DTRACE_CPP $DTRACE_BITS_FLAG -Iemulator/beam -o $DTRACE_2STEP_TEST -s emulator/beam/erlang_dtrace.d dtest.o && \ test -f $DTRACE_2STEP_TEST ; then rm $DTRACE_2STEP_TEST DTRACE_ENABLED_2STEP=yes @@ -3818,6 +3869,8 @@ if test "$enable_dtrace_test" = "yes" ; then else AC_MSG_NOTICE([dtrace precompilation for 1-stage DTrace successful]) fi + $RM -f dtest.{o,c} foo-dtrace.h + DTRACE_ENABLED=yes case $OPSYS in linux) diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index bf0d132955..27a23174d5 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -792,6 +792,54 @@ SMP support enabled (see the <seealso marker="#smp">-smp</seealso> flag).</p> </item> + <tag><marker id="+SDcpu"><c><![CDATA[+SDcpu DirtyCPUSchedulers:DirtyCPUSchedulersOnline]]></c></marker></tag> + <item> + <p>Sets the number of dirty CPU scheduler threads to create and dirty + CPU scheduler threads to set online when threading support has been + enabled. The maximum for both values is 1024, and each value is further + limited by the settings for normal schedulers: the number of dirty CPU + scheduler threads created cannot exceed the number of normal scheduler + threads created, and the number of dirty CPU scheduler threads online + cannot exceed the number of normal scheduler threads online (see the + <seealso marker="#+S">+S</seealso> and <seealso marker="#+SP">+SP</seealso> + flags for more details). By default, the number of dirty CPU scheduler + threads created equals the number of normal scheduler threads created, + and the number of dirty CPU scheduler threads online equals the number + of normal scheduler threads online. <c>DirtyCPUSchedulers</c> may be + omitted if <c>:DirtyCPUSchedulersOnline</c> is not and vice versa. The + number of dirty CPU schedulers online can be changed at run time via + <seealso marker="erlang#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>. + </p> + <p>This option is ignored if the emulator doesn't have threading support + enabled. Currently, <em>this option is experimental</em> and is supported only + if the emulator was configured and built with support for dirty schedulers + enabled (it's disabled by default). + </p> + </item> + <tag><marker id="+SDPcpu"><c><![CDATA[+SDPcpu DirtyCPUSchedulersPercentage:DirtyCPUSchedulersOnlinePercentage]]></c></marker></tag> + <item> + <p>Similar to <seealso marker="#+SDcpu">+SDcpu</seealso> but uses percentages to set the + number of dirty CPU scheduler threads to create and number of dirty CPU scheduler threads + to set online when threading support has been enabled. Specified values must be greater + than 0. For example, <c>+SDPcpu 50:25</c> sets the number of dirty CPU scheduler threads + to 50% of the logical processors configured and the number of dirty CPU scheduler threads + online to 25% of the logical processors available. <c>DirtyCPUSchedulersPercentage</c> may + be omitted if <c>:DirtyCPUSchedulersOnlinePercentage</c> is not and vice versa. The + number of dirty CPU schedulers online can be changed at run time via + <seealso marker="erlang#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>. + </p> + <p>This option interacts with <seealso marker="#+SDcpu">+SDcpu</seealso> settings. + For example, on a system with 8 logical cores configured and 8 logical cores available, + the combination of the options <c>+SDcpu 4:4 +SDPcpu 50:25</c> (in either order) results + in 2 dirty CPU scheduler threads (50% of 4) and 1 dirty CPU scheduler thread online (25% of 4). + </p> + <p>This option is ignored if the emulator doesn't have threading support + enabled. Currently, <em>this option is experimental</em> and is supported only + if the emulator was configured and built with support for dirty schedulers + enabled (it's disabled by default). + </p> + </item> + <tag><marker id="+SDio"><c><![CDATA[+SDio IOSchedulers]]></c></marker></tag> <tag><c><![CDATA[+sFlag Value]]></c></tag> <item> <p>Scheduling specific flags.</p> @@ -941,6 +989,10 @@ when schedulers frequently run out of work. When disabled, the frequency with which schedulers run out of work will not be taken into account by the load balancing logic. + <br/> <c>+scl false</c> is similar to + <seealso marker="#+sub">+sub true</seealso> with the difference + that <c>+sub true</c> also will balance scheduler utilization + between schedulers. </p> </item> <tag><marker id="+sct"><c>+sct CpuTopology</c></marker></tag> @@ -1087,6 +1139,29 @@ documentation of the <seealso marker="#+sbt">+sbt</seealso> flag. </p> </item> + <tag><marker id="+sub"><c>+sub true|false</c></marker></tag> + <item> + <p>Enable or disable + <seealso marker="erts:erlang#statistics_scheduler_wall_time">scheduler + utilization</seealso> balancing of load. By default scheduler + utilization balancing is disabled and instead scheduler + compaction of load is enabled which will strive for a load + distribution which causes as many scheduler threads as possible + to be fully loaded (i.e., not run out of work). When scheduler + utilization balancing is enabled the system will instead try to + balance scheduler utilization between schedulers. That is, + strive for equal scheduler utilization on all schedulers. + <br/> <c>+sub true</c> is only supported on + systems where the runtime system detects and use a monotonically + increasing high resolution clock. On other systems, the runtime + system will fail to start. + <br/> <c>+sub true</c> implies + <seealso marker="#+scl">+scl false</seealso>. The difference + between <c>+sub true</c> and <c>+scl false</c> is that + <c>+scl false</c> will not try to balance the scheduler + utilization. + </p> + </item> <tag><marker id="+swct"><c>+sws very_eager|eager|medium|lazy|very_lazy</c></marker></tag> <item> <p> @@ -1126,18 +1201,18 @@ <tag><marker id="+spp"><c>+spp Bool</c></marker></tag> <item> <p>Set default scheduler hint for port parallelism. If set to - <c>true</c>, the VM will schedule port tasks when it by this can - improve the parallelism in the system. If set to <c>false</c>, - the VM will try to perform port tasks immediately and by this - improve latency at the expense of parallelism. If this - flag has not been passed, the default scheduler hint for port - parallelism is currently <c>false</c>. The default used can be - inspected in runtime by calling - <seealso marker="erlang#system_info_port_parallelism">erlang:system_info(port_parallelism)</seealso>. + <c>true</c>, the VM will schedule port tasks when doing so will + improve parallelism in the system. If set to <c>false</c>, the VM + will try to perform port tasks immediately, improving latency at the + expense of parallelism. If this flag has not been passed, the + default scheduler hint for port parallelism is currently + <c>false</c>. The default used can be inspected in runtime by + calling <seealso + marker="erlang#system_info_port_parallelism">erlang:system_info(port_parallelism)</seealso>. The default can be overriden on port creation by passing the <seealso marker="erlang#open_port_parallelism">parallelism</seealso> - option to - <seealso marker="erlang#open_port/2">open_port/2</seealso></p>. + option to <seealso + marker="erlang#open_port/2">open_port/2</seealso></p>. </item> <tag><marker id="sched_thread_stack_size"><c><![CDATA[+sss size]]></c></marker></tag> <item> diff --git a/erts/doc/src/erl_driver.xml b/erts/doc/src/erl_driver.xml index b453a4861e..c2f7fa4588 100644 --- a/erts/doc/src/erl_driver.xml +++ b/erts/doc/src/erl_driver.xml @@ -745,7 +745,7 @@ typedef struct ErlIOVec { created and decrement it once when the port associated with the lock terminates. The emulator will also increment the reference count when an async job is enqueued and decrement - it after an async job has been invoked, or canceled. Besides + it after an async job has been invoked. Besides this, it is the responsibility of the driver to ensure that the reference count does not reach zero before the last use of the lock by the driver has been made. The reference count @@ -1995,14 +1995,12 @@ ERL_DRV_EXT2TERM char *buf, ErlDrvUInt len <c>async_invoke</c> and <c>async_free</c>. It's typically a pointer to a structure that contains a pipe or event that can be used to signal that the async operation completed. - The data should be freed in <c>async_free</c>, because it's - called if <c>driver_async_cancel</c> is called.</p> + The data should be freed in <c>async_free</c>.</p> <p>When the async operation is done, <seealso marker="driver_entry#ready_async">ready_async</seealso> driver entry function is called. If <c>ready_async</c> is null in the driver entry, the <c>async_free</c> function is called instead.</p> - <p>The return value is a handle to the asynchronous task, which - can be used as argument to <c>driver_async_cancel</c>.</p> + <p>The return value is a handle to the asynchronous task.</p> <note> <p>As of erts version 5.5.4.3 the default stack size for threads in the async-thread pool is 16 kilowords, @@ -2040,26 +2038,6 @@ ERL_DRV_EXT2TERM char *buf, ErlDrvUInt len </desc> </func> <func> - <name><ret>int</ret><nametext>driver_async_cancel(long id)</nametext></name> - <fsummary>Cancel an asynchronous call</fsummary> - <desc> - <marker id="driver_async_cancel"></marker> - <p>This function used to cancel a scheduled asynchronous operation, - if it was still in the queue. It returned 1 if it succeeded, and - 0 if it failed.</p> - <p>Since it could not guarantee success, it was more or less useless. - The user had to implement synchronization of cancellation anyway. - It also unnecessarily complicated the implementation. Therefore, - as of OTP-R15B <c>driver_async_cancel()</c> is deprecated, and - scheduled for removal in OTP-R17. It will currently always fail, - and return 0.</p> - <warning><p><c>driver_async_cancel()</c> is deprecated and will - be removed in the OTP-R17 release.</p> - </warning> - - </desc> - </func> - <func> <name><ret>int</ret><nametext>driver_lock_driver(ErlDrvPort port)</nametext></name> <fsummary>Make sure the driver is never unloaded</fsummary> <desc> diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml index 7ac8181d47..8b19725c02 100644 --- a/erts/doc/src/erl_nif.xml +++ b/erts/doc/src/erl_nif.xml @@ -181,7 +181,11 @@ ok to dispatch the work to another thread, return from the native function, and wait for the result. The thread can send the result back to the calling thread using message passing. Information - about thread primitives can be found below.</p> + about thread primitives can be found below. If you have built your system + with <em>the currently experimental</em> support for dirty schedulers, + you may want to try out this functionality by dispatching the work to a + <seealso marker="#dirty_nifs">dirty NIF</seealso>, + which does not have the same duration restriction as a normal NIF.</p> </description> <section> <title>FUNCTIONALITY</title> @@ -312,6 +316,38 @@ ok <p>The library initialization callbacks <c>load</c>, <c>reload</c> and <c>upgrade</c> are all thread-safe even for shared state data.</p> </item> + <tag>Dirty NIFs</tag> + <item><p><marker id="dirty_nifs"/><em>Note that the dirty NIF functionality + is experimental</em> and that you have to enable support for dirty + schedulers when building OTP in order to try the functionality out. Native functions + <seealso marker="#lengthy_work"> + must normally run quickly</seealso>, as explained earlier in this document. They + generally should execute for no more than a millisecond. But not all native functions + can execute so quickly; for example, functions that encrypt large blocks of data or + perform lengthy file system operations can often run for tens of seconds or more.</p> + <p>A NIF that cannot execute in a millisecond or less is called a "dirty NIF" since + it performs work that the Erlang runtime cannot handle cleanly. Applications + that make use of such functions must indicate to the runtime that the functions are + dirty so they can be handled specially. To schedule a dirty NIF for execution, the + application calls <seealso marker="#enif_schedule_dirty_nif">enif_schedule_dirty_nif</seealso>, + passing to it a pointer to the dirty NIF to be executed and indicating with a flag + argument whether it expects the operation to be CPU-bound or I/O-bound.</p> + <p>All dirty NIFs must ultimately invoke the <seealso marker="#enif_schedule_dirty_nif_finalizer"> + enif_schedule_dirty_nif_finalizer</seealso> as their final action, passing to it the + result they wish to return to the original caller. A finalizer function can either + receive the result and return it directly, or it can return a different value instead. + For convenience, the NIF API provides the <seealso marker="#enif_dirty_nif_finalizer"> + enif_dirty_nif_finalizer</seealso> function that applications can use as a finalizer; + it simply returns its result argument.</p> + <note><p>Dirty NIF support is available only when the emulator is configured with dirty + schedulers enabled. This feature is currently disabled by default. To determine whether + the dirty NIF API is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined. Also, if the Erlang runtime was built + without threading support, dirty schedulers are disabled. To check at runtime for the presence + of dirty scheduler threads, code can call the <seealso marker="#enif_have_dirty_schedulers"><c> + enif_have_dirty_schedulers()</c></seealso> API function, which returns true if dirty + scheduler threads are present, false otherwise.</p></note> + </item> </taglist> </section> <section> @@ -610,6 +646,18 @@ typedef enum { See also the <seealso marker="#WARNING">warning</seealso> text at the beginning of this document.</p> </desc> </func> + <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result)</nametext></name> + <fsummary>Simple dirty NIF result finalizer</fsummary> + <desc> + <p>A convenience function that a dirty NIF can use as a finalizer that simply + return its <c>result</c> argument as its return value. This function is provided + for dirty NIFs with results that should be returned directly to the original caller.</p> + <note><p>This function is available only when the emulator is configured with dirty + schedulers enabled. This feature is currently disabled by default. To determine whether + the dirty NIF API is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> + </desc> + </func> <func><name><ret>int</ret><nametext>enif_equal_tids(ErlNifTid tid1, ErlNifTid tid2)</nametext></name> <fsummary></fsummary> <desc><p>Same as <seealso marker="erl_driver#erl_drv_equal_tids">erl_drv_equal_tids</seealso>. @@ -730,6 +778,22 @@ typedef enum { and return true, or return false if <c>term</c> is not an unsigned integer or is outside the bounds of type <c>unsigned long</c>.</p></desc> </func> + <func><name><ret>int</ret><nametext>enif_have_dirty_schedulers()</nametext></name> + <fsummary>Runtime check for the presence of dirty scheduler threads</fsummary> + <desc> + <p>Check at runtime for the presence of dirty scheduler threads. If the emulator is + built with threading support, dirty scheduler threads are available and + <c>enif_have_dirty_schedulers()</c> returns true. If the emulator was built without + threading support, <c>enif_have_dirty_schedulers()</c> returns false.</p> + <p>If dirty scheduler threads are not available in the emulator, calls to + <c>enif_schedule_dirty_nif</c> and <c>enif_schedule_dirty_nif_finalizer</c> result in + the NIF and finalizer functions being called directly within the calling thread.</p> + <note><p>This function is available only when the emulator is configured with dirty + schedulers enabled. This feature is currently disabled by default. To determine whether + the dirty NIF API is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> + </desc> + </func> <func><name><ret>int</ret><nametext>enif_inspect_binary(ErlNifEnv* env, ERL_NIF_TERM bin_term, ErlNifBinary* bin)</nametext></name> <fsummary>Inspect the content of a binary</fsummary> <desc><p>Initialize the structure pointed to by <c>bin</c> with @@ -777,6 +841,20 @@ typedef enum { Erlang operators <c>=:=</c> and <c>=/=</c>.</p></desc> </func> + <func><name><ret>int</ret><nametext>enif_is_on_dirty_scheduler(ErlNifEnv* env)</nametext></name> + <fsummary>Check to see if executing on a dirty scheduler thread</fsummary> + <desc> + <p>Check to see if the current NIF is executing on a dirty scheduler thread. If the + emulator is built with threading support, calling <c>enif_is_on_dirty_scheduler</c> + from within a dirty NIF returns true. It returns false when the calling NIF is a regular + NIF or a NIF finalizer, both of which run on normal scheduler threads, or when the emulator + is built without threading support.</p> + <note><p>This function is available only when the emulator is configured with dirty + schedulers enabled. This feature is currently disabled by default. To determine whether + the dirty NIF API is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> + </desc> + </func> <func><name><ret>int</ret><nametext>enif_is_pid(ErlNifEnv* env, ERL_NIF_TERM term)</nametext></name> <fsummary>Determine if a term is a pid</fsummary> <desc><p>Return true if <c>term</c> is a pid.</p></desc> @@ -1141,6 +1219,48 @@ typedef enum { <desc><p>Same as <seealso marker="erl_driver#erl_drv_rwlock_tryrwlock">erl_drv_rwlock_tryrwlock</seealso>. </p></desc> </func> + <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_schedule_dirty_nif(ErlNifEnv* env, int flags, ERL_NIF_TERM (*fp)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]), int argc, const ERL_NIF_TERM argv[])</nametext></name> + <fsummary>Schedule a dirty NIF for execution</fsummary> + <desc> + <p>Schedule dirty NIF <c>fp</c> to execute a long-running operation. The <c>flags</c> + argument must be set to either <c>ERL_NIF_DIRTY_JOB_CPU_BOUND</c> if the job is expected to + be primarily CPU-bound, or <c>ERL_NIF_DIRTY_JOB_IO_BOUND</c> for jobs that will be + I/O-bound. The <c>argc</c> and <c>argv</c> arguments can either be the originals passed + into the calling NIF, or they can be values created by the calling NIF. The calling + NIF must use the return value of <c>enif_schedule_dirty_nif</c> as its own return value.</p> + <p>Be aware that <c>enif_schedule_dirty_nif</c>, as its name implies, only schedules the + dirty NIF for future execution. The calling NIF does not block waiting for the dirty NIF to + execute and return, which means that the calling NIF can't expect to receive the dirty NIF + return value and use it for further operations.</p> + <p>A dirty NIF may not invoke the <seealso marker="#enif_make_badarg">enif_make_badarg</seealso> + to raise an exception. If it wishes to return an exception, the dirty NIF should pass a + regular result indicating the exception details to its finalizer, and allow the finalizer + to raise the exception on its behalf.</p> + <note><p>This function is available only when the emulator is configured with dirty schedulers + enabled. This feature is currently disabled by default. To determine whether the dirty NIF API + is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> + </desc> + </func> + <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result, ERL_NIF_TERM (*fp)(ErlNifEnv* env, ERL_NIF_TERM result))</nametext></name> + <fsummary>Schedule a dirty NIF finalizer</fsummary> + <desc> + <p>When a dirty NIF finishes executing, it must schedule a finalizer function to return + its result to the original NIF caller. The dirty NIF passes <c>result</c> as the value it + wants the finalizer to use as the return value. The <c>fp</c> argument is a pointer to the + finalizer function. The NIF API provides the <seealso marker="#enif_dirty_nif_finalizer"> + enif_dirty_nif_finalizer</seealso> function that can be used as a finalizer that simply + returns its <c>result</c> argument. You are also free to write your own custom finalizer + that uses <c>result</c> to derive a different return value, or ignores <c>result</c> + entirely and returns a completely different value.</p> + <p>Without exception, all dirty NIFs must invoke <c>enif_schedule_dirty_nif_finalizer</c> + to complete their execution.</p> + <note><p>This function is available only when the emulator is configured with dirty + schedulers enabled. This feature is currently disabled by default. To determine whether + the dirty NIF API is available, native code can check to see if the C preprocessor macro + <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> + </desc> + </func> <func><name><ret>ErlNifPid *</ret><nametext>enif_self(ErlNifEnv* caller_env, ErlNifPid* pid)</nametext></name> <fsummary>Get the pid of the calling process.</fsummary> <desc><p>Initialize the pid variable <c>*pid</c> to represent the diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 124302a2cb..4cf5631727 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -3011,11 +3011,11 @@ os_prompt% </pre> <tag><marker id="open_port_parallelism"><c>{parallelism, Boolean}</c></marker></tag> <item> <p>Set scheduler hint for port parallelism. If set to <c>true</c>, - the VM will schedule port tasks when it by this can improve the + the VM will schedule port tasks when doing so will improve parallelism in the system. If set to <c>false</c>, the VM will - try to perform port tasks immediately and by this improving the - latency at the expense of parallelism. The default can be set on - system startup by passing the + try to perform port tasks immediately, improving latency at the + expense of parallelism. The default can be set on system startup + by passing the <seealso marker="erl#+spp">+spp</seealso> command line argument to <seealso marker="erl">erl(1)</seealso>. </p> @@ -5194,6 +5194,27 @@ ok </func> <func> <name name="system_flag" arity="2" clause_i="3"/> + <fsummary>Set system flag dirty CPU schedulers online</fsummary> + <desc> + <p><marker id="system_flag_dirty_cpu_schedulers_online"></marker> + Sets the amount of dirty CPU schedulers online. Valid range is + <![CDATA[1 <= DirtyCPUSchedulersOnline <= N]]> where <c>N</c> is the + lesser of the return values of <c>erlang:system_info(dirty_cpu_schedulers)</c> and + <c>erlang:system_info(schedulers_online)</c>. + </p> + <p>Returns the old value of the flag.</p> + <p><em>Note that the dirty schedulers functionality is experimental</em>, and + that you have to enable support for dirty schedulers when building OTP in + order to try the functionality out.</p> + <p>For more information see + <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso> + and + <seealso marker="#system_info_dirty_cpu_schedulers_online">erlang:system_info(dirty_cpu_schedulers_online)</seealso>. + </p> + </desc> + </func> + <func> + <name name="system_flag" arity="2" clause_i="4"/> <fsummary>Set system flag fullsweep_after</fsummary> <desc> <p><c><anno>Number</anno></c> is a non-negative integer which indicates @@ -5211,7 +5232,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="4"/> + <name name="system_flag" arity="2" clause_i="5"/> <fsummary>Set system flag min_heap_size</fsummary> <desc> <p>Sets the default minimum heap size for processes. The @@ -5226,7 +5247,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="5"/> + <name name="system_flag" arity="2" clause_i="6"/> <fsummary>Set system flag min_bin_vheap_size</fsummary> <desc> <p>Sets the default minimum binary virtual heap size for processes. The @@ -5241,7 +5262,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="6"/> + <name name="system_flag" arity="2" clause_i="7"/> <fsummary>Set system flag multi_scheduling</fsummary> <desc> <p><marker id="system_flag_multi_scheduling"></marker> @@ -5279,7 +5300,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="7"/> + <name name="system_flag" arity="2" clause_i="8"/> <type name="scheduler_bind_type"/> <fsummary>Set system flag scheduler_bind_type</fsummary> <desc> @@ -5399,7 +5420,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="8"/> + <name name="system_flag" arity="2" clause_i="9"/> <fsummary>Set system flag scheduler_wall_time</fsummary> <desc><p><marker id="system_flag_scheduler_wall_time"></marker> Turns on/off scheduler wall time measurements. </p> @@ -5409,7 +5430,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="9"/> + <name name="system_flag" arity="2" clause_i="10"/> <fsummary>Set system flag schedulers_online</fsummary> <desc> <p><marker id="system_flag_schedulers_online"></marker> @@ -5425,7 +5446,7 @@ ok </desc> </func> <func> - <name name="system_flag" arity="2" clause_i="10"/> + <name name="system_flag" arity="2" clause_i="11"/> <fsummary>Set system flag trace_control_word</fsummary> <desc> <p>Sets the value of the node's trace control word to @@ -5785,6 +5806,71 @@ ok compiled; otherwise, <c>false</c>. </p> </item> + <tag><marker id="system_info_dirty_cpu_schedulers"><c>dirty_cpu_schedulers</c></marker></tag> + <item> + <p>Returns the number of dirty CPU scheduler threads used by + the emulator. Dirty CPU schedulers execute CPU-bound + native functions such as NIFs, linked-in driver code, and BIFs + that cannot be managed cleanly by the emulator's normal schedulers. + </p> + <p>The number of dirty CPU scheduler threads is determined at emulator + boot time and cannot be changed after that. The number of dirty CPU + scheduler threads online can however be changed at any time. The number of + dirty CPU schedulers can be set on startup by passing + the <seealso marker="erts:erl#+SDcpu">+SDcpu</seealso> command line flag, see + <seealso marker="erts:erl#+SDcpu">erl(1)</seealso>. + </p> + <p><em>Note that the dirty schedulers functionality is experimental</em>, and + that you have to enable support for dirty schedulers when building OTP in + order to try the functionality out.</p> + <p>See also <seealso marker="#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>, + <seealso marker="#system_info_dirty_cpu_schedulers_online">erlang:system_info(dirty_cpu_schedulers_online)</seealso>, + <seealso marker="#system_info_dirty_io_schedulers">erlang:system_info(dirty_io_schedulers)</seealso>, + <seealso marker="#system_info_schedulers">erlang:system_info(schedulers)</seealso>, + <seealso marker="#system_info_schedulers_online">erlang:system_info(schedulers_online)</seealso>, and + <seealso marker="#system_flag_schedulers_online">erlang:system_flag(schedulers_online, SchedulersOnline)</seealso>.</p> + </item> + <tag><marker id="system_info_dirty_cpu_schedulers_online"><c>dirty_cpu_schedulers_online</c></marker></tag> + <item> + <p>Returns the number of dirty CPU schedulers online. The return value + satisfies the following relationship: + <c><![CDATA[1 <= DirtyCPUSchedulersOnline <= N]]></c>, where <c>N</c> is + the lesser of the return values of <c>erlang:system_info(dirty_cpu_schedulers)</c> and + <c>erlang:system_info(schedulers_online)</c>. + </p> + <p>The number of dirty CPU schedulers online can be set on startup by passing + the <seealso marker="erts:erl#+SDcpu">+SDcpu</seealso> command line flag, see + <seealso marker="erts:erl#+SDcpu">erl(1)</seealso>. + </p> + <p><em>Note that the dirty schedulers functionality is experimental</em>, and + that you have to enable support for dirty schedulers when building OTP in + order to try the functionality out.</p> + <p>For more information, see + <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso>, + <seealso marker="#system_info_dirty_io_schedulers">erlang:system_info(dirty_io_schedulers)</seealso>, + <seealso marker="#system_info_schedulers_online">erlang:system_info(schedulers_online)</seealso>, and + <seealso marker="#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>. + </p> + </item> + <tag><marker id="system_info_dirty_io_schedulers"><c>dirty_io_schedulers</c></marker></tag> + <item> + <p>Returns the number of dirty I/O schedulers as an integer. Dirty I/O schedulers + execute I/O-bound native functions such as NIFs and linked-in driver code that + cannot be managed cleanly by the emulator's normal schedulers. + </p> + <p>This value can be set on startup by passing + the <seealso marker="erts:erl#+SDio">+SDio</seealso> command line flag, see + <seealso marker="erts:erl#+SDio">erl(1)</seealso>. + </p> + <p><em>Note that the dirty schedulers functionality is experimental</em>, and + that you have to enable support for dirty schedulers when building OTP in + order to try the functionality out.</p> + <p>For more information, see + <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso>, + <seealso marker="#system_info_dirty_cpu_schedulers_online">erlang:system_info(dirty_cpu_schedulers_online)</seealso>, and + <seealso marker="#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>. + </p> + </item> <tag><c>dist</c></tag> <item> <p>Returns a binary containing a string of distribution diff --git a/erts/doc/src/erts_alloc.xml b/erts/doc/src/erts_alloc.xml index fffa9f594c..c9eca39a99 100644 --- a/erts/doc/src/erts_alloc.xml +++ b/erts/doc/src/erts_alloc.xml @@ -395,16 +395,17 @@ <c><![CDATA[<utilization>]]></c> is an integer in the range <c>[0, 100]</c> representing utilization in percent. When a utilization value larger than zero is used, allocator instances - are allowed to abandon multiblock carriers. Currently the default - is zero. If <c>de</c> (default enabled) is passed instead of a - <c><![CDATA[<utilization>]]></c>, a recomended non zero utilization - value will be used. The actual value chosen depend on allocator - type and may be changed between ERTS versions. Carriers will be - abandoned when memory utilization in the allocator instance falls - below the utilization value used. Once a carrier has been abandoned, - no new allocations will be made in it. When an allocator instance - gets an increased multiblock carrier need, it will first try to - fetch an abandoned carrier from an allocator instances of the same + are allowed to abandon multiblock carriers. If <c>de</c> (default + enabled) is passed instead of a <c><![CDATA[<utilization>]]></c>, + a recomended non zero utilization value will be used. The actual + value chosen depend on allocator type and may be changed between + ERTS versions. Currently the default equals <c>de</c>, but this + may be changed in the future. Carriers will be abandoned when + memory utilization in the allocator instance falls below the + utilization value used. Once a carrier has been abandoned, no new + allocations will be made in it. When an allocator instance gets an + increased multiblock carrier need, it will first try to fetch an + abandoned carrier from an allocator instances of the same allocator type. If no abandoned carrier could be fetched, it will create a new empty carrier. When an abandoned carrier has been fetched it will function as an ordinary carrier. This feature has diff --git a/erts/doc/src/escript.xml b/erts/doc/src/escript.xml index 180447cac4..d2b09d4515 100644 --- a/erts/doc/src/escript.xml +++ b/erts/doc/src/escript.xml @@ -44,6 +44,7 @@ <p><c>escript</c> runs a script written in Erlang.</p> <p>Here follows an example.</p> <pre> +$ <input>chmod u+x factorial</input> $ <input>cat factorial</input> #!/usr/bin/env escript %% -*- erlang -*- @@ -66,12 +67,13 @@ usage() -> fac(0) -> 1; fac(N) -> N * fac(N-1). -$ <input>factorial 5</input> +$ <input>./factorial 5</input> factorial 5 = 120 -$ <input>factorial</input> +$ <input>./factorial</input> usage: factorial integer -$ <input>factorial five</input> -usage: factorial integer </pre> +$ <input>./factorial five</input> +usage: factorial integer + </pre> <p>The header of the Erlang script in the example differs from a normal Erlang module. The first line is intended to be the interpreter line, which invokes <c>escript</c>. However if you diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml index 8c008c493e..b4ebef72f4 100644 --- a/erts/doc/src/notes.xml +++ b/erts/doc/src/notes.xml @@ -257,7 +257,7 @@ processes before the BIF returns, or fail with an exception due to the port not being open. </p><p> The synchronous port BIFs are: </p> <list> <item><seealso - marker="erlang#port_close/1/"><c>port_close/1</c></seealso></item> + marker="erlang#port_close/1"><c>port_close/1</c></seealso></item> <item><seealso marker="erlang#port_command/2"><c>port_command/2</c></seealso></item> <item><seealso diff --git a/erts/doc/src/zlib.xml b/erts/doc/src/zlib.xml index afc597b729..11a7437f5a 100644 --- a/erts/doc/src/zlib.xml +++ b/erts/doc/src/zlib.xml @@ -161,20 +161,22 @@ list_to_binary([Compressed|Last])</pre> state. <c><anno>MemLevel</anno></c>=1 uses minimum memory but is slow and reduces compression ratio; <c><anno>MemLevel</anno></c>=9 uses maximum memory for optimal speed. The default value is 8.</p> - <p>The <c><anno>Strategy</anno></c> parameter is used to tune the - compression algorithm. Use the value <c>default</c> for - normal data, <c>filtered</c> for data produced by a filter - (or predictor), or <c>huffman_only</c> to force Huffman - encoding only (no string match). Filtered data consists - mostly of small values with a somewhat random - distribution. In this case, the compression algorithm is - tuned to compress them better. The effect of - <c>filtered</c>is to force more Huffman coding and less - string matching; it is somewhat intermediate between - <c>default</c> and <c>huffman_only</c>. The <c><anno>Strategy</anno></c> - parameter only affects the compression ratio but not the - correctness of the compressed output even if it is not set - appropriately.</p> + <p>The <c><anno>Strategy</anno></c> parameter is used to tune + the compression algorithm. Use the value <c>default</c> for + normal data, <c>filtered</c> for data produced by a filter (or + predictor), <c>huffman_only</c> to force Huffman encoding + only (no string match), or <c>rle</c> to limit match + distances to one (run-length encoding). Filtered data + consists mostly of small values with a somewhat random + distribution. In this case, the compression algorithm is tuned + to compress them better. The effect of <c>filtered</c>is to + force more Huffman coding and less string matching; it is + somewhat intermediate between <c>default</c> and + <c>huffman_only</c>. <c>rle</c> is designed to be almost as + fast as <c>huffman_only</c>, but give better compression for PNG + image data. The <c><anno>Strategy</anno></c> parameter only + affects the compression ratio but not the correctness of the + compressed output even if it is not set appropriately.</p> </desc> </func> <func> diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index b270099566..9125a8024f 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -665,7 +665,6 @@ endif ifneq ($(filter tile-%,$(TARGET)),) $(OBJDIR)/beam_emu.o: beam/beam_emu.c $(V_CC) $(subst -O2, $(GEN_OPT_FLGS), $(CFLAGS)) \ - -OPT:Olimit=0 -WOPT:lpre=off:spre=off:epre=off \ $(INCLUDES) -c $< -o $@ else # Usually the same as the default rule, but certain platforms (e.g. win32) mix diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 78ab6fa30f..7fecdd5c5f 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -48,7 +48,7 @@ # define OpCase(OpCode) case op_##OpCode # define CountCase(OpCode) case op_count_##OpCode # define OpCode(OpCode) ((Uint*)op_##OpCode) -# define Goto(Rel) {Go = (int)(Rel); goto emulator_loop;} +# define Goto(Rel) {Go = (int)(UWord)(Rel); goto emulator_loop;} # define LabelAddr(Addr) &&##Addr #else # define OpCase(OpCode) lb_##OpCode @@ -133,7 +133,7 @@ do { \ /* We don't check the range if an ordinary switch is used */ #ifdef NO_JUMP_TABLE -#define VALID_INSTR(IP) (0 <= (int)(IP) && ((int)(IP) < (NUMBER_OF_OPCODES*2+10))) +#define VALID_INSTR(IP) ((UWord)(IP) < (NUMBER_OF_OPCODES*2+10)) #else #define VALID_INSTR(IP) \ ((SWord)LabelAddr(emulator_loop) <= (SWord)(IP) && \ @@ -217,6 +217,7 @@ BeamInstr beam_continue_exit[1]; BeamInstr* em_call_error_handler; BeamInstr* em_apply_bif; +BeamInstr* em_call_nif; /* NOTE These should be the only variables containing trace instructions. @@ -3323,6 +3324,13 @@ void process_main(void) reg[0] = r(0); nif_bif_result = (*fp)(&env, bif_nif_arity, reg); erts_post_nif(&env); +#ifdef ERTS_DIRTY_SCHEDULERS + if (is_non_value(nif_bif_result) && c_p->freason == TRAP) { + Export* ep = (Export*) c_p->psd->data[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT]; + ep->code[0] = I[-3]; + ep->code[1] = I[-2]; + } +#endif } ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(nif_bif_result)); PROCESS_MAIN_CHK_LOCKS(c_p); @@ -4326,7 +4334,19 @@ void process_main(void) flags = Arg(2); BsGetFieldSize(tmp_arg2, (flags >> 3), ClauseFail(), size); if (size >= SMALL_BITS) { - Uint wordsneeded = 1+WSIZE(NBYTES((Uint) size)); + Uint wordsneeded; + /* check bits size before potential gc. + * We do not want a gc and then realize we don't need + * the allocated space (i.e. if the op fails) + * + * remember to reacquire the matchbuffer after gc. + */ + + mb = ms_matchbuffer(tmp_arg1); + if (mb->size - mb->offset < size) { + ClauseFail(); + } + wordsneeded = 1+WSIZE(NBYTES((Uint) size)); TestHeapPreserve(wordsneeded, Arg(1), tmp_arg1); } mb = ms_matchbuffer(tmp_arg1); @@ -4952,6 +4972,7 @@ void process_main(void) em_call_error_handler = OpCode(call_error_handler); em_apply_bif = OpCode(apply_bif); + em_call_nif = OpCode(call_nif); beam_apply[0] = (BeamInstr) OpCode(i_apply); beam_apply[1] = (BeamInstr) OpCode(normal_exit); diff --git a/erts/emulator/beam/beam_load.h b/erts/emulator/beam/beam_load.h index 65a8f26d7c..bd22b0c4de 100644 --- a/erts/emulator/beam/beam_load.h +++ b/erts/emulator/beam/beam_load.h @@ -49,6 +49,7 @@ extern void** beam_ops; extern BeamInstr beam_debug_apply[]; extern BeamInstr* em_call_error_handler; extern BeamInstr* em_apply_bif; +extern BeamInstr* em_call_nif; /* * The following variables keep a sorted list of address ranges for diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 96666d98ed..61c1abedb5 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -4488,7 +4488,7 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) BIF_P->group_leader, "A call to erlang:system_flag(cpu_topology, _) was made.\n" "The cpu_topology argument is deprecated and scheduled\n" - "for removal in erts-5.10/OTP-R16. For more information\n" + "for removal in Erlang/OTP 18. For more information\n" "see the erlang:system_flag/2 documentation.\n"); BIF_TRAP1(set_cpu_topology_trap, BIF_P, BIF_ARG_2); } else if (ERTS_IS_ATOM_STR("scheduler_bind_type", BIF_ARG_1)) { @@ -4496,7 +4496,7 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) BIF_P->group_leader, "A call to erlang:system_flag(scheduler_bind_type, _) was\n" "made. The scheduler_bind_type argument is deprecated and\n" - "scheduled for removal in erts-5.10/OTP-R16. For more\n" + "scheduled for removal in Erlang/OTP 18. For more\n" "information see the erlang:system_flag/2 documentation.\n"); return erts_bind_schedulers(BIF_P, BIF_ARG_2); } diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 2b27b111d8..41a041eba6 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -1603,9 +1603,11 @@ big_to_double(Wterm x, double* resp) /* * Logic has been copied from erl_bif_guard.c and slightly * modified to use a static instead of dynamic heap + * + * HALFWORD: Return relative term with 'heap' as base. */ Eterm -double_to_big(double x, Eterm *heap) +double_to_big(double x, Eterm *heap, Uint hsz) { int is_negative; int ds; @@ -1633,9 +1635,10 @@ double_to_big(double x, Eterm *heap) sz = BIG_NEED_SIZE(ds); /* number of words including arity */ hp = heap; - res = make_big(hp); + res = make_big_rel(hp, heap); xp = (ErtsDigit*) (hp + 1); + ASSERT(ds < hsz); for (i = ds - 1; i >= 0; i--) { ErtsDigit d; diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index 1a7b14170f..d80111822e 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -141,7 +141,7 @@ Eterm big_lshift(Eterm, Sint, Eterm*); int big_comp (Wterm, Wterm); int big_ucomp (Eterm, Eterm); int big_to_double(Wterm x, double* resp); -Eterm double_to_big(double, Eterm*); +Eterm double_to_big(double, Eterm*, Uint hsz); Eterm small_to_big(Sint, Eterm*); Eterm uint_to_big(Uint, Eterm*); Eterm uword_to_big(UWord, Eterm*); diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index b5ba9bb94a..c6b324dc15 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -75,9 +75,9 @@ #define ERTS_ALC_DEFAULT_ENABLED_ACUL_EHEAP_ALLOC 45 #define ERTS_ALC_DEFAULT_ENABLED_ACUL_LL_ALLOC 85 -#define ERTS_ALC_DEFAULT_ACUL 0 -#define ERTS_ALC_DEFAULT_ACUL_EHEAP_ALLOC 0 -#define ERTS_ALC_DEFAULT_ACUL_LL_ALLOC 0 +#define ERTS_ALC_DEFAULT_ACUL ERTS_ALC_DEFAULT_ENABLED_ACUL +#define ERTS_ALC_DEFAULT_ACUL_EHEAP_ALLOC ERTS_ALC_DEFAULT_ENABLED_ACUL_EHEAP_ALLOC +#define ERTS_ALC_DEFAULT_ACUL_LL_ALLOC ERTS_ALC_DEFAULT_ENABLED_ACUL_LL_ALLOC #ifndef ERTS_SMP # undef ERTS_ALC_DEFAULT_ACUL @@ -1754,6 +1754,9 @@ erts_alloc_register_scheduler(void *vesdp) int ix = (int) esdp->no; int aix; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) { ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix]; esdp->alloc_data.deallctr[aix] = NULL; diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 32308fae9b..b4e52770e3 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -1,7 +1,7 @@ # # %CopyrightBegin% # -# Copyright Ericsson AB 2003-2013. All Rights Reserved. +# Copyright Ericsson AB 2003-2014. All Rights Reserved. # # The contents of this file are subject to the Erlang Public License, # Version 1.1, (the "License"); you may not use this file except in @@ -150,7 +150,7 @@ type LINK_LH STANDARD PROCESSES link_lh type SUSPEND_MON STANDARD PROCESSES suspend_monitor type PEND_SUSPEND SHORT_LIVED PROCESSES pending_suspend type PROC_LIST SHORT_LIVED PROCESSES proc_list -type EXTRA_ROOT SHORT_LIVED PROCESSES extra_root +type SAVED_ESTACK SHORT_LIVED PROCESSES saved_estack type FUN_ENTRY LONG_LIVED CODE fun_entry type ATOM_TXT LONG_LIVED ATOM atom_text type BEAM_REGISTER EHEAP PROCESSES beam_register diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c index e6d72f569b..f0cec1c53c 100644 --- a/erts/emulator/beam/erl_async.c +++ b/erts/emulator/beam/erl_async.c @@ -602,7 +602,7 @@ unsigned int driver_async_port_key(ErlDrvPort port) ** return values: ** 0 completed ** -1 error -** N handle value (used with async_cancel) +** N handle value ** arguments: ** ix driver index ** key pointer to secedule queue (NULL means round robin) @@ -687,23 +687,3 @@ long driver_async(ErlDrvPort ix, unsigned int* key, return id; } - -int driver_async_cancel(unsigned int id) -{ - /* - * Not supported anymore. Always fail (which is backward - * compatible). - * - * This functionality could be implemented again. However, - * it is (and always has been) completely useless since - * it doesn't give you any guarantees whatsoever. The user - * needs to (and always have had to) synchronize in his/her - * own code in order to get any guarantees. - */ - return 0; -} - - - - - diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index e0b654cb22..f25b4dbae5 100755 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -90,6 +90,9 @@ static char erts_system_version[] = ("Erlang/OTP " ERLANG_OTP_RELEASE " [smp:%beu:%beu]" #endif #ifdef USE_THREADS +#ifdef ERTS_DIRTY_SCHEDULERS + " [ds:%beu:%beu:%beu]" +#endif " [async-threads:%d]" #endif #ifdef HIPE @@ -312,7 +315,13 @@ erts_print_system_version(int to, void *arg, Process *c_p) char *ocp = otp_correction_package; #ifdef ERTS_SMP Uint total, online, active; - (void) erts_schedulers_state(&total, &online, &active, 0); +#ifdef ERTS_DIRTY_SCHEDULERS + Uint dirty_cpu, dirty_cpu_onln, dirty_io; + + (void) erts_schedulers_state(&total, &online, &active, &dirty_cpu, &dirty_cpu_onln, &dirty_io, 0); +#else + (void) erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 0); +#endif #endif for (i = 0; i < sizeof(otp_correction_package)-4; i++) { if (ocp[i] == '-' && ocp[i+1] == 'r' && ocp[i+2] == 'c') @@ -330,6 +339,9 @@ erts_print_system_version(int to, void *arg, Process *c_p) rc_str #ifdef ERTS_SMP , total, online +#ifdef ERTS_DIRTY_SCHEDULERS + , dirty_cpu, dirty_cpu_onln, dirty_io +#endif #endif #ifdef USE_THREADS , erts_async_max_threads @@ -2477,6 +2489,9 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) switch (erts_schedulers_state(&total, &online, &active, + NULL, + NULL, + NULL, 1)) { case ERTS_SCHDLR_SSPND_DONE: { Eterm *hp = HAlloc(BIF_P, 4); @@ -2500,7 +2515,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(make_small(1)); #else Uint total, online, active; - switch (erts_schedulers_state(&total, &online, &active, 1)) { + switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) { case ERTS_SCHDLR_SSPND_DONE: BIF_RET(make_small(online)); case ERTS_SCHDLR_SSPND_YIELD_RESTART: @@ -2517,7 +2532,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(make_small(1)); #else Uint total, online, active; - switch (erts_schedulers_state(&total, &online, &active, 1)) { + switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) { case ERTS_SCHDLR_SSPND_DONE: BIF_RET(make_small(active)); case ERTS_SCHDLR_SSPND_YIELD_RESTART: @@ -2529,6 +2544,20 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); } #endif +#if defined(ERTS_SMP) && defined(ERTS_DIRTY_SCHEDULERS) + } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers", BIF_ARG_1)) { + Uint dirty_cpu; + erts_schedulers_state(NULL, NULL, NULL, &dirty_cpu, NULL, NULL, 1); + BIF_RET(make_small(dirty_cpu)); + } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers_online", BIF_ARG_1)) { + Uint dirty_cpu_onln; + erts_schedulers_state(NULL, NULL, NULL, NULL, &dirty_cpu_onln, NULL, 1); + BIF_RET(make_small(dirty_cpu_onln)); + } else if (ERTS_IS_ATOM_STR("dirty_io_schedulers", BIF_ARG_1)) { + Uint dirty_io; + erts_schedulers_state(NULL, NULL, NULL, NULL, NULL, &dirty_io, 1); + BIF_RET(make_small(dirty_io)); +#endif } else if (ERTS_IS_ATOM_STR("run_queues", BIF_ARG_1)) { res = make_small(erts_no_run_queues); BIF_RET(res); diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index ef3749a2c4..a358ecf326 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1998-2013. All Rights Reserved. + * Copyright Ericsson AB 1998-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -1838,7 +1838,7 @@ restart: ep = termp; break; case matchArrayBind: /* When the array size is unknown. */ - ASSERT(termp); + ASSERT(termp || arity==0); n = *pc++; variables[n].term = dpm_array_to_list(psp, termp, arity); break; diff --git a/erts/emulator/beam/erl_driver.h b/erts/emulator/beam/erl_driver.h index 5cffae92be..2bd3181bdc 100644 --- a/erts/emulator/beam/erl_driver.h +++ b/erts/emulator/beam/erl_driver.h @@ -132,8 +132,8 @@ typedef struct { #define DO_WRITE ERL_DRV_WRITE #define ERL_DRV_EXTENDED_MARKER (0xfeeeeeed) -#define ERL_DRV_EXTENDED_MAJOR_VERSION 2 -#define ERL_DRV_EXTENDED_MINOR_VERSION 2 +#define ERL_DRV_EXTENDED_MAJOR_VERSION 3 +#define ERL_DRV_EXTENDED_MINOR_VERSION 0 /* * The emulator will refuse to load a driver with different major @@ -657,12 +657,6 @@ EXTERN long driver_async(ErlDrvPort ix, void* async_data, void (*async_free)(void*)); -/* - * driver_async_cancel() is deprecated. It is scheduled for removal - * in OTP-R16. For more information see the erl_driver(3) documentation. - */ -EXTERN int driver_async_cancel(unsigned int key) ERL_DRV_DEPRECATED_FUNC; - /* Locks the driver in the machine "forever", there is no unlock function. Note that this is almost never useful, as an open port towards the driver locks it until the port is closed, why unexpected diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index c5585d39e8..ab8448e8a1 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2013. All Rights Reserved. + * Copyright Ericsson AB 2002-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -1157,7 +1157,7 @@ do_minor(Process *p, Uint new_sz, Eterm* objv, int nobj) old_htop = sweep_one_area(OLD_HTOP(p), old_htop, heap, heap_size); } OLD_HTOP(p) = old_htop; - HIGH_WATER(p) = (HEAP_START(p) != HIGH_WATER(p)) ? n_heap : n_htop; + HIGH_WATER(p) = n_htop; if (MSO(p).first) { sweep_off_heap(p, 0); @@ -1975,17 +1975,6 @@ setup_rootset(Process *p, Eterm *objv, int nobj, Rootset *rootset) ++n; } - /* - * A trapping BIF can add to rootset by setting the extra_root - * in the process_structure. - */ - if (p->extra_root != NULL) { - roots[n].v = p->extra_root->objv; - roots[n].sz = p->extra_root->sz; - ++n; - } - - ASSERT((is_nil(p->seq_trace_token) || is_tuple(follow_moved(p->seq_trace_token)) || is_atom(p->seq_trace_token))); @@ -2563,11 +2552,6 @@ offset_one_rootset(Process *p, Sint offs, char* area, Uint area_size, p->dictionary->used, offs, area, area_size); } - if (p->extra_root != NULL) { - offset_heap_ptr(p->extra_root->objv, - p->extra_root->sz, - offs, area, area_size); - } offset_heap_ptr(&p->fvalue, 1, offs, area, area_size); offset_heap_ptr(&p->ftrace, 1, offs, area, area_size); diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 1af80dd04b..c17256f466 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -484,92 +484,107 @@ void erts_usage(void) /* erts_fprintf(stderr, "-# number set the number of items to be used in traces etc\n"); */ - erts_fprintf(stderr, "-a size suggested stack size in kilo words for threads\n"); - erts_fprintf(stderr, " in the async-thread pool, valid range is [%d-%d]\n", + erts_fprintf(stderr, "-a size suggested stack size in kilo words for threads\n"); + erts_fprintf(stderr, " in the async-thread pool, valid range is [%d-%d]\n", ERTS_ASYNC_THREAD_MIN_STACK_SIZE, ERTS_ASYNC_THREAD_MAX_STACK_SIZE); - erts_fprintf(stderr, "-A number set number of threads in async thread pool,\n"); - erts_fprintf(stderr, " valid range is [0-%d]\n", + erts_fprintf(stderr, "-A number set number of threads in async thread pool,\n"); + erts_fprintf(stderr, " valid range is [0-%d]\n", ERTS_MAX_NO_OF_ASYNC_THREADS); - erts_fprintf(stderr, "-B[c|d|i] c to have Ctrl-c interrupt the Erlang shell,\n"); - erts_fprintf(stderr, " d (or no extra option) to disable the break\n"); - erts_fprintf(stderr, " handler, i to ignore break signals\n"); + erts_fprintf(stderr, "-B[c|d|i] c to have Ctrl-c interrupt the Erlang shell,\n"); + erts_fprintf(stderr, " d (or no extra option) to disable the break\n"); + erts_fprintf(stderr, " handler, i to ignore break signals\n"); /* erts_fprintf(stderr, "-b func set the boot function (default boot)\n"); */ - erts_fprintf(stderr, "-c disable continuous date/time correction with\n"); - erts_fprintf(stderr, " respect to uptime\n"); + erts_fprintf(stderr, "-c disable continuous date/time correction with\n"); + erts_fprintf(stderr, " respect to uptime\n"); - erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); - erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); - erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); - erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", + erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n"); + erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n"); + erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n"); + erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n", H_DEFAULT_SIZE); - erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n", + erts_fprintf(stderr, "-hmbs size set minimum binary virtual heap size in words (default %d)\n", VH_DEFAULT_SIZE); /* erts_fprintf(stderr, "-i module set the boot module (default init)\n"); */ - erts_fprintf(stderr, "-K boolean enable or disable kernel poll\n"); - erts_fprintf(stderr, "-n[s|a|d] Control behavior of signals to ports\n"); - erts_fprintf(stderr, " Note that this flag is deprecated!\n"); - erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n"); - erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n"); - erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n"); - erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n"); - erts_fprintf(stderr, " valid range is [%d-%d]\n", + erts_fprintf(stderr, "-K boolean enable or disable kernel poll\n"); + erts_fprintf(stderr, "-n[s|a|d] Control behavior of signals to ports\n"); + erts_fprintf(stderr, " Note that this flag is deprecated!\n"); + erts_fprintf(stderr, "-M<X> <Y> memory allocator switches,\n"); + erts_fprintf(stderr, " see the erts_alloc(3) documentation for more info.\n"); + erts_fprintf(stderr, "-pc <set> Control what characters are considered printable (default latin1)\n"); + erts_fprintf(stderr, "-P number set maximum number of processes on this node,\n"); + erts_fprintf(stderr, " valid range is [%d-%d]\n", ERTS_MIN_PROCESSES, ERTS_MAX_PROCESSES); - erts_fprintf(stderr, "-Q number set maximum number of ports on this node,\n"); - erts_fprintf(stderr, " valid range is [%d-%d]\n", + erts_fprintf(stderr, "-Q number set maximum number of ports on this node,\n"); + erts_fprintf(stderr, " valid range is [%d-%d]\n", ERTS_MIN_PORTS, ERTS_MAX_PORTS); - erts_fprintf(stderr, "-R number set compatibility release number,\n"); - erts_fprintf(stderr, " valid range [%d-%d]\n", + erts_fprintf(stderr, "-R number set compatibility release number,\n"); + erts_fprintf(stderr, " valid range [%d-%d]\n", this_rel-2, this_rel); - erts_fprintf(stderr, "-r force ets memory block to be moved on realloc\n"); - erts_fprintf(stderr, "-rg amount set reader groups limit\n"); - erts_fprintf(stderr, "-sbt type set scheduler bind type, valid types are:\n"); - erts_fprintf(stderr, "-stbt type u|ns|ts|ps|s|nnts|nnps|tnnps|db\n"); - erts_fprintf(stderr, "-sbwt val set scheduler busy wait threshold, valid values are:\n"); - erts_fprintf(stderr, " none|very_short|short|medium|long|very_long.\n"); - erts_fprintf(stderr, "-scl bool enable/disable compaction of scheduler load,\n"); - erts_fprintf(stderr, " see the erl(1) documentation for more info.\n"); - erts_fprintf(stderr, "-sct cput set cpu topology,\n"); - erts_fprintf(stderr, " see the erl(1) documentation for more info.\n"); - erts_fprintf(stderr, "-sws val set scheduler wakeup strategy, valid values are:\n"); - erts_fprintf(stderr, " default|legacy.\n"); - erts_fprintf(stderr, "-swct val set scheduler wake cleanup threshold, valid values are:\n"); - erts_fprintf(stderr, " very_lazy|lazy|medium|eager|very_eager.\n"); - erts_fprintf(stderr, "-swt val set scheduler wakeup threshold, valid values are:\n"); - erts_fprintf(stderr, " very_low|low|medium|high|very_high.\n"); - erts_fprintf(stderr, "-sss size suggested stack size in kilo words for scheduler threads,\n"); - erts_fprintf(stderr, " valid range is [%d-%d]\n", + erts_fprintf(stderr, "-r force ets memory block to be moved on realloc\n"); + erts_fprintf(stderr, "-rg amount set reader groups limit\n"); + erts_fprintf(stderr, "-sbt type set scheduler bind type, valid types are:\n"); + erts_fprintf(stderr, "-stbt type u|ns|ts|ps|s|nnts|nnps|tnnps|db\n"); + erts_fprintf(stderr, "-sbwt val set scheduler busy wait threshold, valid values are:\n"); + erts_fprintf(stderr, " none|very_short|short|medium|long|very_long.\n"); + erts_fprintf(stderr, "-scl bool enable/disable compaction of scheduler load,\n"); + erts_fprintf(stderr, " see the erl(1) documentation for more info.\n"); + erts_fprintf(stderr, "-sct cput set cpu topology,\n"); + erts_fprintf(stderr, " see the erl(1) documentation for more info.\n"); +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT + erts_fprintf(stderr, "-sub bool enable/disable scheduler utilization balancing,\n"); +#else + erts_fprintf(stderr, "-sub false disable scheduler utilization balancing,\n"); +#endif + erts_fprintf(stderr, " see the erl(1) documentation for more info.\n"); + erts_fprintf(stderr, "-sws val set scheduler wakeup strategy, valid values are:\n"); + erts_fprintf(stderr, " default|legacy.\n"); + erts_fprintf(stderr, "-swct val set scheduler wake cleanup threshold, valid values are:\n"); + erts_fprintf(stderr, " very_lazy|lazy|medium|eager|very_eager.\n"); + erts_fprintf(stderr, "-swt val set scheduler wakeup threshold, valid values are:\n"); + erts_fprintf(stderr, " very_low|low|medium|high|very_high.\n"); + erts_fprintf(stderr, "-sss size suggested stack size in kilo words for scheduler threads,\n"); + erts_fprintf(stderr, " valid range is [%d-%d]\n", ERTS_SCHED_THREAD_MIN_STACK_SIZE, ERTS_SCHED_THREAD_MAX_STACK_SIZE); - erts_fprintf(stderr, "-spp Bool set port parallelism scheduling hint\n"); - erts_fprintf(stderr, "-S n1:n2 set number of schedulers (n1), and number of\n"); - erts_fprintf(stderr, " schedulers online (n2), maximum for both\n"); - erts_fprintf(stderr, " numbers is %d\n", + erts_fprintf(stderr, "-spp Bool set port parallelism scheduling hint\n"); + erts_fprintf(stderr, "-S n1:n2 set number of schedulers (n1), and number of\n"); + erts_fprintf(stderr, " schedulers online (n2), maximum for both\n"); + erts_fprintf(stderr, " numbers is %d\n", ERTS_MAX_NO_OF_SCHEDULERS); - erts_fprintf(stderr, "-SP p1:p2 specify schedulers (p1) and schedulers online (p2)\n"); - erts_fprintf(stderr, " as percentages of logical processors configured and logical\n"); - erts_fprintf(stderr, " processors available, respectively\n"); - erts_fprintf(stderr, "-t size set the maximum number of atoms the " - "emulator can handle\n"); - erts_fprintf(stderr, " valid range is [%d-%d]\n", + erts_fprintf(stderr, "-SP p1:p2 specify schedulers (p1) and schedulers online (p2)\n"); + erts_fprintf(stderr, " as percentages of logical processors configured and logical\n"); + erts_fprintf(stderr, " processors available, respectively\n"); +#ifdef ERTS_DIRTY_SCHEDULERS + erts_fprintf(stderr, "-SDcpu n1:n2 set number of dirty CPU schedulers (n1), and number of\n"); + erts_fprintf(stderr, " dirty CPU schedulers online (n2), valid range for both\n"); + erts_fprintf(stderr, " numbers is [1-%d], and n2 must be less than or equal to n1\n", + ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS); + erts_fprintf(stderr, "-SDPcpu p1:p2 specify dirty CPU schedulers (p1) and dirty CPU schedulers\n"); + erts_fprintf(stderr, " online (p2) as percentages of logical processors configured\n"); + erts_fprintf(stderr, " and logical processors available, respectively\n"); + erts_fprintf(stderr, "-SDio n set number of dirty I/O schedulers, valid range is [0-%d]\n", + ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS); +#endif + erts_fprintf(stderr, "-t size set the maximum number of atoms the emulator can handle\n"); + erts_fprintf(stderr, " valid range is [%d-%d]\n", MIN_ATOM_TABLE_SIZE, MAX_ATOM_TABLE_SIZE); - erts_fprintf(stderr, "-T number set modified timing level,\n"); - erts_fprintf(stderr, " valid range is [0-%d]\n", + erts_fprintf(stderr, "-T number set modified timing level, valid range is [0-%d]\n", ERTS_MODIFIED_TIMING_LEVELS-1); - erts_fprintf(stderr, "-V print Erlang version\n"); + erts_fprintf(stderr, "-V print Erlang version\n"); - erts_fprintf(stderr, "-v turn on chatty mode (GCs will be reported etc)\n"); + erts_fprintf(stderr, "-v turn on chatty mode (GCs will be reported etc)\n"); - erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n"); - erts_fprintf(stderr, " see error_logger documentation for details\n"); - erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n"); - erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024); + erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n"); + erts_fprintf(stderr, " see error_logger documentation for details\n"); + erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n"); + erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024); erts_fprintf(stderr, "\n"); erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n"); erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n"); @@ -637,6 +652,13 @@ early_init(int *argc, char **argv) /* int schdlrs_percentage = 100; int schdlrs_onln_percentage = 100; int max_main_threads; +#ifdef ERTS_DIRTY_SCHEDULERS + int dirty_cpu_scheds; + int dirty_cpu_scheds_online; + int dirty_cpu_scheds_pctg = 100; + int dirty_cpu_scheds_onln_pctg = 100; + int dirty_io_scheds; +#endif int max_reader_groups; int reader_groups; char envbuf[21]; /* enough for any 64-bit integer */ @@ -712,6 +734,12 @@ early_init(int *argc, char **argv) /* schdlrs = no_schedulers; schdlrs_onln = no_schedulers_online; +#ifdef ERTS_DIRTY_SCHEDULERS + dirty_cpu_scheds = no_schedulers; + dirty_cpu_scheds_online = no_schedulers_online; + dirty_io_scheds = 10; +#endif + envbufsz = sizeof(envbuf); /* erts_sys_getenv(_raw)() not initialized yet; need erts_sys_getenv__() */ @@ -802,7 +830,121 @@ early_init(int *argc, char **argv) /* VERBOSE(DEBUG_SYSTEM, ("using %d:%d scheduler percentages\n", schdlrs_percentage, schdlrs_onln_percentage)); - } else { + } +#ifdef ERTS_DIRTY_SCHEDULERS + else if (argv[i][2] == 'D') { + char *arg; + char *type = argv[i]+3; + if (strcmp(type, "Pcpu") == 0) { + int ptot, ponln; + arg = get_arg(argv[i]+7, argv[i+1], &i); + switch (sscanf(arg, "%d:%d", &ptot, &ponln)) { + case 0: + switch (sscanf(arg, ":%d", &ponln)) { + case 1: + if (ponln < 0) + goto bad_SDPcpu; + ptot = 100; + goto chk_SDPcpu; + default: + goto bad_SDPcpu; + } + case 1: + if (ptot < 0) + goto bad_SDPcpu; + ponln = ptot < 100 ? ptot : 100; + goto chk_SDPcpu; + case 2: + if (ptot < 0 || ponln < 0) + goto bad_SDPcpu; + chk_SDPcpu: + dirty_cpu_scheds_pctg = ptot; + dirty_cpu_scheds_onln_pctg = ponln; + break; + default: + bad_SDPcpu: + erts_fprintf(stderr, + "bad dirty CPU schedulers percentage specifier %s\n", + arg); + erts_usage(); + break; + } + VERBOSE(DEBUG_SYSTEM, + ("using %d:%d dirty CPU scheduler percentages\n", + dirty_cpu_scheds_pctg, dirty_cpu_scheds_onln_pctg)); + } else if (strcmp(type, "cpu") == 0) { + int tot, onln; + arg = get_arg(argv[i]+6, argv[i+1], &i); + switch (sscanf(arg, "%d:%d", &tot, &onln)) { + case 0: + switch (sscanf(arg, ":%d", &onln)) { + case 1: + tot = no_schedulers; + goto chk_SDcpu; + default: + goto bad_SDcpu; + } + case 1: + onln = tot < dirty_cpu_scheds_online ? + tot : dirty_cpu_scheds_online; + case 2: + chk_SDcpu: + if (tot > 0) + dirty_cpu_scheds = tot; + else + dirty_cpu_scheds = no_schedulers + tot; + if (onln > 0) + dirty_cpu_scheds_online = onln; + else + dirty_cpu_scheds_online = no_schedulers_online + onln; + if (dirty_cpu_scheds < 1 || + ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS < dirty_cpu_scheds) { + erts_fprintf(stderr, + "bad amount of dirty CPU schedulers %d\n", + tot); + erts_usage(); + } + if (dirty_cpu_scheds_online < 1 || + dirty_cpu_scheds < dirty_cpu_scheds_online) { + erts_fprintf(stderr, + "bad amount of dirty CPU schedulers online %d " + "(total amount of dirty CPU schedulers %d)\n", + dirty_cpu_scheds_online, dirty_cpu_scheds); + erts_usage(); + } + break; + default: + bad_SDcpu: + erts_fprintf(stderr, + "bad amount of dirty CPU schedulers %s\n", + arg); + erts_usage(); + break; + } + VERBOSE(DEBUG_SYSTEM, + ("using %d:%d dirty CPU scheduler(s)\n", tot, onln)); + } else if (strcmp(type, "io") == 0) { + arg = get_arg(argv[i]+5, argv[i+1], &i); + dirty_io_scheds = atoi(arg); + if (dirty_io_scheds < 0 || + dirty_io_scheds > ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS) { + erts_fprintf(stderr, + "bad number of dirty I/O schedulers %s\n", + arg); + erts_usage(); + } + VERBOSE(DEBUG_SYSTEM, + ("using %d dirty I/O scheduler(s)\n", dirty_io_scheds)); + } else { + erts_fprintf(stderr, + "bad or missing dirty scheduler specifier: %s\n", + argv[i]); + erts_usage(); + break; + } + } +#endif + else { int tot, onln; char *arg = get_arg(argv[i]+2, argv[i+1], &i); switch (sscanf(arg, "%d:%d", &tot, &onln)) { @@ -889,6 +1031,17 @@ early_init(int *argc, char **argv) /* (void)schdlrs_percentage; (void)schdlrs_onln_percentage; #endif +#ifdef ERTS_DIRTY_SCHEDULERS + /* apply any dirty scheduler precentages */ + if (dirty_cpu_scheds_pctg != 100 || dirty_cpu_scheds_onln_pctg != 100) { + dirty_cpu_scheds = dirty_cpu_scheds * dirty_cpu_scheds_pctg / 100; + dirty_cpu_scheds_online = dirty_cpu_scheds_online * dirty_cpu_scheds_onln_pctg / 100; + } + if (dirty_cpu_scheds > schdlrs) + dirty_cpu_scheds = schdlrs; + if (dirty_cpu_scheds_online > schdlrs_onln) + dirty_cpu_scheds_online = schdlrs_onln; +#endif } #ifndef USE_THREADS @@ -901,6 +1054,11 @@ early_init(int *argc, char **argv) /* erts_no_schedulers = (Uint) no_schedulers; #endif +#ifdef ERTS_DIRTY_SCHEDULERS + erts_no_dirty_cpu_schedulers = dirty_cpu_scheds; + erts_no_dirty_cpu_schedulers_online = dirty_cpu_scheds_online; + erts_no_dirty_io_schedulers = dirty_io_scheds; +#endif erts_early_init_scheduling(no_schedulers); alloc_opts.ncpu = ncpu; @@ -918,10 +1076,18 @@ early_init(int *argc, char **argv) /* * * * Unmanaged threads that need to register: * ** Async threads (see erl_async.c) + * ** Dirty scheduler threads */ erts_thr_progress_init(no_schedulers, no_schedulers+2, - erts_async_max_threads); +#ifndef ERTS_DIRTY_SCHEDULERS + erts_async_max_threads +#else + erts_async_max_threads + + erts_no_dirty_cpu_schedulers + + erts_no_dirty_io_schedulers +#endif + ); #endif erts_thr_q_init(); erts_init_utils(); @@ -1386,7 +1552,15 @@ erl_start(int argc, char **argv) break; case 'S' : /* Was handled in early_init() just read past it */ - if (argv[i][2] == 'P') + if (argv[i][2] == 'D') { + char* type = argv[i]+3; + if (strcmp(type, "Pcpu") == 0) + (void) get_arg(argv[i]+7, argv[i+1], &i); + if (strcmp(type, "cpu") == 0) + (void) get_arg(argv[i]+6, argv[i+1], &i); + else if (strcmp(type, "io") == 0) + (void) get_arg(argv[i]+5, argv[i+1], &i); + } else if (argv[i][2] == 'P') (void) get_arg(argv[i]+3, argv[i+1], &i); else (void) get_arg(argv[i]+2, argv[i+1], &i); @@ -1433,8 +1607,10 @@ erl_start(int argc, char **argv) } else if (has_prefix("cl", sub_param)) { arg = get_arg(sub_param+2, argv[i+1], &i); - if (sys_strcmp("true", arg) == 0) + if (sys_strcmp("true", arg) == 0) { erts_sched_compact_load = 1; + erts_sched_balance_util = 0; + } else if (sys_strcmp("false", arg) == 0) erts_sched_compact_load = 0; else { @@ -1512,6 +1688,26 @@ erl_start(int argc, char **argv) erts_usage(); } } + else if (has_prefix("ub", sub_param)) { + arg = get_arg(sub_param+2, argv[i+1], &i); + if (sys_strcmp("true", arg) == 0) { +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT + erts_sched_balance_util = 1; +#else + erts_fprintf(stderr, + "scheduler utilization balancing not " + "supported on this system\n"); + erts_usage(); +#endif + } + else if (sys_strcmp("false", arg) == 0) + erts_sched_balance_util = 0; + else { + erts_fprintf(stderr, "bad scheduler utilization balancing " + " value '%s'\n", arg); + erts_usage(); + } + } else if (has_prefix("wct", sub_param)) { arg = get_arg(sub_param+3, argv[i+1], &i); if (erts_sched_set_wake_cleanup_threshold(arg) != 0) { diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index 0dd83fa6ed..a8ff94ac89 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -123,6 +123,9 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "schdlr_sspnd", NULL }, { "migration_info_update", NULL }, { "run_queue", "address" }, +#ifdef ERTS_DIRTY_SCHEDULERS + { "dirty_run_queue_sleep_list", "address" }, +#endif { "process_table", NULL }, { "cpu_info", NULL }, { "pollset", "address" }, diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index dc285b3cf7..e1e213c4eb 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -34,6 +34,7 @@ #include "beam_bp.h" #include "erl_thr_progress.h" #include "dtrace-wrapper.h" +#include "erl_process.h" #if defined(USE_DYNAMIC_TRACE) && (defined(USE_DTRACE) || defined(USE_SYSTEMTAP)) #define HAVE_USE_DTRACE 1 #endif @@ -1451,6 +1452,156 @@ int enif_consume_timeslice(ErlNifEnv* env, int percent) return ERTS_BIF_REDS_LEFT(env->proc) == 0; } +#ifdef ERTS_DIRTY_SCHEDULERS + +static void +alloc_proc_psd(Process* proc, Export **ep) +{ + int i; + if (!*ep) { + *ep = erts_alloc(ERTS_ALC_T_PSD, sizeof(Export)); + sys_memset((void*) *ep, 0, sizeof(Export)); + for (i=0; i<ERTS_NUM_CODE_IX; i++) { + (*ep)->addressv[i] = &(*ep)->code[3]; + } + (*ep)->code[3] = (BeamInstr) em_call_nif; + } + (void) ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(proc, ERTS_PROC_LOCK_MAIN, *ep); +} + +static ERL_NIF_TERM +execute_dirty_nif_finalizer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + Eterm* reg = ERTS_PROC_GET_SCHDATA(env->proc)->x_reg_array; + ERL_NIF_TERM result = (ERL_NIF_TERM) reg[0]; + typedef ERL_NIF_TERM (*FinalizerFP)(ErlNifEnv*, ERL_NIF_TERM); + FinalizerFP fp; +#if HAVE_INT64 && SIZEOF_LONG != 8 + ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64)); + enif_get_uint64(env, reg[1], (ErlNifUInt64 *) &fp); +#else + ASSERT(sizeof(fp) <= sizeof(unsigned long)); + enif_get_ulong(env, reg[1], (unsigned long *) &fp); +#endif + return (*fp)(env, result); +} + +#endif /* ERTS_DIRTY_SCHEDULERS */ + +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT + +ERL_NIF_TERM +enif_schedule_dirty_nif(ErlNifEnv* env, int flags, + ERL_NIF_TERM (*fp)(ErlNifEnv*, int, const ERL_NIF_TERM[]), + int argc, const ERL_NIF_TERM argv[]) +{ +#ifdef USE_THREADS + erts_aint32_t state, n, a; + Process* proc = env->proc; + Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; + Export* ep = NULL; + int i; + + int chkflgs = (flags & (ERL_NIF_DIRTY_JOB_IO_BOUND|ERL_NIF_DIRTY_JOB_CPU_BOUND)); + if (chkflgs != ERL_NIF_DIRTY_JOB_IO_BOUND && chkflgs != ERL_NIF_DIRTY_JOB_CPU_BOUND) + return enif_make_badarg(env); + + a = erts_smp_atomic32_read_acqb(&proc->state); + while (1) { + n = state = a; + if (chkflgs == ERL_NIF_DIRTY_JOB_CPU_BOUND) + n |= ERTS_PSFLG_DIRTY_CPU_PROC; + else + n |= ERTS_PSFLG_DIRTY_IO_PROC; + a = erts_smp_atomic32_cmpxchg_mb(&proc->state, n, state); + if (a == state) + break; + } + if (!(ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc))) + alloc_proc_psd(proc, &ep); + ERTS_VBUMP_ALL_REDS(proc); + ep->code[2] = argc; + for (i = 0; i < argc; i++) { + reg[i] = (Eterm) argv[i]; + } + proc->i = (BeamInstr*) ep->addressv[0]; + ep->code[4] = (BeamInstr) fp; + proc->freason = TRAP; + + return THE_NON_VALUE; +#else + return (*fp)(env, argc, argv); +#endif +} + +ERL_NIF_TERM +enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result, + ERL_NIF_TERM (*fp)(ErlNifEnv*, ERL_NIF_TERM)) +{ +#ifdef USE_THREADS + erts_aint32_t state, n, a; + Process* proc = env->proc; + Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; + Export* ep; + + a = erts_smp_atomic32_read_acqb(&proc->state); + while (1) { + n = state = a; + if (!(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))) + break; + n &= ~(ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC + |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); + a = erts_smp_atomic32_cmpxchg_mb(&proc->state, n, state); + if (a == state) + break; + } + if (!(ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc))) + alloc_proc_psd(proc, &ep); + ERTS_VBUMP_ALL_REDS(proc); + ep->code[2] = 2; + reg[0] = (Eterm) result; +#if HAVE_INT64 && SIZEOF_LONG != 8 + ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64)); + reg[1] = (Eterm) enif_make_uint64(env, (ErlNifUInt64) fp); +#else + ASSERT(sizeof(fp) <= sizeof(unsigned long)); + reg[1] = (Eterm) enif_make_ulong(env, (unsigned long) fp); +#endif + proc->i = (BeamInstr*) ep->addressv[0]; + ep->code[4] = (BeamInstr) execute_dirty_nif_finalizer; + proc->freason = TRAP; + + return THE_NON_VALUE; +#else + return (*fp)(env, result); +#endif +} + +/* A simple finalizer that just returns its result argument */ +ERL_NIF_TERM +enif_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result) +{ + return result; +} + +int +enif_is_on_dirty_scheduler(ErlNifEnv* env) +{ + return ERTS_SCHEDULER_IS_DIRTY(env->proc->scheduler_data); +} + +int +enif_have_dirty_schedulers() +{ +#ifdef USE_THREADS + return 1; +#else + return 0; +#endif +} + +#endif /* ERL_NIF_DIRTY_SCHEDULER_SUPPORT */ + /*************************************************************************** ** load_nif/2 ** ***************************************************************************/ diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index 5f4dc21d5c..fb3c359ec9 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -23,7 +23,11 @@ #ifndef __ERL_NIF_H__ #define __ERL_NIF_H__ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "erl_native_features_config.h" #include "erl_drv_nif.h" /* Version history: @@ -34,9 +38,14 @@ ** 2.2: R14B03 enif_is_exception ** 2.3: R15 enif_make_reverse_list, enif_is_number ** 2.4: R16 enif_consume_timeslice +** 2.5: R17 dirty schedulers */ #define ERL_NIF_MAJOR_VERSION 2 +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +#define ERL_NIF_MINOR_VERSION 5 +#else #define ERL_NIF_MINOR_VERSION 4 +#endif #include <stdlib.h> @@ -159,6 +168,14 @@ typedef int ErlNifTSDKey; typedef ErlDrvThreadOpts ErlNifThreadOpts; +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +typedef enum +{ + ERL_NIF_DIRTY_JOB_CPU_BOUND = 1, + ERL_NIF_DIRTY_JOB_IO_BOUND = 2 +}ErlNifDirtyTaskFlags; +#endif + #if (defined(__WIN32__) || defined(_WIN32) || defined(_WIN32_)) # define ERL_NIF_API_FUNC_DECL(RET_TYPE, NAME, ARGS) RET_TYPE (*NAME) ARGS typedef struct { diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h index 2f841645e1..f5b27dfdfa 100644 --- a/erts/emulator/beam/erl_nif_api_funcs.h +++ b/erts/emulator/beam/erl_nif_api_funcs.h @@ -141,6 +141,13 @@ ERL_NIF_API_FUNC_DECL(int,enif_is_number,(ErlNifEnv*, ERL_NIF_TERM term)); ERL_NIF_API_FUNC_DECL(void*,enif_dlopen,(const char* lib, void (*err_handler)(void*,const char*), void* err_arg)); ERL_NIF_API_FUNC_DECL(void*,enif_dlsym,(void* handle, const char* symbol, void (*err_handler)(void*,const char*), void* err_arg)); ERL_NIF_API_FUNC_DECL(int,enif_consume_timeslice,(ErlNifEnv*, int percent)); +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif,(ErlNifEnv*,int,ERL_NIF_TERM (*)(ErlNifEnv*,int,const ERL_NIF_TERM[]),int,const ERL_NIF_TERM[])); +ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM,ERL_NIF_TERM (*)(ErlNifEnv*,ERL_NIF_TERM))); +ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM)); +ERL_NIF_API_FUNC_DECL(int,enif_is_on_dirty_scheduler,(ErlNifEnv*)); +ERL_NIF_API_FUNC_DECL(int,enif_have_dirty_schedulers,(void)); +#endif /* ** Add new entries here to keep compatibility on Windows!!! @@ -266,6 +273,13 @@ ERL_NIF_API_FUNC_DECL(int,enif_consume_timeslice,(ErlNifEnv*, int percent)); # define enif_dlopen ERL_NIF_API_FUNC_MACRO(enif_dlopen) # define enif_dlsym ERL_NIF_API_FUNC_MACRO(enif_dlsym) # define enif_consume_timeslice ERL_NIF_API_FUNC_MACRO(enif_consume_timeslice) +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +# define enif_schedule_dirty_nif ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif) +# define enif_schedule_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif_finalizer) +# define enif_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_dirty_nif_finalizer) +# define enif_is_on_dirty_scheduler ERL_NIF_API_FUNC_MACRO(enif_is_on_dirty_scheduler) +# define enif_have_dirty_schedulers ERL_NIF_API_FUNC_MACRO(enif_have_dirty_schedulers) +#endif /* ** Add new entries here diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 547a42beb2..d4108067d0 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -877,6 +877,11 @@ enqueue_port(ErtsRunQueue *runq, Port *pp) ASSERT(runq->ports.start && runq->ports.end); erts_smp_inc_runq_len(runq, &runq->ports.info, ERTS_PORT_PRIO_LEVEL); + +#ifdef ERTS_SMP + if (runq->halt_in_progress) + erts_non_empty_runq(runq); +#endif } static ERTS_INLINE Port * diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 21fd8dd50a..937881212a 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -144,7 +144,13 @@ extern BeamInstr beam_exit[]; extern BeamInstr beam_continue_exit[]; int erts_sched_compact_load; +int erts_sched_balance_util = 0; Uint erts_no_schedulers; +#ifdef ERTS_DIRTY_SCHEDULERS +Uint erts_no_dirty_cpu_schedulers; +Uint erts_no_dirty_cpu_schedulers_online; +Uint erts_no_dirty_io_schedulers; +#endif #define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_VERY_LAZY (4*1024*1024) #define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_LAZY (512*1024) @@ -258,6 +264,10 @@ ErtsAlignedRunQueue *erts_aligned_run_queues; Uint erts_no_run_queues; ErtsAlignedSchedulerData *erts_aligned_scheduler_data; +#ifdef ERTS_DIRTY_SCHEDULERS +ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data; +ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data; +#endif typedef union { ErtsSchedulerSleepInfo ssi; @@ -265,6 +275,12 @@ typedef union { } ErtsAlignedSchedulerSleepInfo; static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info; +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP +static ErtsAlignedSchedulerSleepInfo *aligned_dirty_cpu_sched_sleep_info; +static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info; +#endif +#endif static Uint last_reductions; static Uint last_exact_reductions; @@ -331,6 +347,16 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, (ASSERT(-1 <= ((int) (IX)) \ && ((int) (IX)) < ((int) erts_no_schedulers)), \ &aligned_sched_sleep_info[(IX)].ssi) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \ + (ASSERT(0 <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_dirty_cpu_schedulers)), \ + &aligned_dirty_cpu_sched_sleep_info[(IX)].ssi) +#define ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(IX) \ + (ASSERT(0 <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_dirty_io_schedulers)), \ + &aligned_dirty_io_sched_sleep_info[(IX)].ssi) +#endif #define ERTS_FOREACH_RUNQ(RQVAR, DO) \ do { \ @@ -518,6 +544,13 @@ erts_pre_init_process(void) erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks = ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS; +#ifdef ERTS_DIRTY_SCHEDULERS + erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].get_locks + = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].set_locks + = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS; +#endif + /* Check that we have locks for all entries */ for (ix = 0; ix < ERTS_PSD_SIZE; ix++) { ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].get_locks); @@ -608,6 +641,7 @@ erts_late_init_process(void) static void init_sched_wall_time(ErtsSchedWallTime *swtp) { + swtp->need = erts_sched_balance_util; swtp->enabled = 0; swtp->start = 0; swtp->working.total = 0; @@ -630,27 +664,253 @@ sched_wall_time_ts(void) #endif } +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + +#ifdef ARCH_64 + +static ERTS_INLINE Uint64 +aschedtime_read(ErtsAtomicSchedTime *var) +{ + return (Uint64) erts_atomic_read_nob((erts_atomic_t *) var); +} + +static ERTS_INLINE void +aschedtime_set(ErtsAtomicSchedTime *var, Uint64 val) +{ + erts_atomic_set_nob((erts_atomic_t *) var, (erts_aint_t) val); +} + +static ERTS_INLINE void +aschedtime_init(ErtsAtomicSchedTime *var) +{ + erts_atomic_init_nob((erts_atomic_t *) var, (erts_aint_t) 0); +} + +#elif defined(ARCH_32) + +static ERTS_INLINE Uint64 +aschedtime_read(ErtsAtomicSchedTime *var) +{ + erts_dw_aint_t dw; + erts_dw_atomic_read_nob((erts_dw_atomic_t *) var, &dw); +#ifdef ETHR_SU_DW_NAINT_T__ + return (Uint64) dw.dw_sint; +#else + { + Uint64 res; + res = (Uint64) ((Uint32) dw.sint[ERTS_DW_AINT_HIGH_WORD]); + res <<= 32; + res |= (Uint64) ((Uint32) dw.sint[ERTS_DW_AINT_LOW_WORD]); + return res; + } +#endif +} + +static ERTS_INLINE void +aschedtime_set(ErtsAtomicSchedTime *var, Uint64 val) +{ + erts_dw_aint_t dw; +#ifdef ETHR_SU_DW_NAINT_T__ + dw.dw_sint = (ETHR_SU_DW_NAINT_T__) val; +#else + dw.sint[ERTS_DW_AINT_LOW_WORD] = (erts_aint_t) (val & 0xffffffff); + dw.sint[ERTS_DW_AINT_HIGH_WORD] = (erts_aint_t) ((val >> 32) & 0xffffffff); +#endif + erts_dw_atomic_set_nob((erts_dw_atomic_t *) var, &dw); +} + +static ERTS_INLINE void +aschedtime_init(ErtsAtomicSchedTime *var) +{ + erts_dw_aint_t dw; + dw.sint[ERTS_DW_AINT_LOW_WORD] = (erts_aint_t) 0; + dw.sint[ERTS_DW_AINT_HIGH_WORD] = (erts_aint_t) 0; + erts_dw_atomic_init_nob((erts_dw_atomic_t *) var, &dw); +} + +#else +# error :-/ +#endif + +#define ERTS_GET_AVG_MAX_UNLOCKED_TRY 50 +#define ERTS_SCHED_AVG_UTIL_WRITE_MARKER (~((Uint64) 0)) + +/* Intervals in nanoseconds */ +#define ERTS_SCHED_UTIL_SHORT_INTERVAL ((Uint64) 1*1000*1000*1000) +#define ERTS_SCHED_UTIL_LONG_INTERVAL ((Uint64) 10*1000*1000*1000) + + +#define ERTS_SCHED_UTIL_IGNORE_IMBALANCE_DIFF 5000 /* ppm */ + +static ERTS_INLINE Uint64 +calc_sched_worktime(int is_working, Uint64 now, Uint64 last, + Uint64 interval, Uint64 old_worktime) +{ + Uint64 worktime; + Uint64 new; + + if (now <= last) + return old_worktime; + + new = now - last; + + if (new >= interval) + return is_working ? interval : (Uint64) 0; + + + /* + * Division by 1000 in order to avoid + * overflow. If changed update assertions + * in init_runq_sched_util(). + */ + worktime = old_worktime; + worktime *= (interval - new)/1000; + worktime /= (interval/1000); + if (is_working) + worktime += new; + + ASSERT(0 <= worktime && worktime <= interval); + + return worktime; +} + +static ERTS_INLINE void +update_avg_sched_util(ErtsSchedulerData *esdp, Uint64 now, int is_working) +{ + ErtsRunQueue *rq; + int worked; + Uint64 swt, lwt, last; + + rq = esdp->run_queue; + last = aschedtime_read(&rq->sched_util.last); + + if (now <= last) { + ASSERT(last == ERTS_SCHED_AVG_UTIL_WRITE_MARKER); + return; + } + + ASSERT(now >= last); + + worked = rq->sched_util.is_working; + + swt = calc_sched_worktime(worked, now, last, ERTS_SCHED_UTIL_SHORT_INTERVAL, + rq->sched_util.worktime.short_interval); + lwt = calc_sched_worktime(worked, now, last, ERTS_SCHED_UTIL_LONG_INTERVAL, + rq->sched_util.worktime.long_interval); + + aschedtime_set(&rq->sched_util.last, ERTS_SCHED_AVG_UTIL_WRITE_MARKER); + ERTS_THR_WRITE_MEMORY_BARRIER; + rq->sched_util.is_working = is_working; + rq->sched_util.worktime.short_interval = swt; + rq->sched_util.worktime.long_interval = lwt; + ERTS_THR_WRITE_MEMORY_BARRIER; + aschedtime_set(&rq->sched_util.last, now); +} + +int +erts_get_sched_util(ErtsRunQueue *rq, int initially_locked, int short_interval) +{ + /* Average scheduler utilization in ppm */ + int util, is_working, try = 0, locked = initially_locked; + Uint64 worktime, old_worktime, now, last, interval, *old_worktimep; + + if (short_interval) { + old_worktimep = &rq->sched_util.worktime.short_interval; + interval = ERTS_SCHED_UTIL_SHORT_INTERVAL; + } + else { + old_worktimep = &rq->sched_util.worktime.long_interval; + interval = ERTS_SCHED_UTIL_LONG_INTERVAL; + } + + while (1) { + Uint64 chk_last; + last = aschedtime_read(&rq->sched_util.last); + ERTS_THR_READ_MEMORY_BARRIER; + is_working = rq->sched_util.is_working; + old_worktime = *old_worktimep; + ERTS_THR_READ_MEMORY_BARRIER; + chk_last = aschedtime_read(&rq->sched_util.last); + if (chk_last == last) + break; + if (!locked) { + if (++try >= ERTS_GET_AVG_MAX_UNLOCKED_TRY) { + /* Writer will eventually block on runq-lock */ + erts_smp_runq_lock(rq); + locked = 1; + } + } + } + + if (!initially_locked && locked) + erts_smp_runq_unlock(rq); + + now = sched_wall_time_ts(); + worktime = calc_sched_worktime(is_working, now, last, interval, old_worktime); + + util = (int) ((worktime * 1000000)/interval); + + ASSERT(0 <= util && util <= 1000000); + + return util; +} + +static void +init_runq_sched_util(ErtsRunQueueSchedUtil *rqsu, int enabled) +{ + aschedtime_init(&rqsu->last); + if (!enabled) + aschedtime_set(&rqsu->last, ERTS_SCHED_AVG_UTIL_WRITE_MARKER); + rqsu->is_working = 0; + rqsu->worktime.short_interval = (Uint64) 0; + rqsu->worktime.long_interval = (Uint64) 0; + +#ifdef DEBUG + { + Uint64 intrvl; + /* + * If one of these asserts fail we may have + * overflow in calc_sched_worktime(). Which + * have to be fixed either by shrinking + * interval size, or fix calculation of + * worktime in calc_sched_worktime(). + */ + intrvl = ERTS_SCHED_UTIL_SHORT_INTERVAL; + ASSERT(intrvl*(intrvl/1000) > intrvl); + intrvl = ERTS_SCHED_UTIL_LONG_INTERVAL; + ASSERT(intrvl*(intrvl/1000) > intrvl); + } +#endif +} + +#endif /* ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT */ + static ERTS_INLINE void sched_wall_time_change(ErtsSchedulerData *esdp, int working) { - if (esdp->sched_wall_time.enabled) { + if (esdp->sched_wall_time.need) { Uint64 ts = sched_wall_time_ts(); - if (working) { +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + update_avg_sched_util(esdp, ts, working); +#endif + if (esdp->sched_wall_time.enabled) { + if (working) { #ifdef DEBUG - ASSERT(!esdp->sched_wall_time.working.currently); - esdp->sched_wall_time.working.currently = 1; + ASSERT(!esdp->sched_wall_time.working.currently); + esdp->sched_wall_time.working.currently = 1; #endif - ts -= esdp->sched_wall_time.start; - esdp->sched_wall_time.working.start = ts; - } - else { + ts -= esdp->sched_wall_time.start; + esdp->sched_wall_time.working.start = ts; + } + else { #ifdef DEBUG - ASSERT(esdp->sched_wall_time.working.currently); - esdp->sched_wall_time.working.currently = 0; + ASSERT(esdp->sched_wall_time.working.currently); + esdp->sched_wall_time.working.currently = 0; #endif - ts -= esdp->sched_wall_time.start; - ts -= esdp->sched_wall_time.working.start; - esdp->sched_wall_time.working.total += ts; + ts -= esdp->sched_wall_time.start; + ts -= esdp->sched_wall_time.working.start; + esdp->sched_wall_time.working.total += ts; + } } } } @@ -703,12 +963,17 @@ reply_sched_wall_time(void *vswtrp) ErlHeapFragment *bp = NULL; ASSERT(esdp); - +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif if (swtrp->set) { - if (!swtrp->enable && esdp->sched_wall_time.enabled) + if (!swtrp->enable && esdp->sched_wall_time.enabled) { + esdp->sched_wall_time.need = erts_sched_balance_util; esdp->sched_wall_time.enabled = 0; + } else if (swtrp->enable && !esdp->sched_wall_time.enabled) { Uint64 ts = sched_wall_time_ts(); + esdp->sched_wall_time.need = 1; esdp->sched_wall_time.enabled = 1; esdp->sched_wall_time.start = ts; esdp->sched_wall_time.working.total = 0; @@ -784,6 +1049,9 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable) if (!set && !esdp->sched_wall_time.enabled) return THE_NON_VALUE; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif swtrp = swtreq_alloc(); ref = erts_make_ref(c_p); @@ -1261,6 +1529,9 @@ erts_schedule_multi_misc_aux_work(int ignore_self, if (ignore_self) { ErtsSchedulerData *esdp = erts_get_scheduler_data(); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif if (esdp) self = (int) esdp->no; } @@ -1370,7 +1641,7 @@ void erts_alloc_notify_delayed_dealloc(int ix) { ErtsSchedulerData *esdp = erts_get_scheduler_data(); - if (esdp) + if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) schedule_aux_work_wakeup(&esdp->aux_work_data, ix, ERTS_SSI_AUX_WORK_DD); @@ -1384,6 +1655,10 @@ erts_alloc_ensure_handle_delayed_dealloc_call(int ix) { #ifdef DEBUG ErtsSchedulerData *esdp = erts_get_scheduler_data(); +#ifdef ERTS_DIRTY_SCHEDULERS + if (esdp && ERTS_SCHEDULER_IS_DIRTY(esdp)) + return; +#endif ASSERT(!esdp || ix == (int) esdp->no); #endif set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1), @@ -1989,6 +2264,9 @@ static ERTS_INLINE void sched_active_sys(Uint no, ErtsRunQueue *rq) { ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); +#endif ASSERT(rq->waiting < 0); rq->waiting *= -1; rq->waiting--; @@ -2045,6 +2323,9 @@ static ERTS_INLINE void sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq) { ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); +#endif ASSERT(rq->waiting < 0); rq->waiting *= -1; } @@ -2060,7 +2341,7 @@ sched_waiting(Uint no, ErtsRunQueue *rq) else rq->waiting++; rq->woken = 0; - if (erts_system_profile_flags.scheduler) + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && erts_system_profile_flags.scheduler) profile_scheduler(make_small(no), am_inactive); } @@ -2072,7 +2353,7 @@ sched_active(Uint no, ErtsRunQueue *rq) rq->waiting++; else rq->waiting--; - if (erts_system_profile_flags.scheduler) + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && erts_system_profile_flags.scheduler) profile_scheduler(make_small(no), am_active); } @@ -2084,10 +2365,9 @@ ongoing_multi_scheduling_block(void) } static ERTS_INLINE void -empty_runq(ErtsRunQueue *rq) +empty_runq_aux(ErtsRunQueue *rq, Uint32 old_flags) { - Uint32 old_flags = ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_NONEMPTY|ERTS_RUNQ_FLG_PROTECTED); - if (old_flags & ERTS_RUNQ_FLG_NONEMPTY) { + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && old_flags & ERTS_RUNQ_FLG_NONEMPTY) { #ifdef DEBUG erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues); /* @@ -2107,10 +2387,27 @@ empty_runq(ErtsRunQueue *rq) } static ERTS_INLINE void +empty_runq(ErtsRunQueue *rq) +{ + Uint32 old_flags = ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_NONEMPTY|ERTS_RUNQ_FLG_PROTECTED); + empty_runq_aux(rq, old_flags); +} + +static ERTS_INLINE Uint32 +empty_protected_runq(ErtsRunQueue *rq) +{ + Uint32 old_flags = ERTS_RUNQ_FLGS_BSET(rq, + ERTS_RUNQ_FLG_NONEMPTY|ERTS_RUNQ_FLG_PROTECTED, + ERTS_RUNQ_FLG_PROTECTED); + empty_runq_aux(rq, old_flags); + return old_flags; +} + +static ERTS_INLINE void non_empty_runq(ErtsRunQueue *rq) { Uint32 old_flags = ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_NONEMPTY); - if (!(old_flags & ERTS_RUNQ_FLG_NONEMPTY)) { + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && (!(old_flags & ERTS_RUNQ_FLG_NONEMPTY))) { #ifdef DEBUG erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues); /* @@ -2130,6 +2427,18 @@ non_empty_runq(ErtsRunQueue *rq) } } +void +erts_empty_runq(ErtsRunQueue *rq) +{ + empty_runq(rq); +} + +void +erts_non_empty_runq(ErtsRunQueue *rq) +{ + non_empty_runq(rq); +} + static erts_aint32_t sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi) { @@ -2343,18 +2652,37 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) + erts_smp_spin_lock(&rq->sleepers.lock); +#endif flgs = sched_prep_spin_wait(ssi); if (flgs & ERTS_SSI_FLG_SUSPENDED) { /* Go suspend instead... */ +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) + erts_smp_spin_unlock(&rq->sleepers.lock); +#endif return; } +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + ssi->prev = NULL; + ssi->next = rq->sleepers.list; + if (rq->sleepers.list) + rq->sleepers.list->prev = ssi; + rq->sleepers.list = ssi; + erts_smp_spin_unlock(&rq->sleepers.lock); + } +#endif + /* * If all schedulers are waiting, one of them *should* * be waiting in erl_sys_schedule() */ - if (!prepare_for_sys_schedule()) { + if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule()) { sched_waiting(esdp->no, rq); @@ -2364,12 +2692,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) tse_wait: - if (thr_prgr_active != working) + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && thr_prgr_active != working) sched_wall_time_change(esdp, thr_prgr_active); while (1) { - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + aux_work = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : + erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); @@ -2383,11 +2712,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if (aux_work) flgs = erts_smp_atomic32_read_acqb(&ssi->flags); else { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(esdp); } - erts_thr_progress_prepare_wait(esdp); flgs = sched_spin_wait(ssi, spincount); if (flgs & ERTS_SSI_FLG_SLEEPING) { @@ -2402,7 +2733,8 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } while (res == EINTR); } } - erts_thr_progress_finalize_wait(esdp); + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) + erts_thr_progress_finalize_wait(esdp); } if (!(flgs & ERTS_SSI_FLG_WAITING)) { @@ -2423,7 +2755,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if (flgs & ~ERTS_SSI_FLG_SUSPENDED) erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); - if (!thr_prgr_active) { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } @@ -2440,6 +2772,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) erts_smp_atomic32_set_relb(&function_calls, 0); *fcalls = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif sched_waiting_sys(esdp->no, rq); @@ -2632,7 +2967,7 @@ ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi) } static void -wake_scheduler(ErtsRunQueue *rq, int incq) +wake_scheduler(ErtsRunQueue *rq) { ErtsSchedulerSleepInfo *ssi; erts_aint32_t flgs; @@ -2651,10 +2986,35 @@ wake_scheduler(ErtsRunQueue *rq, int incq) flgs = ssi_flags_set_wake(ssi); erts_sched_finish_poke(ssi, flgs); +} + +#ifdef ERTS_DIRTY_SCHEDULERS +static void +wake_dirty_scheduler(ErtsRunQueue *rq) +{ + ErtsSchedulerSleepInfo *ssi; + ErtsSchedulerSleepList *sl; - if (incq && (flgs & ERTS_SSI_FLG_WAITING)) - non_empty_runq(rq); + ASSERT(ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); + + sl = &rq->sleepers; + erts_smp_spin_lock(&sl->lock); + ssi = sl->list; + if (!ssi) + erts_smp_spin_unlock(&sl->lock); + else { + sl->list = NULL; + erts_smp_spin_unlock(&sl->lock); + + ERTS_THR_MEMORY_BARRIER; + do { + ErtsSchedulerSleepInfo *wake_ssi = ssi; + ssi = ssi->next; + erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi)); + } while (ssi); + } } +#endif #define ERTS_NO_USED_RUNQS_SHIFT 16 #define ERTS_NO_RUNQS_MASK 0xffff @@ -2744,7 +3104,7 @@ chk_wake_sched(ErtsRunQueue *crq, int ix, int activate) if (try_inc_no_active_runqs(ix+1)) (void) ERTS_RUNQ_FLGS_UNSET(wrq, ERTS_RUNQ_FLG_INACTIVE); } - wake_scheduler(wrq, 0); + wake_scheduler(wrq); return 1; } return 0; @@ -2791,8 +3151,14 @@ static ERTS_INLINE void smp_notify_inc_runq(ErtsRunQueue *runq) { #ifdef ERTS_SMP - if (runq) - wake_scheduler(runq, 1); + if (runq) { +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) + wake_dirty_scheduler(runq); + else +#endif + wake_scheduler(runq); + } #endif } @@ -2810,7 +3176,7 @@ erts_sched_notify_check_cpu_bind(void) for (ix = 0; ix < erts_no_run_queues; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); (void) ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_CHK_CPU_BIND); - wake_scheduler(rq, 0); + wake_scheduler(rq); } #else erts_sched_check_cpu_bind(erts_get_scheduler_data()); @@ -2938,6 +3304,11 @@ check_immigration_need(ErtsRunQueue *c_rq, ErtsMigrationPath *mp, int prio) if (!f_rq) return NULL; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (mp->sched_util) + return NULL; +#endif + f_rq_flags = ERTS_RUNQ_FLGS_GET(f_rq); if (f_rq_flags & ERTS_RUNQ_FLG_PROTECTED) return NULL; @@ -3077,7 +3448,7 @@ suspend_run_queue(ErtsRunQueue *rq) ERTS_SSI_FLG_SUSPENDED); (void) ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_SUSPENDED); - wake_scheduler(rq, 0); + wake_scheduler(rq); } static void scheduler_ix_resume_wake(Uint ix); @@ -3169,6 +3540,9 @@ evacuate_run_queue(ErtsRunQueue *rq, to_rq->misc.start = start; to_rq->misc.end = end; + + non_empty_runq(to_rq); + erts_smp_runq_unlock(to_rq); smp_notify_inc_runq(to_rq); erts_smp_runq_lock(to_rq); @@ -3381,7 +3755,7 @@ try_steal_task(ErtsRunQueue *rq) Uint32 flags; /* Protect jobs we steal from getting stolen from us... */ - flags = ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_PROTECTED); + flags = empty_protected_runq(rq); if (flags & ERTS_RUNQ_FLG_SUSPENDED) return 0; /* go suspend instead... */ @@ -3460,6 +3834,9 @@ typedef struct { int full_reds_history_change; int oowc; int max_len; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + int sched_util; +#endif } ErtsRunQueueBalance; static ErtsRunQueueBalance *run_queue_info; @@ -3623,6 +4000,9 @@ check_balance(ErtsRunQueue *c_rq) Sint64 scheds_reds, full_scheds_reds; int forced, active, current_active, oowc, half_full_scheds, full_scheds, mmax_len, blnc_no_rqs, qix, pix, freds_hist_ix; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + int sched_util_balancing; +#endif if (erts_smp_atomic32_xchg_nob(&balance_info.checking_balance, 1)) { c_rq->check_balance_reds = INT_MAX; @@ -3678,6 +4058,10 @@ check_balance(ErtsRunQueue *c_rq) return; } +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + sched_util_balancing = 0; +#endif + freds_hist_ix = balance_info.full_reds_history_index; balance_info.full_reds_history_index++; if (balance_info.full_reds_history_index >= ERTS_FULL_REDS_HISTORY_SIZE) @@ -3708,7 +4092,12 @@ check_balance(ErtsRunQueue *c_rq) run_queue_info[qix].oowc = rq->out_of_work_count; run_queue_info[qix].max_len = rq->max_len; rq->check_balance_reds = INT_MAX; - + +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (erts_sched_balance_util) + run_queue_info[qix].sched_util = erts_get_sched_util(rq, 1, 0); +#endif + erts_smp_runq_unlock(rq); } @@ -3778,8 +4167,38 @@ check_balance(ErtsRunQueue *c_rq) mmax_len = run_queue_info[qix].max_len; } - if (!erts_sched_compact_load) + if (!erts_sched_compact_load) { +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (erts_sched_balance_util && full_scheds < blnc_no_rqs) { + int avg_util = 0; + + for (qix = 0; qix < blnc_no_rqs; qix++) + avg_util += run_queue_info[qix].sched_util; + + avg_util /= blnc_no_rqs; /* in ppm */ + + sched_util_balancing = 1; + /* + * In order to avoid renaming a large amount of fields + * we write utilization values instead of lenght values + * in the 'max_len' and 'migration_limit' fields... + */ + for (qix = 0; qix < blnc_no_rqs; qix++) { + run_queue_info[qix].flags = 0; /* Reset for later use... */ + for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { + run_queue_info[qix].prio[pix].emigrate_to = -1; + run_queue_info[qix].prio[pix].immigrate_from = -1; + run_queue_info[qix].prio[pix].avail = 100; + run_queue_info[qix].prio[pix].max_len = run_queue_info[qix].sched_util; + run_queue_info[qix].prio[pix].migration_limit = avg_util; + } + } + active = blnc_no_rqs; + goto setup_migration_paths; + } +#endif goto all_active; + } if (!forced && half_full_scheds != blnc_no_rqs) { int min = 1; @@ -3896,15 +4315,30 @@ check_balance(ErtsRunQueue *c_rq) } } +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + setup_migration_paths: +#endif + /* Setup migration paths for all priorities */ for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { int low = 0, high = 0; for (qix = 0; qix < blnc_no_rqs; qix++) { int len_diff = run_queue_info[qix].prio[pix].max_len; len_diff -= run_queue_info[qix].prio[pix].migration_limit; + #ifdef DBG_PRINT if (pix == 2) erts_fprintf(stderr, "%d ", len_diff); #endif + +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (sched_util_balancing + && -ERTS_SCHED_UTIL_IGNORE_IMBALANCE_DIFF <= len_diff + && len_diff <= ERTS_SCHED_UTIL_IGNORE_IMBALANCE_DIFF) { + /* ignore minor imbalance */ + len_diff = 0; + } +#endif + run_queue_compare[qix].qix = qix; run_queue_compare[qix].len = len_diff; if (len_diff != 0) { @@ -4031,6 +4465,9 @@ erts_fprintf(stderr, "--------------------------------\n"); Uint32 flags = run_queue_info[qix].flags; ErtsMigrationPath *mp = &new_mpaths->mpath[qix]; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + mp->sched_util = sched_util_balancing; +#endif mp->flags = flags; mp->misc_evac_runq = NULL; @@ -4572,7 +5009,10 @@ erts_sched_set_wake_cleanup_threshold(char *str) static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) { - awdp->sched_id = esdp ? (int) esdp->no : 0; + if (!esdp || ERTS_SCHEDULER_IS_DIRTY(esdp)) + awdp->sched_id = 0; + else + awdp->sched_id = (int) esdp->no; awdp->esdp = esdp; awdp->ssi = esdp ? esdp->ssi : NULL; #ifdef ERTS_SMP @@ -4612,14 +5052,71 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) #endif } +static void +init_scheduler_data(ErtsSchedulerData* esdp, int num, + ErtsSchedulerSleepInfo* ssi, + ErtsRunQueue* runq, + char** daww_ptr, size_t daww_sz) +{ +#ifdef ERTS_SMP + erts_bits_init_state(&esdp->erl_bits_state); + esdp->match_pseudo_process = NULL; + esdp->free_process = NULL; +#endif + esdp->x_reg_array = + erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, + ERTS_X_REGS_ALLOCATED * + sizeof(Eterm)); + esdp->f_reg_array = + erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, + MAX_REG * sizeof(FloatDef)); +#if !HEAP_ON_C_STACK + esdp->num_tmp_heap_used = 0; +#endif +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) { + esdp->no = 0; + esdp->dirty_no = (Uint) num; + } + else { + esdp->no = (Uint) num; + esdp->dirty_no = 0; + } +#else + esdp->no = (Uint) num; +#endif + esdp->ssi = ssi; + esdp->current_process = NULL; + esdp->current_port = NULL; + + esdp->virtual_reds = 0; + esdp->cpu_id = -1; + + erts_init_atom_cache_map(&esdp->atom_cache_map); + + esdp->run_queue = runq; + esdp->run_queue->scheduler = esdp; + + if (daww_ptr) { + init_aux_work_data(&esdp->aux_work_data, esdp, *daww_ptr); +#ifdef ERTS_SMP + *daww_ptr += daww_sz; +#endif + } + + esdp->reductions = 0; + + init_sched_wall_time(&esdp->sched_wall_time); + erts_port_task_handle_init(&esdp->nosuspend_port_task_handle); +} + void erts_init_scheduling(int no_schedulers, int no_schedulers_online) { int ix, n, no_ssi; char *daww_ptr; -#ifdef ERTS_SMP size_t daww_sz; -#endif + size_t size_runqs; init_misc_op_list_alloc(); init_proc_sys_task_queues_alloc(); @@ -4628,6 +5125,11 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) set_wakeup_other_data(); #endif +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (erts_sched_balance_util) + erts_sched_compact_load = 0; +#endif + ASSERT(no_schedulers_online <= no_schedulers); ASSERT(no_schedulers_online >= 1); ASSERT(no_schedulers >= 1); @@ -4635,19 +5137,26 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) /* Create and initialize run queues */ n = no_schedulers; - - erts_aligned_run_queues = - erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, - sizeof(ErtsAlignedRunQueue) * n); + size_runqs = sizeof(ErtsAlignedRunQueue) * (n + ERTS_NUM_DIRTY_RUNQS); + erts_aligned_run_queues = + erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs); #ifdef ERTS_SMP +#ifdef ERTS_DIRTY_SCHEDULERS + erts_aligned_run_queues += ERTS_NUM_DIRTY_RUNQS; +#endif erts_smp_atomic32_init_nob(&no_empty_run_queues, 0); #endif erts_no_run_queues = n; - for (ix = 0; ix < n; ix++) { + for (ix = -(ERTS_NUM_DIRTY_RUNQS); ix < n; ix++) { int pix, rix; +#ifdef ERTS_DIRTY_SCHEDULERS + ErtsRunQueue *rq = ERTS_RUNQ_IX_IS_DIRTY(ix) ? + ERTS_DIRTY_RUNQ_IX(ix) : ERTS_RUNQ_IX(ix); +#else ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); +#endif rq->ix = ix; @@ -4658,6 +5167,15 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1)); erts_smp_cnd_init(&rq->cnd); +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP + if (ERTS_RUNQ_IX_IS_DIRTY(ix)) { + erts_smp_spinlock_init(&rq->sleepers.lock, "dirty_run_queue_sleep_list"); + rq->sleepers.list = NULL; + } +#endif +#endif + rq->waiting = 0; rq->woken = 0; ERTS_RUNQ_FLGS_INIT(rq, ERTS_RUNQ_FLG_NONEMPTY); @@ -4696,6 +5214,11 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) rq->ports.info.reds = 0; rq->ports.start = NULL; rq->ports.end = NULL; + +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + init_runq_sched_util(&rq->sched_util, erts_sched_balance_util); +#endif + } #ifdef ERTS_SMP @@ -4739,6 +5262,29 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) #ifdef ERTS_SMP aligned_sched_sleep_info++; + +#ifdef ERTS_DIRTY_SCHEDULERS + aligned_dirty_cpu_sched_sleep_info = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_SLP_INFO, + erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi; + erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->event = NULL; /* initialized in sched_thread_func */ + erts_atomic32_init_nob(&ssi->aux_work, 0); + } + aligned_dirty_io_sched_sleep_info = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_SLP_INFO, + erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi; + erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->event = NULL; /* initialized in sched_thread_func */ + erts_atomic32_init_nob(&ssi->aux_work, 0); + } +#endif #endif /* Create and initialize scheduler specific data */ @@ -4749,6 +5295,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, daww_sz*n); #else + daww_sz = 0; daww_ptr = NULL; #endif @@ -4758,45 +5305,32 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) for (ix = 0; ix < n; ix++) { ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); -#ifdef ERTS_SMP - erts_bits_init_state(&esdp->erl_bits_state); - esdp->match_pseudo_process = NULL; - esdp->free_process = NULL; -#endif - esdp->x_reg_array = - erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, - ERTS_X_REGS_ALLOCATED * - sizeof(Eterm)); - esdp->f_reg_array = - erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, - MAX_REG * sizeof(FloatDef)); -#if !HEAP_ON_C_STACK - esdp->num_tmp_heap_used = 0; -#endif - esdp->no = (Uint) ix+1; - esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); - esdp->current_process = NULL; - esdp->current_port = NULL; - - esdp->virtual_reds = 0; - esdp->cpu_id = -1; - - erts_init_atom_cache_map(&esdp->atom_cache_map); - - esdp->run_queue = ERTS_RUNQ_IX(ix); - esdp->run_queue->scheduler = esdp; + init_scheduler_data(esdp, ix+1, ERTS_SCHED_SLEEP_INFO_IX(ix), + ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz); + } - init_aux_work_data(&esdp->aux_work_data, esdp, daww_ptr); +#ifdef ERTS_DIRTY_SCHEDULERS #ifdef ERTS_SMP - daww_ptr += daww_sz; -#endif - - esdp->reductions = 0; - - init_sched_wall_time(&esdp->sched_wall_time); - erts_port_task_handle_init(&esdp->nosuspend_port_task_handle); - + erts_aligned_dirty_cpu_scheduler_data = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_DATA, + erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerData)); + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix), + ERTS_DIRTY_CPU_RUNQ, NULL, 0); + } + erts_aligned_dirty_io_scheduler_data = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_DATA, + erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerData)); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); + init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix), + ERTS_DIRTY_IO_RUNQ, NULL, 0); } +#endif +#endif init_misc_aux_work(); #if !HALFWORD_HEAP @@ -4860,6 +5394,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) #endif } erts_no_schedulers = 1; +#ifdef ERTS_DIRTY_SCHEDULERS + erts_no_dirty_cpu_schedulers = 0; + erts_no_dirty_io_schedulers = 0; +#endif #endif erts_smp_atomic32_init_nob(&function_calls, 0); @@ -4972,28 +5510,39 @@ check_enqueue_in_prio_queue(erts_aint32_t *prq_prio_p, *prq_prio_p = aprio; - max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK; - max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; - max_qbit &= -max_qbit; - /* - * max_qbit now either contain bit set for highest prio queue or a bit - * out of range (which will have a value larger than valid range). - */ +#ifdef ERTS_DIRTY_SCHEDULERS + if (!(actual & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC))) { +#endif + max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK; + max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; + max_qbit &= -max_qbit; + /* + * max_qbit now either contain bit set for highest prio queue or a bit + * out of range (which will have a value larger than valid range). + */ - if (qbit >= max_qbit) - return 0; /* Already queued in higher or equal prio */ + if (qbit >= max_qbit) + return 0; /* Already queued in higher or equal prio */ - /* Need to enqueue (if already enqueued, it is in lower prio) */ - *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; + /* Need to enqueue (if already enqueued, it is in lower prio) */ + *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; - if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK)) - != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) { - /* - * Process struct already enqueued, or actual prio not - * equal to user prio, i.e., enqueue using proxy. - */ - return -1; + if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK)) + != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) { + /* + * Process struct already enqueued, or actual prio not + * equal to user prio, i.e., enqueue using proxy. + */ + return -1; + } +#ifdef ERTS_DIRTY_SCHEDULERS + } else { + if (actual & ERTS_PSFLG_DIRTY_CPU_PROC) + *newp |= ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q; + else + *newp |= ERTS_PSFLG_DIRTY_IO_PROC_IN_Q; } +#endif /* * Enqueue using process struct. @@ -5049,10 +5598,21 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces } else { Process *sched_p; - ErtsRunQueue *runq = erts_get_runq_proc(p); + ErtsRunQueue *runq; ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & ERTS_PSFLG_ACTIVE_SYS)); +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP + if (ERTS_PSFLG_DIRTY_CPU_PROC & a) + runq = ERTS_DIRTY_CPU_RUNQ; + else if (ERTS_PSFLG_DIRTY_IO_PROC & a) + runq = ERTS_DIRTY_IO_RUNQ; + else +#endif +#endif + runq = erts_get_runq_proc(p); + if (enqueue < 0) sched_p = make_proxy_proc(proxy, p, enq_prio); else { @@ -5062,7 +5622,11 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces } #ifdef ERTS_SMP - if (!(ERTS_PSFLG_BOUND & n)) { + if (!(ERTS_PSFLG_BOUND & n) +#ifdef ERTS_DIRTY_SCHEDULERS + && !(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)) +#endif + ) { ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio); if (new_runq) { RUNQ_SET_RQ(&sched_p->run_queue, new_runq); @@ -5489,6 +6053,9 @@ suspend_scheduler(ErtsSchedulerData *esdp) * Regardless of why a scheduler is suspended, it ends up here. */ +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif ASSERT(no != 1); evacuate_run_queue(esdp->run_queue, &sbp); @@ -5656,22 +6223,44 @@ ErtsSchedSuspendResult erts_schedulers_state(Uint *total, Uint *online, Uint *active, + Uint *dirty_cpu, + Uint *dirty_cpu_online, + Uint *dirty_io, int yield_allowed) { - int res; + int res = ERTS_SCHDLR_SSPND_EINVAL; erts_aint32_t changing; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - else { - *active = *online = schdlr_sspnd.online; - if (ongoing_multi_scheduling_block()) - *active = 1; - res = ERTS_SCHDLR_SSPND_DONE; + if (total) { + ASSERT(online); + ASSERT(active); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); + if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + else { + *active = *online = schdlr_sspnd.online; + if (ongoing_multi_scheduling_block()) + *active = 1; + res = ERTS_SCHDLR_SSPND_DONE; + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + *total = erts_no_schedulers; } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - *total = erts_no_schedulers; +#ifdef ERTS_DIRTY_SCHEDULERS + if (dirty_cpu) + *dirty_cpu = erts_no_dirty_cpu_schedulers; + if (dirty_cpu_online) + *dirty_cpu_online = erts_no_dirty_cpu_schedulers_online; + if (dirty_io) + *dirty_io = erts_no_dirty_io_schedulers; +#else + if (dirty_cpu) + *dirty_cpu = 0; + if (dirty_cpu_online) + *dirty_cpu_online = 0; + if (dirty_io) + *dirty_io = 0; +#endif return res; } @@ -5761,8 +6350,12 @@ erts_set_schedulers_online(Process *p, for (ix = no; ix < online; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq, 0); + wake_scheduler(rq); } +#ifdef ERTS_DIRTY_SCHEDULERS + wake_dirty_scheduler(ERTS_DIRTY_CPU_RUNQ); + wake_dirty_scheduler(ERTS_DIRTY_IO_RUNQ); +#endif } } @@ -5860,7 +6453,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) for (ix = 1; ix < online; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq, 0); + wake_scheduler(rq); } if (erts_smp_atomic32_read_nob(&schdlr_sspnd.active) @@ -6110,6 +6703,92 @@ sched_thread_func(void *vesdp) return NULL; } +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP +static void* +sched_dirty_cpu_thread_func(void *vesdp) +{ + ErtsThrPrgrCallbacks callbacks; + ErtsSchedulerData *esdp = vesdp; + Uint no = esdp->dirty_no; + ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); + callbacks.arg = (void *) esdp->ssi; + callbacks.wakeup = thr_prgr_wakeup; + callbacks.prepare_wait = NULL; + callbacks.wait = NULL; + callbacks.finalize_wait = NULL; + + erts_thr_progress_register_unmanaged_thread(&callbacks); +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[31]; + erts_snprintf(&buf[0], 31, "dirty cpu scheduler %beu", no); + erts_lc_set_thread_name(&buf[0]); + } +#endif + erts_tsd_set(sched_data_key, vesdp); +#if ERTS_USE_ASYNC_READY_Q + esdp->aux_work_data.async_ready.queue = NULL; +#endif + + erts_proc_lock_prepare_proc_lock_waiter(); + +#ifdef HIPE + hipe_thread_signal_init(); +#endif + erts_thread_init_float(); + + process_main(); + /* No schedulers should *ever* terminate */ + erl_exit(ERTS_ABORT_EXIT, + "Dirty CPU scheduler thread number %beu terminated\n", + no); + return NULL; +} + +static void* +sched_dirty_io_thread_func(void *vesdp) +{ + ErtsThrPrgrCallbacks callbacks; + ErtsSchedulerData *esdp = vesdp; + Uint no = esdp->dirty_no; + ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); + callbacks.arg = (void *) esdp->ssi; + callbacks.wakeup = thr_prgr_wakeup; + callbacks.prepare_wait = NULL; + callbacks.wait = NULL; + callbacks.finalize_wait = NULL; + + erts_thr_progress_register_unmanaged_thread(&callbacks); +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[31]; + erts_snprintf(&buf[0], 31, "dirty io scheduler %beu", no); + erts_lc_set_thread_name(&buf[0]); + } +#endif + erts_tsd_set(sched_data_key, vesdp); +#if ERTS_USE_ASYNC_READY_Q + esdp->aux_work_data.async_ready.queue = NULL; +#endif + + erts_proc_lock_prepare_proc_lock_waiter(); + +#ifdef HIPE + hipe_thread_signal_init(); +#endif + erts_thread_init_float(); + + process_main(); + /* No schedulers should *ever* terminate */ + erl_exit(ERTS_ABORT_EXIT, + "Dirty I/O scheduler thread number %beu terminated\n", + no); + return NULL; +} +#endif +#endif + static ethr_tid aux_tid; void @@ -6160,6 +6839,26 @@ erts_start_schedulers(void) erts_no_schedulers = actual; +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP + { + int ix; + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + res = ethr_thr_create(&esdp->tid,sched_dirty_cpu_thread_func,(void*)esdp,&opts); + if (res != 0) + erl_exit(1, "Failed to create dirty cpu scheduler thread %d\n", ix); + } + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); + res = ethr_thr_create(&esdp->tid,sched_dirty_io_thread_func,(void*)esdp,&opts); + if (res != 0) + erl_exit(1, "Failed to create dirty io scheduler thread %d\n", ix); + } + } +#endif +#endif + ERTS_THR_MEMORY_BARRIER; res = ethr_thr_create(&aux_tid, aux_thread, NULL, &opts); @@ -7148,7 +7847,8 @@ Process *schedule(Process *p, int calls) input_reductions = INPUT_REDUCTIONS; } - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data()) + || !erts_thr_progress_is_blocking()); /* * Clean up after the process being scheduled out. @@ -7259,42 +7959,40 @@ Process *schedule(Process *p, int calls) } - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) + || !erts_thr_progress_is_blocking()); check_activities_to_run: { #ifdef ERTS_SMP ErtsMigrationPaths *mps; ErtsMigrationPath *mp; - -#ifdef ERTS_SMP - { - ErtsProcList *pnd_xtrs = rq->procs.pending_exiters; - if (erts_proclist_fetch(&pnd_xtrs, NULL)) { - rq->procs.pending_exiters = NULL; - erts_smp_runq_unlock(rq); - handle_pending_exiters(pnd_xtrs); - erts_smp_runq_lock(rq); - } - + ErtsProcList *pnd_xtrs = rq->procs.pending_exiters; + if (erts_proclist_fetch(&pnd_xtrs, NULL)) { + rq->procs.pending_exiters = NULL; + erts_smp_runq_unlock(rq); + handle_pending_exiters(pnd_xtrs); + erts_smp_runq_lock(rq); } -#endif - if (rq->check_balance_reds <= 0) - check_balance(rq); + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (rq->check_balance_reds <= 0) + check_balance(rq); - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); - mps = erts_get_migration_paths_managed(); - mp = &mps->mpath[rq->ix]; + mps = erts_get_migration_paths_managed(); + mp = &mps->mpath[rq->ix]; - if (mp->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK) - immigrate(rq, mp); + if (mp->flags & ERTS_RUNQ_FLGS_IMMIGRATE_QMASK) + immigrate(rq, mp); + } + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); continue_check_activities_to_run: flags = ERTS_RUNQ_FLGS_GET_NOB(rq); continue_check_activities_to_run_known_flags: - + ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) + || flags & ERTS_RUNQ_FLG_NONEMPTY); if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND|ERTS_RUNQ_FLG_SUSPENDED)) { @@ -7309,7 +8007,7 @@ Process *schedule(Process *p, int calls) } } - { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { erts_aint32_t aux_work; int leader_update = erts_thr_progress_update(esdp); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); @@ -7321,9 +8019,9 @@ Process *schedule(Process *p, int calls) handle_aux_work(&esdp->aux_work_data, aux_work, 0); erts_smp_runq_lock(rq); } - } - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + } ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); #else /* ERTS_SMP */ @@ -7346,20 +8044,16 @@ Process *schedule(Process *p, int calls) rq->wakeup_other = 0; rq->wakeup_other_reds = 0; - empty_runq(rq); - flags = ERTS_RUNQ_FLGS_GET_NOB(rq); - if (flags & ERTS_RUNQ_FLG_SUSPENDED) { - non_empty_runq(rq); + if (flags & ERTS_RUNQ_FLG_SUSPENDED) goto continue_check_activities_to_run_known_flags; - } - else if (!(flags & ERTS_RUNQ_FLG_INACTIVE)) { - if (try_steal_task(rq)) { - non_empty_runq(rq); + if (flags & ERTS_RUNQ_FLG_INACTIVE) + empty_runq(rq); + else { + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && try_steal_task(rq)) goto continue_check_activities_to_run; - } - (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); + empty_runq(rq); /* * Check for ERTS_RUNQ_FLG_SUSPENDED has to be done @@ -7368,10 +8062,10 @@ Process *schedule(Process *p, int calls) flags = ERTS_RUNQ_FLGS_GET_NOB(rq); if (flags & ERTS_RUNQ_FLG_SUSPENDED) { non_empty_runq(rq); + flags |= ERTS_RUNQ_FLG_NONEMPTY; goto continue_check_activities_to_run_known_flags; } } - #endif scheduler_wait(&fcalls, esdp, rq); @@ -7382,7 +8076,8 @@ Process *schedule(Process *p, int calls) goto check_activities_to_run; } - else if (fcalls > input_reductions && prepare_for_sys_schedule()) { + else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && + (fcalls > input_reductions && prepare_for_sys_schedule())) { /* * Schedule system-level activities. */ @@ -7486,6 +8181,14 @@ Process *schedule(Process *p, int calls) psflg_band_mask = ~(((erts_aint32_t) 1) << (ERTS_PSFLGS_GET_PRQ_PRIO(state) + ERTS_PSFLGS_IN_PRQ_MASK_OFFSET)); +#ifdef ERTS_DIRTY_SCHEDULERS + /* if a non-dirty scheduler picks up a process marked as already being + in a dirty run queue, just drop it and go get another process */ + if (state & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) && + !ERTS_SCHEDULER_IS_DIRTY(esdp)) + goto pick_next_process; +#endif + if (!(state & ERTS_PSFLG_PROXY)) psflg_band_mask &= ~ERTS_PSFLG_IN_RUNQ; else { @@ -7568,6 +8271,10 @@ Process *schedule(Process *p, int calls) (UWord) esdp->no); int migrated = old && old != esdp->no; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); +#endif + prio = (int) ERTS_PSFLGS_GET_USR_PRIO(state); erts_smp_spin_lock(&erts_sched_stat.lock); @@ -8486,6 +9193,10 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) rq->misc.start = molp; rq->misc.end = molp; +#ifdef ERTS_SMP + non_empty_runq(rq); +#endif + erts_smp_runq_unlock(rq); smp_notify_inc_runq(rq); @@ -8755,7 +9466,6 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->htop = p->heap; p->heap_sz = sz; p->catches = 0; - p->extra_root = NULL; p->bin_vheap_sz = p->min_vheap_size; p->bin_old_vheap_sz = p->min_vheap_size; @@ -9372,8 +10082,15 @@ save_pending_exiter(Process *p) erts_proclist_store_last(&rq->procs.pending_exiters, plp); + non_empty_runq(rq); + erts_smp_runq_unlock(rq); - wake_scheduler(rq, 1); +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) + wake_dirty_scheduler(rq); + else +#endif + wake_scheduler(rq); } #endif @@ -10219,12 +10936,6 @@ erts_continue_exit_process(Process *p) if (pbt) erts_free(ERTS_ALC_T_BPD, (void *) pbt); - if (p->extra_root != NULL) { - (p->extra_root->cleanup)(p->extra_root); /* Should deallocate - whole structure */ - p->extra_root = NULL; - } - delete_process(p); #ifdef ERTS_SMP diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 043621125c..dcb9251d0d 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -70,7 +70,14 @@ typedef struct process Process; struct ErtsNodesMonitor_; +#define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT 0 +#define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT 0 + #define ERTS_MAX_NO_OF_SCHEDULERS 1024 +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS +#define ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS +#endif #define ERTS_DEFAULT_MAX_PROCESSES (1 << 18) @@ -98,7 +105,13 @@ struct saved_calls { extern Export exp_send, exp_receive, exp_timeout; extern int erts_sched_compact_load; +extern int erts_sched_balance_util; extern Uint erts_no_schedulers; +#ifdef ERTS_DIRTY_SCHEDULERS +extern Uint erts_no_dirty_cpu_schedulers; +extern Uint erts_no_dirty_cpu_schedulers_online; +extern Uint erts_no_dirty_io_schedulers; +#endif extern Uint erts_no_run_queues; extern int erts_sched_thread_suggested_stack_size; #define ERTS_SCHED_THREAD_MIN_STACK_SIZE 4 /* Kilo words */ @@ -198,6 +211,10 @@ extern int erts_sched_thread_suggested_stack_size; #define ERTS_RUNQ_FLGS_SET(RQ, FLGS) \ ((Uint32) erts_smp_atomic32_read_bor_relb(&(RQ)->flags, \ (erts_aint32_t) (FLGS))) +#define ERTS_RUNQ_FLGS_BSET(RQ, MSK, FLGS) \ + ((Uint32) erts_smp_atomic32_read_bset_relb(&(RQ)->flags, \ + (erts_aint32_t) (MSK), \ + (erts_aint32_t) (FLGS))) #define ERTS_RUNQ_FLGS_UNSET(RQ, FLGS) \ ((Uint32) erts_smp_atomic32_read_band_relb(&(RQ)->flags, \ (erts_aint32_t) ~(FLGS))) @@ -267,6 +284,13 @@ typedef enum { typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo; +#ifdef ERTS_DIRTY_SCHEDULERS +typedef struct { + erts_smp_spinlock_t lock; + ErtsSchedulerSleepInfo *list; +} ErtsSchedulerSleepList; +#endif + struct ErtsSchedulerSleepInfo_ { #ifdef ERTS_SMP ErtsSchedulerSleepInfo *next; @@ -316,9 +340,40 @@ typedef struct { int reds; } ErtsRunQueueInfo; + +#ifdef HAVE_GETHRTIME +# undef ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT +# define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT 1 +#endif + #ifdef ERTS_SMP +#undef ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT +#define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT + +#ifdef ARCH_64 +typedef erts_atomic_t ErtsAtomicSchedTime; +#elif defined(ARCH_32) +typedef erts_dw_atomic_t ErtsAtomicSchedTime; +#else +# error :-/ +#endif + +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT +typedef struct { + ErtsAtomicSchedTime last; + struct { + Uint64 short_interval; + Uint64 long_interval; + } worktime; + int is_working; +} ErtsRunQueueSchedUtil; +#endif + typedef struct { +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + int sched_util; +#endif Uint32 flags; ErtsRunQueue *misc_evac_runq; struct { @@ -348,6 +403,12 @@ struct ErtsRunQueue_ { erts_smp_mtx_t mtx; erts_smp_cnd_t cnd; +#ifdef ERTS_DIRTY_SCHEDULERS +#ifdef ERTS_SMP + ErtsSchedulerSleepList sleepers; +#endif +#endif + ErtsSchedulerData *scheduler; int waiting; /* < 0 in sys schedule; > 0 on cnd variable */ int woken; @@ -385,6 +446,9 @@ struct ErtsRunQueue_ { Port *start; Port *end; } ports; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + ErtsRunQueueSchedUtil sched_util; +#endif }; #ifdef ERTS_SMP @@ -414,6 +478,7 @@ do { \ } while (0) typedef struct { + int need; /* "+sbu true" or scheduler_wall_time enabled */ int enabled; Uint64 start; struct { @@ -499,12 +564,14 @@ struct ErtsSchedulerData_ { Eterm tmp_heap[TMP_HEAP_SIZE]; int num_tmp_heap_used; Eterm beam_emu_tmp_heap[BEAM_EMU_TMP_HEAP_SIZE]; - Eterm cmp_tmp_heap[CMP_TMP_HEAP_SIZE]; Eterm erl_arith_tmp_heap[ERL_ARITH_TMP_HEAP_SIZE]; #endif ErtsSchedulerSleepInfo *ssi; Process *current_process; - Uint no; /* Scheduler number */ + Uint no; /* Scheduler number for normal schedulers */ +#ifdef ERTS_DIRTY_SCHEDULERS + Uint dirty_no; /* Scheduler number for dirty schedulers */ +#endif Port *current_port; ErtsRunQueue *run_queue; int virtual_reds; @@ -531,6 +598,10 @@ typedef union { } ErtsAlignedSchedulerData; extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data; +#ifdef ERTS_DIRTY_SCHEDULERS +extern ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data; +extern ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data; +#endif #ifndef ERTS_SMP extern ErtsSchedulerData *erts_scheduler_data; @@ -542,6 +613,12 @@ int erts_smp_lc_runq_is_locked(ErtsRunQueue *); #ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS +#ifdef ERTS_SMP +void erts_empty_runq(ErtsRunQueue *rq); +void erts_non_empty_runq(ErtsRunQueue *rq); +#endif + + /* * Run queue locked during modifications. We use atomic ops since * other threads peek at values without run queue lock. @@ -574,6 +651,10 @@ erts_smp_inc_runq_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi, int prio) erts_smp_atomic32_set_relb(&rqi->len, len); +#ifdef ERTS_SMP + if (rq->len == 0) + erts_non_empty_runq(rq); +#endif rq->len++; if (rq->max_len < rq->len) rq->max_len = len; @@ -632,8 +713,13 @@ erts_smp_reset_max_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi) #define ERTS_PSD_DIST_ENTRY 3 #define ERTS_PSD_CALL_TIME_BP 4 #define ERTS_PSD_DELAYED_GC_TASK_QS 5 +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT 6 +#define ERTS_PSD_SIZE 7 +#else #define ERTS_PSD_SIZE 6 +#endif typedef struct { void *data[ERTS_PSD_SIZE]; @@ -660,6 +746,11 @@ typedef struct { #define ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS ERTS_PROC_LOCK_MAIN #define ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS ERTS_PROC_LOCK_MAIN +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS ERTS_PROC_LOCK_MAIN +#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS ERTS_PROC_LOCK_MAIN +#endif + typedef struct { ErtsProcLocks get_locks; ErtsProcLocks set_locks; @@ -711,13 +802,6 @@ struct ErtsPendingSuspend_ { #endif -typedef struct ErlExtraRootSet_ ErlExtraRootSet; -struct ErlExtraRootSet_ { - Eterm *objv; - Uint sz; - void (*cleanup)(ErlExtraRootSet *); -}; - /* Defines to ease the change of memory architecture */ # define HEAP_START(p) (p)->heap # define HEAP_TOP(p) (p)->htop @@ -811,8 +895,6 @@ struct process { ErlMessageQueue msg; /* Message queue */ - ErlExtraRootSet *extra_root; /* Used by trapping BIF's */ - union { ErtsBifTimer *bif_timers; /* Bif timers aiming at this process */ void *terminate; @@ -982,6 +1064,12 @@ void erts_check_for_holes(Process* p); #define ERTS_PSFLG_RUNNING_SYS ERTS_PSFLG_BIT(15) #define ERTS_PSFLG_PROXY ERTS_PSFLG_BIT(16) #define ERTS_PSFLG_DELAYED_SYS ERTS_PSFLG_BIT(17) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_PSFLG_DIRTY_CPU_PROC ERTS_PSFLG_BIT(18) +#define ERTS_PSFLG_DIRTY_IO_PROC ERTS_PSFLG_BIT(19) +#define ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q ERTS_PSFLG_BIT(20) +#define ERTS_PSFLG_DIRTY_IO_PROC_IN_Q ERTS_PSFLG_BIT(21) +#endif #define ERTS_PSFLGS_IN_PRQ_MASK (ERTS_PSFLG_IN_PRQ_MAX \ | ERTS_PSFLG_IN_PRQ_HIGH \ @@ -1187,12 +1275,46 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags; (p)->flags &= ~F_TIMO; \ } while (0) +#if defined(ERTS_DIRTY_SCHEDULERS) && defined(ERTS_SMP) +#define ERTS_NUM_DIRTY_RUNQS 2 +#else +#define ERTS_NUM_DIRTY_RUNQS 0 +#endif + #define ERTS_RUNQ_IX(IX) \ (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues), \ &erts_aligned_run_queues[(IX)].runq) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_RUNQ_IX_IS_DIRTY(IX) \ + (-(ERTS_NUM_DIRTY_RUNQS) <= (IX) && (IX) < 0) +#define ERTS_DIRTY_RUNQ_IX(IX) \ + (ASSERT(ERTS_RUNQ_IX_IS_DIRTY(IX)), \ + &erts_aligned_run_queues[(IX)].runq) +#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[-1].runq) +#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[-2].runq) +#else +#define ERTS_RUNQ_IX_IS_DIRTY(IX) 0 +#endif #define ERTS_SCHEDULER_IX(IX) \ (ASSERT(0 <= (IX) && (IX) < erts_no_schedulers), \ &erts_aligned_scheduler_data[(IX)].esd) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_DIRTY_CPU_SCHEDULER_IX(IX) \ + (ASSERT(0 <= (IX) && (IX) < erts_no_dirty_cpu_schedulers), \ + &erts_aligned_dirty_cpu_scheduler_data[(IX)].esd) +#define ERTS_DIRTY_IO_SCHEDULER_IX(IX) \ + (ASSERT(0 <= (IX) && (IX) < erts_no_dirty_io_schedulers), \ + &erts_aligned_dirty_io_scheduler_data[(IX)].esd) +#ifdef ERTS_SMP +#define ERTS_SCHEDULER_IS_DIRTY(ESDP) \ + ((ESDP)->dirty_no != 0) +#else +#define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0 +#endif +#else +#define ERTS_RUNQ_IX_IS_DIRTY(IX) 0 +#define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0 +#endif void erts_pre_init_process(void); void erts_late_init_process(void); @@ -1395,9 +1517,11 @@ int erts_dbg_check_halloc_lock(Process *p); void erts_dbg_multi_scheduling_return_trap(Process *, Eterm); #endif int erts_get_max_no_executing_schedulers(void); -#ifdef ERTS_SMP +#if defined(ERTS_SMP) || defined(ERTS_DIRTY_SCHEDULERS) ErtsSchedSuspendResult -erts_schedulers_state(Uint *, Uint *, Uint *, int); +erts_schedulers_state(Uint *, Uint *, Uint *, Uint *, Uint *, Uint *, int); +#endif +#ifdef ERTS_SMP ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, @@ -1515,7 +1639,7 @@ do { \ ErtsSchedulerData *esdp__ = ((P) \ ? ERTS_PROC_GET_SCHDATA((Process *) (P)) \ : erts_get_scheduler_data()); \ - if (esdp__) \ + if (esdp__ && !ERTS_SCHEDULER_IS_DIRTY(esdp__)) \ esdp__->verify_unused_temp_alloc( \ esdp__->verify_unused_temp_alloc_data); \ } while (0) @@ -1650,6 +1774,13 @@ erts_psd_set(Process *p, ErtsProcLocks plocks, int ix, void *data) #define ERTS_PROC_SET_DELAYED_GC_TASK_QS(P, L, PBT) \ ((ErtsProcSysTaskQs *) erts_psd_set((P), (L), ERTS_PSD_DELAYED_GC_TASK_QS, (void *) (PBT))) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(P) \ + ((Export *) erts_psd_get((P), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT)) +#define ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(P, L, DSTE) \ + ((Export *) erts_psd_set((P), (L), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT, (void *) (DSTE))) +#endif + ERTS_GLB_INLINE Eterm erts_proc_get_error_handler(Process *p); ERTS_GLB_INLINE Eterm erts_proc_set_error_handler(Process *p, @@ -1695,6 +1826,13 @@ erts_proc_set_error_handler(Process *p, ErtsProcLocks plocks, Eterm handler) extern erts_atomic_t erts_migration_paths; +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT +int erts_get_sched_util(ErtsRunQueue *rq, + int initially_locked, + int short_interval); +#endif + + ERTS_GLB_INLINE ErtsMigrationPaths *erts_get_migration_paths_managed(void); ERTS_GLB_INLINE ErtsMigrationPaths *erts_get_migration_paths(void); ERTS_GLB_INLINE ErtsRunQueue *erts_check_emigration_need(ErtsRunQueue *c_rq, @@ -1746,22 +1884,36 @@ erts_check_emigration_need(ErtsRunQueue *c_rq, int prio) return mp->prio[prio].runq; } - - if (prio == ERTS_PORT_PRIO_LEVEL) - len = RUNQ_READ_LEN(&c_rq->ports.info.len); +#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT + if (mp->sched_util) { + ErtsRunQueue *rq = mp->prio[prio].runq; + /* No migration if other is non-empty */ + if (!(ERTS_RUNQ_FLGS_GET(rq) & ERTS_RUNQ_FLG_NONEMPTY) + && erts_get_sched_util(rq, 0, 1) < mp->prio[prio].limit.other + && erts_get_sched_util(c_rq, 0, 1) > mp->prio[prio].limit.this) { + return rq; + } + } else - len = RUNQ_READ_LEN(&c_rq->procs.prio_info[prio].len); - - if (len > mp->prio[prio].limit.this) { - ErtsRunQueue *n_rq = mp->prio[prio].runq; - if (n_rq) { - if (prio == ERTS_PORT_PRIO_LEVEL) - len = RUNQ_READ_LEN(&n_rq->ports.info.len); - else - len = RUNQ_READ_LEN(&n_rq->procs.prio_info[prio].len); - - if (len < mp->prio[prio].limit.other) - return n_rq; +#endif + { + + if (prio == ERTS_PORT_PRIO_LEVEL) + len = RUNQ_READ_LEN(&c_rq->ports.info.len); + else + len = RUNQ_READ_LEN(&c_rq->procs.prio_info[prio].len); + + if (len > mp->prio[prio].limit.this) { + ErtsRunQueue *n_rq = mp->prio[prio].runq; + if (n_rq) { + if (prio == ERTS_PORT_PRIO_LEVEL) + len = RUNQ_READ_LEN(&n_rq->ports.info.len); + else + len = RUNQ_READ_LEN(&n_rq->procs.prio_info[prio].len); + + if (len < mp->prio[prio].limit.other) + return n_rq; + } } } } @@ -1822,7 +1974,12 @@ Uint erts_get_scheduler_id(void) { #ifdef ERTS_SMP ErtsSchedulerData *esdp = erts_get_scheduler_data(); - return esdp ? esdp->no : (Uint) 0; +#ifdef ERTS_DIRTY_SCHEDULERS + if (esdp && ERTS_SCHEDULER_IS_DIRTY(esdp)) + return 0; + else +#endif + return esdp ? esdp->no : (Uint) 0; #else return erts_get_scheduler_data() ? (Uint) 1 : (Uint) 0; #endif diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 953edf79ea..50d3e63c58 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2000-2013. All Rights Reserved. + * Copyright Ericsson AB 2000-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -1126,6 +1126,7 @@ extern unsigned tag_val_def(Wterm); #define make_tuple_rel make_boxed_rel #define make_external_rel make_boxed_rel #define make_internal_ref_rel make_boxed_rel +#define make_big_rel make_boxed_rel #define binary_val_rel(RTERM, BASE) binary_val(rterm2wterm(RTERM, BASE)) #define list_val_rel(RTERM, BASE) list_val(rterm2wterm(RTERM, BASE)) diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h index 337422eead..b7de8208ad 100644 --- a/erts/emulator/beam/erl_vm.h +++ b/erts/emulator/beam/erl_vm.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -46,7 +46,6 @@ heap data on the C stack or if we use the buffers in the scheduler data. */ #define TMP_HEAP_SIZE 128 /* Number of Eterm in the schedulers small heap for transient heap data */ -#define CMP_TMP_HEAP_SIZE 32 /* cmp wants its own tmp-heap... */ #define ERL_ARITH_TMP_HEAP_SIZE 4 /* as does erl_arith... */ #define BEAM_EMU_TMP_HEAP_SIZE 2 /* and beam_emu... */ diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index 2cb44a5b64..5e7a5cab6e 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -87,7 +87,8 @@ static Export term_to_binary_trap_export; static byte* enc_term(ErtsAtomCacheMap *, Eterm, byte*, Uint32, struct erl_off_heap_header** off_heap); -static int enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, +struct TTBEncodeContext_; +static int enc_term_int(struct TTBEncodeContext_*,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, struct erl_off_heap_header** off_heap, Sint *reds, byte **res); static Uint is_external_string(Eterm obj, int* p_is_string); static byte* enc_atom(ErtsAtomCacheMap *, Eterm, byte*, Uint32); @@ -103,7 +104,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla Binary *context_b); static Uint encode_size_struct2(ErtsAtomCacheMap *, Eterm, unsigned); -static int encode_size_struct_int(Process *p, ErtsAtomCacheMap *acmp, Eterm obj, +struct TTBSizeContext_; +static int encode_size_struct_int(struct TTBSizeContext_*, ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags, Sint *reds, Uint *res); static Export binary_to_term_trap_export; @@ -1086,7 +1088,6 @@ BIF_RETTYPE term_to_binary_2(BIF_ALIST_2) int level = 0; Uint flags = TERM_TO_BINARY_DFLAGS; Eterm res; - Binary *bin = NULL; while (is_list(Flags)) { Eterm arg = CAR(list_val(Flags)); @@ -1123,7 +1124,7 @@ BIF_RETTYPE term_to_binary_2(BIF_ALIST_2) goto error; } - res = erts_term_to_binary_int(p, Term, level, flags, bin); + res = erts_term_to_binary_int(p, Term, level, flags, NULL); if (is_tuple(res)) { erts_set_gc_state(p, 0); BIF_TRAP1(&term_to_binary_trap_export,BIF_P,res); @@ -1726,14 +1727,20 @@ erts_term_to_binary(Process* p, Eterm Term, int level, Uint flags) { typedef enum { TTBSize, TTBEncode, TTBCompress } TTBState; -typedef struct { +typedef struct TTBSizeContext_ { Uint flags; int level; + Uint result; + Eterm obj; + ErtsEStack estack; } TTBSizeContext; -typedef struct { +typedef struct TTBEncodeContext_ { Uint flags; int level; + byte* ep; + Eterm obj; + ErtsWStack wstack; Binary *result_bin; } TTBEncodeContext; @@ -1763,8 +1770,10 @@ static void ttb_context_destructor(Binary *context_bin) context->alive = 0; switch (context->state) { case TTBSize: + DESTROY_SAVED_ESTACK(&context->s.sc.estack); break; case TTBEncode: + DESTROY_SAVED_WSTACK(&context->s.ec.wstack); if (context->s.ec.result_bin != NULL) { /* Set to NULL if ever made alive! */ ASSERT(erts_refc_read(&(context->s.ec.result_bin->refc),0) == 0); erts_bin_free(context->s.ec.result_bin); @@ -1829,6 +1838,7 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla /* Setup enough to get started */ context->state = TTBSize; context->alive = 1; + context->s.sc.estack.start = NULL; context->s.sc.flags = flags; context->s.sc.level = level; } else { @@ -1844,7 +1854,8 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla int level; Uint flags; /* Try for fast path */ - if (encode_size_struct_int(p, NULL, Term, context->s.sc.flags, &reds, &size) < 0) { + if (encode_size_struct_int(&context->s.sc, NULL, Term, + context->s.sc.flags, &reds, &size) < 0) { EXPORT_CONTEXT(); /* Same state */ RETURN_STATE(); @@ -1870,6 +1881,7 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla context->state = TTBEncode; context->s.ec.flags = flags; context->s.ec.level = level; + context->s.ec.wstack.wstart = NULL; context->s.ec.result_bin = result_bin; break; } @@ -1881,7 +1893,7 @@ static Eterm erts_term_to_binary_int(Process* p, Eterm Term, int level, Uint fla Binary *result_bin; flags = context->s.ec.flags; - if (enc_term_int(p,NULL,Term, bytes+1, flags, NULL, &reds, &endp) < 0) { + if (enc_term_int(&context->s.ec, NULL,Term, bytes+1, flags, NULL, &reds, &endp) < 0) { EXPORT_CONTEXT(); RETURN_STATE(); } @@ -2289,27 +2301,6 @@ dec_pid(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, Ete #define ENC_PATCH_FUN_SIZE ((Eterm) 2) #define ENC_LAST_ARRAY_ELEMENT ((Eterm) 3) -/* Free extra rootset (used when trapping) */ -static void cleanup_ttb_extra_root(ErlExtraRootSet *rs) -{ - if (rs->objv != NULL) { - erts_free(ERTS_ALC_T_EXTRA_ROOT, rs->objv); - } - erts_free(ERTS_ALC_T_EXTRA_ROOT, rs); -} - -/* Same as above, but we have an extra "stack" beyond GC reach, i.e. an array of two extra roots */ -static void cleanup_ttb_extra_root_2(ErlExtraRootSet *rs) -{ - if (rs->objv != NULL) { - erts_free(ERTS_ALC_T_EXTRA_ROOT, rs->objv); - } - if (rs[1].objv != NULL) { - erts_free(ERTS_ALC_T_EXTRA_ROOT, rs[1].objv); - } - - erts_free(ERTS_ALC_T_EXTRA_ROOT, rs); -} static byte* enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, @@ -2321,39 +2312,43 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, } static int -enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, +enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags, struct erl_off_heap_header** off_heap, Sint *reds, byte **res) { - DECLARE_ESTACK(s); - DECLARE_WSTACK(com); + DECLARE_WSTACK(s); Uint n; Uint i; Uint j; Uint* ptr; Eterm val; FloatDef f; - int count_reds = (p != NULL && reds != NULL); Sint r = 0; +#if HALFWORD_HEAP + UWord wobj; +#endif + - if (count_reds) { - ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_EXTRA_ROOT); - WSTACK_CHANGE_ALLOCATOR(com, ERTS_ALC_T_EXTRA_ROOT); + if (ctx) { + WSTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK); r = *reds; - } - if (p && p->extra_root) { /* restore saved stacks and byte pointer */ - ESTACK_RESTORE(s,p->extra_root[0].objv, p->extra_root[0].sz); - obj = ESTACK_POP(s); - WSTACK_RESTORE(com, p->extra_root[1].objv, p->extra_root[1].sz); - ep = (byte *) WSTACK_POP(com); + if (ctx->wstack.wstart) { /* restore saved stacks and byte pointer */ + WSTACK_RESTORE(s, &ctx->wstack); + ep = ctx->ep; + obj = ctx->obj; + } } goto L_jump_start; outer_loop: - while (!ESTACK_ISEMPTY(s)) { - obj = ESTACK_POP(s); - switch (val = WSTACK_POP(com)) { + while (!WSTACK_ISEMPTY(s)) { +#if HALFWORD_HEAP + obj = (Eterm) (wobj = WSTACK_POP(s)); +#else + obj = WSTACK_POP(s); +#endif + switch (val = WSTACK_POP(s)) { case ENC_TERM: break; case ENC_ONE_CONS: @@ -2364,55 +2359,52 @@ enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dfla obj = CAR(cons); tl = CDR(cons); - WSTACK_PUSH(com, is_list(tl) ? ENC_ONE_CONS : ENC_TERM); - ESTACK_PUSH(s, tl); + WSTACK_PUSH(s, is_list(tl) ? ENC_ONE_CONS : ENC_TERM); + WSTACK_PUSH(s, tl); } break; case ENC_PATCH_FUN_SIZE: - /* obj will be discarded, it was NIL */ { - byte* size_p = (byte *) WSTACK_POP(com); +#if HALFWORD_HEAP + byte* size_p = (byte *) wobj; +#else + byte* size_p = (byte *) obj; +#endif put_int32(ep - size_p, size_p); } goto outer_loop; case ENC_LAST_ARRAY_ELEMENT: /* obj is the tuple */ { - Eterm* ptr = tuple_val(obj); - i = arityval(*ptr); - obj = ptr[i]; +#if HALFWORD_HEAP + Eterm* ptr = (Eterm *) wobj; +#else + Eterm* ptr = (Eterm *) obj; +#endif + obj = *ptr; } break; default: /* ENC_LAST_ARRAY_ELEMENT+1 and upwards */ { - Eterm* ptr = tuple_val(obj); - i = arityval(*ptr); - ESTACK_PUSH(s, obj); /* put back tuple and next element index */ - WSTACK_PUSH(com, val-1); - obj = ptr[i - (val - ENC_LAST_ARRAY_ELEMENT)]; /* the index is counting down */ +#if HALFWORD_HEAP + Eterm* ptr = (Eterm *) wobj; +#else + Eterm* ptr = (Eterm *) obj; +#endif + WSTACK_PUSH(s, val-1); + obj = *ptr++; + WSTACK_PUSH(s, (UWord)ptr); } break; } L_jump_start: - if (count_reds && --r == 0) { + if (ctx && --r == 0) { *reds = r; - ESTACK_PUSH(s,obj); /* push back current object, to be popped on restore */ - WSTACK_PUSH(com,((UWord) ep)); - if (p->extra_root == NULL) { - /* NB. Allocate an array of two "extra-roots", of which only the first element - is seen and handled by the GC. Index 1 holds the Wstack. */ - p->extra_root = erts_alloc(ERTS_ALC_T_EXTRA_ROOT, sizeof(ErlExtraRootSet)*2); - p->extra_root->objv = NULL; - p->extra_root->sz = 0; - p->extra_root->cleanup = cleanup_ttb_extra_root_2; - p->extra_root[1].objv = NULL; - p->extra_root[1].sz = 0; - p->extra_root[1].cleanup = NULL; /* Never used */ - } - ESTACK_SAVE(s, p->extra_root[0].objv, p->extra_root[0].sz); - WSTACK_SAVE(com, p->extra_root[1].objv, (p->extra_root[1].sz)); + ctx->obj = obj; + ctx->ep = ep; + WSTACK_SAVE(s, &ctx->wstack); return -1; } switch(tag_val_def(obj)) { @@ -2558,8 +2550,8 @@ enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dfla ep += 4; } if (i > 0) { - WSTACK_PUSH(com, ENC_LAST_ARRAY_ELEMENT+i-1); - ESTACK_PUSH(s, obj); + WSTACK_PUSH(s, ENC_LAST_ARRAY_ELEMENT+i-1); + WSTACK_PUSH(s, (UWord)ptr); } break; @@ -2703,9 +2695,8 @@ enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dfla int ei; *ep++ = NEW_FUN_EXT; - WSTACK_PUSH(com, (UWord) ep); /* Position for patching in size */ - WSTACK_PUSH(com, ENC_PATCH_FUN_SIZE); - ESTACK_PUSH(s,NIL); /* Will be thrown away */ + WSTACK_PUSH(s, ENC_PATCH_FUN_SIZE); + WSTACK_PUSH(s, (UWord) ep); /* Position for patching in size */ ep += 4; *ep = funp->arity; ep += 1; @@ -2722,8 +2713,8 @@ enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dfla fun_env: for (ei = funp->num_free-1; ei > 0; ei--) { - WSTACK_PUSH(com, ENC_TERM); - ESTACK_PUSH(s, (UWord) funp->env[ei]); + WSTACK_PUSH(s, ENC_TERM); + WSTACK_PUSH(s, (UWord) funp->env[ei]); } if (funp->num_free != 0) { obj = funp->env[0]; @@ -2766,13 +2757,9 @@ enc_term_int(Process *p,ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dfla break; } } - DESTROY_ESTACK(s); - DESTROY_WSTACK(com); - if (p && p->extra_root) { - cleanup_ttb_extra_root_2(p->extra_root); - p->extra_root = NULL; - } - if (count_reds) { + DESTROY_WSTACK(s); + if (ctx) { + ASSERT(ctx->wstack.wstart == NULL); *reds = r; } *res = ep; @@ -3742,26 +3729,24 @@ static Uint encode_size_struct2(ErtsAtomCacheMap *acmp, Eterm obj, unsigned dfla } static int -encode_size_struct_int(Process *p, ErtsAtomCacheMap *acmp, Eterm obj, +encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags, Sint *reds, Uint *res) { DECLARE_ESTACK(s); Uint m, i, arity; Uint result = 0; - int count_reds = (p != NULL && reds != 0); Sint r = 0; - if (count_reds) { - ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_EXTRA_ROOT); + if (ctx) { + ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK); r = *reds; - } - - if (p && p->extra_root) { /* restore saved stack */ - ESTACK_RESTORE(s,p->extra_root->objv, p->extra_root->sz + 1); - result = ESTACK_POP(s); /*Untagged, beyond p->extra_root->sz */ - obj = ESTACK_POP(s); - } + if (ctx->estack.start) { /* restore saved stack */ + ESTACK_RESTORE(s, &ctx->estack); + result = ctx->result; + obj = ctx->obj; + } + } goto L_jump_start; @@ -3787,18 +3772,11 @@ encode_size_struct_int(Process *p, ErtsAtomCacheMap *acmp, Eterm obj, } L_jump_start: - if (count_reds && --r == 0) { + if (ctx && --r == 0) { *reds = r; - ESTACK_PUSH(s,obj); /* push back current object */ - ESTACK_PUSH(s,result); /* Untagged, will be out of GC reach */ - if (p->extra_root == NULL) { - p->extra_root = erts_alloc(ERTS_ALC_T_EXTRA_ROOT, sizeof(ErlExtraRootSet)); - p->extra_root->objv = NULL; - p->extra_root->sz = 0; - p->extra_root->cleanup = cleanup_ttb_extra_root; - } - ESTACK_SAVE(s, p->extra_root->objv, p->extra_root->sz); - --p->extra_root->sz; /* Hide result from GC */ + ctx->obj = obj; + ctx->result = result; + ESTACK_SAVE(s, &ctx->estack); return -1; } switch (tag_val_def(obj)) { @@ -4001,11 +3979,8 @@ encode_size_struct_int(Process *p, ErtsAtomCacheMap *acmp, Eterm obj, } DESTROY_ESTACK(s); - if (p && p->extra_root) { - cleanup_ttb_extra_root(p->extra_root); - p->extra_root = NULL; - } - if (count_reds) { + if (ctx) { + ASSERT(ctx->estack.start == NULL); *reds = r; } *res = result; @@ -4074,7 +4049,9 @@ init_done: switch (tag) { case INTEGER_EXT: SKIP(4); +#if !defined(ARCH_64) || HALFWORD_HEAP heap_size += BIG_UINT_HEAP_SIZE; +#endif break; case SMALL_INTEGER_EXT: SKIP(1); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 6e5d352e5b..83a8911a36 100755 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -370,231 +370,233 @@ extern int stackdump_on_exit; * DESTROY_ESTACK(Stack) */ +typedef struct { + Eterm* start; + Eterm* sp; + Eterm* end; + ErtsAlcType_t alloc_type; +}ErtsEStack; -void erl_grow_stack(ErtsAlcType_t a_type, Eterm** start, Eterm** sp, Eterm** end); -#define ESTK_CONCAT(a,b) a##b -#define ESTK_SUBSCRIPT(s,i) *((Eterm *)((byte *)ESTK_CONCAT(s,_start) + (i))) #define DEF_ESTACK_SIZE (16) -#define DECLARE_ESTACK(s) \ - Eterm ESTK_CONCAT(s,_default_stack)[DEF_ESTACK_SIZE]; \ - Eterm* ESTK_CONCAT(s,_start) = ESTK_CONCAT(s,_default_stack); \ - Eterm* ESTK_CONCAT(s,_sp) = ESTK_CONCAT(s,_start); \ - Eterm* ESTK_CONCAT(s,_end) = ESTK_CONCAT(s,_start) + DEF_ESTACK_SIZE;\ - ErtsAlcType_t ESTK_CONCAT(s,_alloc_type) = ERTS_ALC_T_ESTACK +void erl_grow_estack(ErtsEStack*, Eterm* def_stack); +#define ESTK_CONCAT(a,b) a##b +#define ESTK_DEF_STACK(s) ESTK_CONCAT(s,_default_estack) + +#define DECLARE_ESTACK(s) \ + Eterm ESTK_DEF_STACK(s)[DEF_ESTACK_SIZE]; \ + ErtsEStack s = { \ + ESTK_DEF_STACK(s), /* start */ \ + ESTK_DEF_STACK(s), /* sp */ \ + ESTK_DEF_STACK(s) + DEF_ESTACK_SIZE, /* end */ \ + ERTS_ALC_T_ESTACK /* alloc_type */ \ + } #define ESTACK_CHANGE_ALLOCATOR(s,t) \ do { \ - if (ESTK_CONCAT(s,_start) != ESTK_CONCAT(s,_default_stack)) { \ + if (s.start != ESTK_DEF_STACK(s)) { \ erl_exit(1, "Internal error - trying to change allocator " \ "type of active estack\n"); \ } \ - ESTK_CONCAT(s,_alloc_type) = (t); \ + s.alloc_type = (t); \ } while (0) +#define DESTROY_ESTACK(s) \ +do { \ + if (s.start != ESTK_DEF_STACK(s)) { \ + erts_free(s.alloc_type, s.start); \ + } \ +} while(0) + + /* - * Do not free the stack after this, it may have pointers into what - * was saved in 'v'. 'v' and 'vsize' are changed by this macro. If - * 'v' points to anything, it should have been allocated by a previous - * call to this macro. Be careful to set a correct allocator prior to - * saving. - * 'v' can be any lvalue pointer, it will point to an array of UWord - * after calling this macro. + * Do not free the stack after this, it may have pointers into what + * was saved in 'dst'. */ -#define ESTACK_SAVE(s,v,vsize) /* v and vsize are "name parameters" */ \ -do { \ - Uint _esz = ESTACK_COUNT(s); \ - if (ESTK_CONCAT(s,_start) == ESTK_CONCAT(s,_default_stack)) { \ - if ((v) == NULL) { \ - (v) = erts_alloc(ESTK_CONCAT(s,_alloc_type), \ - DEF_ESTACK_SIZE * sizeof(Eterm)); \ - } \ - memcpy((v),ESTK_CONCAT(s,_start),_esz*sizeof(Eterm)); \ - } else { \ - (v) = (void *) ESTK_CONCAT(s,_start); \ - } \ - (vsize) = _esz; \ +#define ESTACK_SAVE(s,dst)\ +do {\ + if (s.start == ESTK_DEF_STACK(s)) {\ + UWord _wsz = ESTACK_COUNT(s);\ + (dst)->start = erts_alloc(s.alloc_type,\ + DEF_ESTACK_SIZE * sizeof(Eterm));\ + memcpy((dst)->start, s.start,_wsz*sizeof(Eterm));\ + (dst)->sp = (dst)->start + _wsz;\ + (dst)->end = (dst)->start + DEF_ESTACK_SIZE;\ + (dst)->alloc_type = s.alloc_type;\ + } else\ + *(dst) = s;\ } while (0) -/* - * Use on empty stack, only the allocator can be changed before this - * The vector parameter is reset to NULL if the vector is moved to stack, - * otherwise it's kept for reuse, so a saved and restored vector might - * need freeing using the correct allocator parameter. - * 'v' can be any lvalue pointer, it's cast to an (Eterm *). +#define DESTROY_SAVED_ESTACK(estack)\ +do {\ + if ((estack)->start) {\ + erts_free((estack)->alloc_type, (estack)->start);\ + (estack)->start = NULL;\ + }\ +} while(0) + +/* + * Use on empty stack, only the allocator can be changed before this. + * The src stack is reset to NULL. */ -#define ESTACK_RESTORE(s, v, vsize) /*v is a "name parameter"*/ \ -do { \ - if ((vsize) > DEF_ESTACK_SIZE) { \ - Uint _ca = DEF_ESTACK_SIZE; \ - while (_ca < (vsize)) \ - _ca = _ca * 2; \ - ESTK_CONCAT(s,_start) = (Eterm *) (v); \ - ESTK_CONCAT(s,_end) = ((Eterm *)(v)) + _ca; \ - ESTK_CONCAT(s,_sp) = ESTK_CONCAT(s,_start) + (vsize); \ - (v) = NULL; \ - } else { \ - memcpy(ESTK_CONCAT(s,_start),(v),(vsize)*sizeof(Eterm));\ - ESTK_CONCAT(s,_sp) = ESTK_CONCAT(s,_start) + (vsize); \ - } \ - } while (0) +#define ESTACK_RESTORE(s, src) \ +do { \ + ASSERT(s.start == ESTK_DEF_STACK(s)); \ + s = *(src); /* struct copy */ \ + (src)->start = NULL; \ + ASSERT(s.sp >= s.start); \ + ASSERT(s.sp <= s.end); \ +} while (0) -#define ESTACK_IS_STATIC(s) (ESTK_CONCAT(s,_start) == ESTK_CONCAT(s,_default_stack)) +#define ESTACK_IS_STATIC(s) (s.start == ESTK_DEF_STACK(s))) -#define DESTROY_ESTACK(s) \ -do { \ - if (ESTK_CONCAT(s,_start) != ESTK_CONCAT(s,_default_stack)) { \ - erts_free(ESTK_CONCAT(s,_alloc_type), ESTK_CONCAT(s,_start)); \ - } \ +#define ESTACK_PUSH(s, x) \ +do { \ + if (s.sp == s.end) { \ + erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + } \ + *s.sp++ = (x); \ } while(0) -#define ESTACK_PUSH(s, x) \ -do { \ - if (ESTK_CONCAT(s,_sp) == ESTK_CONCAT(s,_end)) { \ - erl_grow_stack(ESTK_CONCAT(s,_alloc_type),&ESTK_CONCAT(s,_start), \ - &ESTK_CONCAT(s,_sp), &ESTK_CONCAT(s,_end)); \ - } \ - *ESTK_CONCAT(s,_sp)++ = (x); \ +#define ESTACK_PUSH2(s, x, y) \ +do { \ + if (s.sp > s.end - 2) { \ + erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + } \ + *s.sp++ = (x); \ + *s.sp++ = (y); \ } while(0) -#define ESTACK_PUSH2(s, x, y) \ -do { \ - if (ESTK_CONCAT(s,_sp) > ESTK_CONCAT(s,_end) - 2) { \ - erl_grow_stack(ESTK_CONCAT(s,_alloc_type),&ESTK_CONCAT(s,_start), \ - &ESTK_CONCAT(s,_sp), &ESTK_CONCAT(s,_end)); \ - } \ - *ESTK_CONCAT(s,_sp)++ = (x); \ - *ESTK_CONCAT(s,_sp)++ = (y); \ +#define ESTACK_PUSH3(s, x, y, z) \ +do { \ + if (s.sp > s.end - 3) { \ + erl_grow_estack(&s, ESTK_DEF_STACK(s)); \ + } \ + *s.sp++ = (x); \ + *s.sp++ = (y); \ + *s.sp++ = (z); \ } while(0) -#define ESTACK_PUSH3(s, x, y, z) \ -do { \ - if (ESTK_CONCAT(s,_sp) > ESTK_CONCAT(s,_end) - 3) { \ - erl_grow_stack(&ESTK_CONCAT(s,_start), &ESTK_CONCAT(s,_sp), \ - &ESTK_CONCAT(s,_end)); \ - } \ - *ESTK_CONCAT(s,_sp)++ = (x); \ - *ESTK_CONCAT(s,_sp)++ = (y); \ - *ESTK_CONCAT(s,_sp)++ = (z); \ -} while(0) +#define ESTACK_COUNT(s) (s.sp - s.start) +#define ESTACK_ISEMPTY(s) (s.sp == s.start) +#define ESTACK_POP(s) (*(--s.sp)) -#define ESTACK_COUNT(s) (ESTK_CONCAT(s,_sp) - ESTK_CONCAT(s,_start)) -#define ESTACK_ISEMPTY(s) (ESTK_CONCAT(s,_sp) == ESTK_CONCAT(s,_start)) -#define ESTACK_POP(s) (*(--ESTK_CONCAT(s,_sp))) +/* + * WSTACK: same as ESTACK but with UWord instead of Eterm + */ +typedef struct { + UWord* wstart; + UWord* wsp; + UWord* wend; + ErtsAlcType_t alloc_type; +}ErtsWStack; -void erl_grow_wstack(ErtsAlcType_t a_type, UWord** start, UWord** sp, UWord** end); -#define WSTK_CONCAT(a,b) a##b -#define WSTK_SUBSCRIPT(s,i) *((UWord *)((byte *)WSTK_CONCAT(s,_start) + (i))) #define DEF_WSTACK_SIZE (16) -#define DECLARE_WSTACK(s) \ - UWord WSTK_CONCAT(s,_default_stack)[DEF_WSTACK_SIZE]; \ - UWord* WSTK_CONCAT(s,_start) = WSTK_CONCAT(s,_default_stack); \ - UWord* WSTK_CONCAT(s,_sp) = WSTK_CONCAT(s,_start); \ - UWord* WSTK_CONCAT(s,_end) = WSTK_CONCAT(s,_start) + DEF_WSTACK_SIZE; \ - ErtsAlcType_t WSTK_CONCAT(s,_alloc_type) = ERTS_ALC_T_ESTACK +void erl_grow_wstack(ErtsWStack*, UWord* def_stack); +#define WSTK_CONCAT(a,b) a##b +#define WSTK_DEF_STACK(s) WSTK_CONCAT(s,_default_wstack) + +#define DECLARE_WSTACK(s) \ + UWord WSTK_DEF_STACK(s)[DEF_WSTACK_SIZE]; \ + ErtsWStack s = { \ + WSTK_DEF_STACK(s), /* wstart */ \ + WSTK_DEF_STACK(s), /* wsp */ \ + WSTK_DEF_STACK(s) + DEF_WSTACK_SIZE, /* wend */ \ + ERTS_ALC_T_ESTACK /* alloc_type */ \ + } #define WSTACK_CHANGE_ALLOCATOR(s,t) \ do { \ - if (WSTK_CONCAT(s,_start) != WSTK_CONCAT(s,_default_stack)) { \ + if (s.wstart != WSTK_DEF_STACK(s)) { \ erl_exit(1, "Internal error - trying to change allocator " \ "type of active wstack\n"); \ } \ - WSTK_CONCAT(s,_alloc_type) = (t); \ + s.alloc_type = (t); \ } while (0) -#define DESTROY_WSTACK(s) \ -do { \ - if (WSTK_CONCAT(s,_start) != WSTK_CONCAT(s,_default_stack)) { \ - erts_free(WSTK_CONCAT(s,_alloc_type), WSTK_CONCAT(s,_start)); \ - } \ +#define DESTROY_WSTACK(s) \ +do { \ + if (s.wstart != WSTK_DEF_STACK(s)) { \ + erts_free(s.alloc_type, s.wstart); \ + } \ } while(0) + /* - * Do not free the stack after this, it may have pointers into what - * was saved in 'v'. 'v' and 'vsize' are changed by this macro. If - * 'v' points to anything, it should have been allocated by a previous - * call to this macro. Be careful to set a correct allocator prior to - * saving. - * 'v' can be any lvalue pointer, it will point to an array of UWord - * after calling this macro. + * Do not free the stack after this, it may have pointers into what + * was saved in 'dst'. */ -#define WSTACK_SAVE(s,v,vsize) /* v and vsize are "name parameters" */ \ -do { \ - Uint _wsz = WSTACK_COUNT(s); \ - if (WSTK_CONCAT(s,_start) == WSTK_CONCAT(s,_default_stack)) { \ - if ((v) == NULL) { \ - (v) = erts_alloc(WSTK_CONCAT(s,_alloc_type), \ - DEF_WSTACK_SIZE * sizeof(UWord)); \ - } \ - memcpy((v),WSTK_CONCAT(s,_start),_wsz*sizeof(UWord)); \ - } else { \ - (v) = (void *) WSTK_CONCAT(s,_start); \ - } \ - (vsize) = _wsz; \ +#define WSTACK_SAVE(s,dst)\ +do {\ + if (s.wstart == WSTK_DEF_STACK(s)) {\ + UWord _wsz = WSTACK_COUNT(s);\ + (dst)->wstart = erts_alloc(s.alloc_type,\ + DEF_WSTACK_SIZE * sizeof(UWord));\ + memcpy((dst)->wstart, s.wstart,_wsz*sizeof(UWord));\ + (dst)->wsp = (dst)->wstart + _wsz;\ + (dst)->wend = (dst)->wstart + DEF_WSTACK_SIZE;\ + (dst)->alloc_type = s.alloc_type;\ + } else\ + *(dst) = s;\ } while (0) -/* - * Use on empty stack, only the allocator can be changed before this - * The vector parameter is reset to NULL if the vector is moved to stack, - * otherwise it's kept for reuse, so a saved and restored vector might - * need freeing using the correct allocator parameter. - * 'v' can be any lvalue pointer, it's cast to an (UWord *). +#define DESTROY_SAVED_WSTACK(wstack)\ +do {\ + if ((wstack)->wstart) {\ + erts_free((wstack)->alloc_type, (wstack)->wstart);\ + (wstack)->wstart = NULL;\ + }\ +} while(0) + +/* + * Use on empty stack, only the allocator can be changed before this. + * The src stack is reset to NULL. */ -#define WSTACK_RESTORE(s, v, vsize) /*v is a "name parameter"*/ \ -do { \ - if ((vsize) > DEF_WSTACK_SIZE) { \ - Uint _ca = DEF_WSTACK_SIZE; \ - while (_ca < (vsize)) \ - _ca = _ca * 2; \ - WSTK_CONCAT(s,_start) = (UWord *) (v); \ - WSTK_CONCAT(s,_end) = ((UWord *)(v)) + _ca; \ - WSTK_CONCAT(s,_sp) = WSTK_CONCAT(s,_start) + (vsize); \ - (v) = NULL; \ - } else { \ - memcpy(WSTK_CONCAT(s,_start),(v),(vsize)*sizeof(UWord));\ - WSTK_CONCAT(s,_sp) = WSTK_CONCAT(s,_start) + (vsize); \ - } \ - } while (0) +#define WSTACK_RESTORE(s, src) \ +do { \ + ASSERT(s.wstart == WSTK_DEF_STACK(s)); \ + s = *(src); /* struct copy */ \ + (src)->wstart = NULL; \ + ASSERT(s.wsp >= s.wstart); \ + ASSERT(s.wsp <= s.wend); \ +} while (0) -#define WSTACK_IS_STATIC(s) (WSTK_CONCAT(s,_start) == WSTK_CONCAT(s,_default_stack)) +#define WSTACK_IS_STATIC(s) (s.wstart == WSTK_DEF_STACK(s))) -#define WSTACK_PUSH(s, x) \ -do { \ - if (WSTK_CONCAT(s,_sp) == WSTK_CONCAT(s,_end)) { \ - erl_grow_wstack(WSTK_CONCAT(s,_alloc_type), &WSTK_CONCAT(s,_start), \ - &WSTK_CONCAT(s,_sp), &WSTK_CONCAT(s,_end)); \ - } \ - *WSTK_CONCAT(s,_sp)++ = (x); \ +#define WSTACK_PUSH(s, x) \ +do { \ + if (s.wsp == s.wend) { \ + erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + } \ + *s.wsp++ = (x); \ } while(0) -#define WSTACK_PUSH2(s, x, y) \ -do { \ - if (WSTK_CONCAT(s,_sp) > WSTK_CONCAT(s,_end) - 2) { \ - erl_grow_wstack(WSTK_CONCAT(s,_alloc_type), &WSTK_CONCAT(s,_start), \ - &WSTK_CONCAT(s,_sp), &WSTK_CONCAT(s,_end)); \ - } \ - *WSTK_CONCAT(s,_sp)++ = (x); \ - *WSTK_CONCAT(s,_sp)++ = (y); \ +#define WSTACK_PUSH2(s, x, y) \ +do { \ + if (s.wsp > s.wend - 2) { \ + erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + } \ + *s.wsp++ = (x); \ + *s.wsp++ = (y); \ } while(0) -#define WSTACK_PUSH3(s, x, y, z) \ -do { \ - if (WSTK_CONCAT(s,_sp) > WSTK_CONCAT(s,_end) - 3) { \ - erl_grow_wstack(WSTK_CONCAT(s,_alloc_type), &WSTK_CONCAT(s,_start), \ - &WSTK_CONCAT(s,_sp), &WSTK_CONCAT(s,_end)); \ - } \ - *WSTK_CONCAT(s,_sp)++ = (x); \ - *WSTK_CONCAT(s,_sp)++ = (y); \ - *WSTK_CONCAT(s,_sp)++ = (z); \ +#define WSTACK_PUSH3(s, x, y, z) \ +do { \ + if (s.wsp > s.wend - 3) { \ + erl_grow_wstack(&s, WSTK_DEF_STACK(s)); \ + } \ + *s.wsp++ = (x); \ + *s.wsp++ = (y); \ + *s.wsp++ = (z); \ } while(0) -#define WSTACK_COUNT(s) (WSTK_CONCAT(s,_sp) - WSTK_CONCAT(s,_start)) +#define WSTACK_COUNT(s) (s.wsp - s.wstart) +#define WSTACK_ISEMPTY(s) (s.wsp == s.wstart) +#define WSTACK_POP(s) (*(--s.wsp)) -#define WSTACK_ISEMPTY(s) (WSTK_CONCAT(s,_sp) == WSTK_CONCAT(s,_start)) -#define WSTACK_POP(s) (*(--WSTK_CONCAT(s,_sp))) /* binary.c */ diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 297c4bf439..e0776cf67d 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -185,39 +185,41 @@ erts_set_hole_marker(Eterm* ptr, Uint sz) * Helper function for the ESTACK macros defined in global.h. */ void -erl_grow_stack(ErtsAlcType_t a_type, Eterm** start, Eterm** sp, Eterm** end) +erl_grow_estack(ErtsEStack* s, Eterm* default_estack) { - Uint old_size = (*end - *start); + Uint old_size = (s->end - s->start); Uint new_size = old_size * 2; - Uint sp_offs = *sp - *start; - if (new_size > 2 * DEF_ESTACK_SIZE) { - *start = erts_realloc(a_type, (void *) *start, new_size*sizeof(Eterm)); + Uint sp_offs = s->sp - s->start; + if (s->start != default_estack) { + s->start = erts_realloc(s->alloc_type, s->start, + new_size*sizeof(Eterm)); } else { - Eterm* new_ptr = erts_alloc(a_type, new_size*sizeof(Eterm)); - sys_memcpy(new_ptr, *start, old_size*sizeof(Eterm)); - *start = new_ptr; + Eterm* new_ptr = erts_alloc(s->alloc_type, new_size*sizeof(Eterm)); + sys_memcpy(new_ptr, s->start, old_size*sizeof(Eterm)); + s->start = new_ptr; } - *end = *start + new_size; - *sp = *start + sp_offs; + s->end = s->start + new_size; + s->sp = s->start + sp_offs; } /* - * Helper function for the ESTACK macros defined in global.h. + * Helper function for the WSTACK macros defined in global.h. */ void -erl_grow_wstack(ErtsAlcType_t a_type, UWord** start, UWord** sp, UWord** end) +erl_grow_wstack(ErtsWStack* s, UWord* default_wstack) { - Uint old_size = (*end - *start); + Uint old_size = (s->wend - s->wstart); Uint new_size = old_size * 2; - Uint sp_offs = *sp - *start; - if (new_size > 2 * DEF_ESTACK_SIZE) { - *start = erts_realloc(a_type, (void *) *start, new_size*sizeof(UWord)); + Uint sp_offs = s->wsp - s->wstart; + if (s->wstart != default_wstack) { + s->wstart = erts_realloc(s->alloc_type, s->wstart, + new_size*sizeof(UWord)); } else { - UWord* new_ptr = erts_alloc(a_type, new_size*sizeof(UWord)); - sys_memcpy(new_ptr, *start, old_size*sizeof(UWord)); - *start = new_ptr; + UWord* new_ptr = erts_alloc(s->alloc_type, new_size*sizeof(UWord)); + sys_memcpy(new_ptr, s->wstart, old_size*sizeof(UWord)); + s->wstart = new_ptr; } - *end = *start + new_size; - *sp = *start + sp_offs; + s->wend = s->wstart + new_size; + s->wsp = s->wstart + sp_offs; } /* CTYPE macros */ @@ -2686,11 +2688,6 @@ tailrecur_ne: { FloatDef f1, f2; Eterm big; -#if HEAP_ON_C_STACK - Eterm big_buf[CMP_TMP_HEAP_SIZE]; /* If HEAP_ON_C_STACK */ -#else - Eterm *big_buf = erts_get_scheduler_data()->cmp_tmp_heap; -#endif #if HALFWORD_HEAP Wterm aw = is_immed(a) ? a : rterm2wterm(a,a_base); Wterm bw = is_immed(b) ? b : rterm2wterm(b,b_base); @@ -2701,6 +2698,8 @@ tailrecur_ne: #define MAX_LOSSLESS_FLOAT ((double)((1LL << 53) - 2)) #define MIN_LOSSLESS_FLOAT ((double)(((1LL << 53) - 2)*-1)) #define BIG_ARITY_FLOAT_MAX (1024 / D_EXP) /* arity of max float as a bignum */ + Eterm big_buf[BIG_NEED_SIZE(BIG_ARITY_FLOAT_MAX)]; + b_tag = tag_val_def(bw); switch(_NUMBER_CODE(a_tag, b_tag)) { @@ -2716,8 +2715,9 @@ tailrecur_ne: /* Float is within the no loss limit */ f1.fd = signed_val(aw); j = float_comp(f1.fd, f2.fd); + } #if ERTS_SIZEOF_ETERM == 8 - } else if (f2.fd > (double) (MAX_SMALL + 1)) { + else if (f2.fd > (double) (MAX_SMALL + 1)) { /* Float is a positive bignum, i.e. bigger */ j = -1; } else if (f2.fd < (double) (MIN_SMALL - 1)) { @@ -2728,7 +2728,7 @@ tailrecur_ne: j = signed_val(aw) - (Sint) f2.fd; } #else - } else { + else { /* If float is positive it is bigger than small */ j = (f2.fd > 0.0) ? -1 : 1; } @@ -2762,8 +2762,8 @@ tailrecur_ne: j = float_comp(f1.fd, f2.fd); } } else { - big = double_to_big(f2.fd, big_buf); - j = big_comp(aw, big); + big = double_to_big(f2.fd, big_buf, sizeof(big_buf)/sizeof(Eterm)); + j = big_comp(aw, rterm2wterm(big,big_buf)); } if (_NUMBER_CODE(a_tag, b_tag) == FLOAT_BIG) { j = -j; @@ -2775,8 +2775,9 @@ tailrecur_ne: /* Float is within the no loss limit */ f2.fd = signed_val(bw); j = float_comp(f1.fd, f2.fd); + } #if ERTS_SIZEOF_ETERM == 8 - } else if (f1.fd > (double) (MAX_SMALL + 1)) { + else if (f1.fd > (double) (MAX_SMALL + 1)) { /* Float is a positive bignum, i.e. bigger */ j = 1; } else if (f1.fd < (double) (MIN_SMALL - 1)) { @@ -2787,7 +2788,7 @@ tailrecur_ne: j = (Sint) f1.fd - signed_val(bw); } #else - } else { + else { /* If float is positive it is bigger than small */ j = (f1.fd > 0.0) ? 1 : -1; } @@ -2846,7 +2847,7 @@ pop_next: return 0; not_equal: - DESTROY_ESTACK(stack); + DESTROY_WSTACK(stack); return j; #undef CMP_NODES diff --git a/erts/emulator/drivers/common/efile_drv.c b/erts/emulator/drivers/common/efile_drv.c index 8de578d8b7..dca979c13a 100644 --- a/erts/emulator/drivers/common/efile_drv.c +++ b/erts/emulator/drivers/common/efile_drv.c @@ -111,7 +111,6 @@ #include "erl_driver.h" #include "erl_efile.h" #include "erl_threads.h" -#include "zlib.h" #include "gzio.h" #include "dtrace-wrapper.h" #include <ctype.h> @@ -818,7 +817,7 @@ file_start(ErlDrvPort port, char* command) static void do_close(int flags, SWord fd) { if (flags & EFILE_COMPRESSED) { - erts_gzclose((gzFile)(fd)); + erts_gzclose((ErtsGzFile)(fd)); } else { efile_closefile((int) fd); } @@ -1136,7 +1135,7 @@ static void invoke_read(void *data) } read_size = size; if (d->flags & EFILE_COMPRESSED) { - read_size = erts_gzread((gzFile)d->fd, + read_size = erts_gzread((ErtsGzFile)d->fd, d->c.read.binp->orig_bytes + d->c.read.bin_offset, size); status = (read_size != (size_t) -1); @@ -1209,7 +1208,7 @@ static void invoke_read_line(void *data) size = need - d->c.read_line.read_size; } if (d->flags & EFILE_COMPRESSED) { - read_size = erts_gzread((gzFile)d->fd, + read_size = erts_gzread((ErtsGzFile)d->fd, d->c.read_line.binp->orig_bytes + d->c.read_line.read_offset + d->c.read_line.read_size, size); @@ -1250,7 +1249,7 @@ static void invoke_read_line(void *data) d->c.read_line.read_size -= too_much; ASSERT(d->c.read_line.read_size >= 0); if (d->flags & EFILE_COMPRESSED) { - Sint64 location = erts_gzseek((gzFile)d->fd, + Sint64 location = erts_gzseek((ErtsGzFile)d->fd, -((Sint64) too_much), EFILE_SEEK_CUR); if (location == -1) { d->result_ok = 0; @@ -1535,7 +1534,7 @@ static void invoke_writev(void *data) { */ errno = EINVAL; if (! (status = - erts_gzwrite((gzFile)d->fd, + erts_gzwrite((ErtsGzFile)d->fd, iov[i].iov_base, iov[i].iov_len)) == iov[i].iov_len) { d->errInfo.posix_errno = @@ -1797,7 +1796,7 @@ static void invoke_lseek(void *data) d->errInfo.posix_errno = EINVAL; status = 0; } else { - d->c.lseek.location = erts_gzseek((gzFile)d->fd, + d->c.lseek.location = erts_gzseek((ErtsGzFile)d->fd, offset, d->c.lseek.origin); if (d->c.lseek.location == -1) { d->errInfo.posix_errno = errno; @@ -1885,7 +1884,7 @@ static void invoke_open(void *data) if (status || (d->errInfo.posix_errno != EISDIR)) { mode = (d->flags & EFILE_MODE_READ) ? "rb" : "wb"; d->fd = (SWord) erts_gzopen(d->b, mode); - if ((gzFile)d->fd) { + if ((ErtsGzFile)d->fd) { status = 1; } else { if (errno == 0) { diff --git a/erts/emulator/drivers/common/gzio.c b/erts/emulator/drivers/common/gzio.c index e085c262b0..653f3954b1 100644 --- a/erts/emulator/drivers/common/gzio.c +++ b/erts/emulator/drivers/common/gzio.c @@ -77,7 +77,7 @@ typedef struct gz_stream { * this structure. */ } gz_stream; -local gzFile gz_open OF((const char *path, const char *mode)); +local ErtsGzFile gz_open OF((const char *path, const char *mode)); local int get_byte OF((gz_stream *s)); local void check_header OF((gz_stream *s)); local int destroy OF((gz_stream *s)); @@ -144,7 +144,7 @@ local uLong getLong OF((gz_stream *s)); can be checked to distinguish the two cases (if errno is zero, the zlib error is Z_MEM_ERROR). */ -local gzFile gz_open (path, mode) +local ErtsGzFile gz_open (path, mode) const char *path; const char *mode; { @@ -179,7 +179,7 @@ local gzFile gz_open (path, mode) s->path = (char*)ALLOC(FILENAME_BYTELEN(path)+FILENAME_CHARSIZE); if (s->path == NULL) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } FILENAME_COPY(s->path, path); /* do this early for debugging */ @@ -197,7 +197,7 @@ local gzFile gz_open (path, mode) } while (*p++ && m < fmode + sizeof(fmode) - 1); *m = '\0'; if (s->mode == '\0') - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; if (s->mode == 'w') { err = deflateInit2(&(s->stream), level, @@ -207,7 +207,7 @@ local gzFile gz_open (path, mode) s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE); if (err != Z_OK || s->outbuf == Z_NULL) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } } else { /* @@ -221,7 +221,7 @@ local gzFile gz_open (path, mode) s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); if (err != Z_OK || s->inbuf == Z_NULL) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } } s->stream.avail_out = Z_BUFSIZE; @@ -229,17 +229,16 @@ local gzFile gz_open (path, mode) errno = 0; #if defined(FILENAMES_16BIT) { - char wfmode[160]; - int i=0,j; - for(j=0;fmode[j] != '\0';++j) { - wfmode[i++]=fmode[j]; - wfmode[i++]='\0'; + WCHAR wfmode[80]; + int i = 0; + int j; + for(j = 0; fmode[j] != '\0'; ++j) { + wfmode[i++] = (WCHAR) fmode[j]; } - wfmode[i++] = '\0'; - wfmode[i++] = '\0'; - s->file = F_OPEN(path, wfmode); + wfmode[i++] = L'\0'; + s->file = _wfopen((WCHAR *)path, wfmode); if (s->file == NULL) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } } #elif defined(UNIX) @@ -249,18 +248,18 @@ local gzFile gz_open (path, mode) s->file = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666); } if (s->file == -1) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } #else - s->file = F_OPEN(path, fmode); + s->file = fopen(path, fmode); if (s->file == NULL) { - return s->destroy(s), (gzFile)Z_NULL; + return s->destroy(s), (ErtsGzFile)Z_NULL; } #endif if (s->mode == 'r') { check_header(s); /* skip the .gz header */ } - return (gzFile)s; + return (ErtsGzFile)s; } /* =========================================================================== @@ -296,7 +295,7 @@ local int gz_rewind (gz_stream *s) /* =========================================================================== Opens a gzip (.gz) file for reading or writing. */ -gzFile erts_gzopen (path, mode) +ErtsGzFile erts_gzopen (path, mode) const char *path; const char *mode; { @@ -447,7 +446,7 @@ local int destroy (s) gzread returns the number of bytes actually read (0 for end of file). */ int -erts_gzread(gzFile file, voidp buf, unsigned len) +erts_gzread(ErtsGzFile file, voidp buf, unsigned len) { gz_stream *s = (gz_stream*)file; Bytef *start = buf; /* starting point for crc computation */ @@ -557,7 +556,7 @@ erts_gzread(gzFile file, voidp buf, unsigned len) gzwrite returns the number of bytes actually written (0 in case of error). */ int -erts_gzwrite(gzFile file, voidp buf, unsigned len) +erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len) { gz_stream *s = (gz_stream*)file; @@ -593,7 +592,7 @@ erts_gzwrite(gzFile file, voidp buf, unsigned len) */ int -erts_gzseek(gzFile file, int offset, int whence) +erts_gzseek(ErtsGzFile file, int offset, int whence) { int pos; gz_stream* s = (gz_stream *) file; @@ -655,7 +654,7 @@ erts_gzseek(gzFile file, int offset, int whence) degrade compression. */ int -erts_gzflush(gzFile file, int flush) +erts_gzflush(ErtsGzFile file, int flush) { uInt len; int done = 0; @@ -714,7 +713,7 @@ local uLong getLong (s) and deallocates all the (de)compression state. */ int -erts_gzclose(gzFile file) +erts_gzclose(ErtsGzFile file) { int err; gz_stream *s = (gz_stream*)file; @@ -723,9 +722,9 @@ erts_gzclose(gzFile file) if (s->mode == 'w') { err = erts_gzflush (file, Z_FINISH); - if (err != Z_OK) return s->destroy(file); + if (err != Z_OK) return s->destroy(s); } - return s->destroy(file); + return s->destroy(s); } diff --git a/erts/emulator/drivers/common/gzio.h b/erts/emulator/drivers/common/gzio.h index 3f1e546140..ea50d922ec 100644 --- a/erts/emulator/drivers/common/gzio.h +++ b/erts/emulator/drivers/common/gzio.h @@ -17,11 +17,15 @@ * %CopyrightEnd% */ -gzFile erts_gzopen (const char *path, const char *mode); -int erts_gzread(gzFile file, voidp buf, unsigned len); -int erts_gzwrite(gzFile file, voidp buf, unsigned len); -int erts_gzseek(gzFile, int, int); -int erts_gzflush(gzFile file, int flush); -int erts_gzclose(gzFile file); +#include "zlib.h" + +typedef struct erts_gzFile* ErtsGzFile; + +ErtsGzFile erts_gzopen (const char *path, const char *mode); +int erts_gzread(ErtsGzFile file, voidp buf, unsigned len); +int erts_gzwrite(ErtsGzFile file, voidp buf, unsigned len); +int erts_gzseek(ErtsGzFile, int, int); +int erts_gzflush(ErtsGzFile file, int flush); +int erts_gzclose(ErtsGzFile file); ErlDrvBinary* erts_gzinflate_buffer(char*, uLong); ErlDrvBinary* erts_gzdeflate_buffer(char*, uLong); diff --git a/erts/emulator/drivers/common/gzio_zutil.h b/erts/emulator/drivers/common/gzio_zutil.h index 00eccc80fc..854205cc2c 100644 --- a/erts/emulator/drivers/common/gzio_zutil.h +++ b/erts/emulator/drivers/common/gzio_zutil.h @@ -23,12 +23,6 @@ * that may change or not exist at all. */ -#ifndef HAVE_LIBZ -/* Use our "real" copy of zutil.h if we don't use shared zlib */ -#include "zutil.h" - -#else /* HAVE_LIBZ: Shared zlib is used */ - #define local static #define DEF_MEM_LEVEL 8 #define zmemcpy sys_memcpy @@ -77,6 +71,3 @@ # define OS_CODE 0x03 /* assume Unix */ #endif - -#endif /* HAVE_LIBZ */ - diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 80937dfcc8..4a861b121c 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -854,9 +854,10 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n) #define INET_IFNAMSIZ 16 /* INET Ignore states */ -#define INET_IGNORE_NONE 0 -#define INET_IGNORE_READ 1 -#define INET_IGNORE_WRITE 1 << 1 +#define INET_IGNORE_NONE 0 +#define INET_IGNORE_READ (1 << 0) +#define INET_IGNORE_WRITE (1 << 1) +#define INET_IGNORE_PASSIVE (1 << 2) /* Max length of Erlang Term Buffer (for outputting structured terms): */ #ifdef HAVE_SCTP @@ -8307,11 +8308,19 @@ static ErlDrvSSizeT inet_ctl(inet_descriptor* desc, int cmd, char* buf, if (*buf == 1 && !desc->is_ignored) { sock_select(desc, (FD_READ|FD_WRITE|FD_CLOSE|ERL_DRV_USE_NO_CALLBACK), 0); - desc->is_ignored = INET_IGNORE_READ; + if (desc->active) + desc->is_ignored = INET_IGNORE_READ; + else + desc->is_ignored = INET_IGNORE_PASSIVE; } else if (*buf == 0 && desc->is_ignored) { - int flags = (FD_READ|FD_CLOSE|((desc->is_ignored & INET_IGNORE_WRITE)?FD_WRITE:0)); + int flags = FD_CLOSE; + if (desc->is_ignored & INET_IGNORE_READ) + flags |= FD_READ; + if (desc->is_ignored & INET_IGNORE_WRITE) + flags |= FD_WRITE; desc->is_ignored = INET_IGNORE_NONE; - sock_select(desc, flags, 1); + if (flags != FD_CLOSE) + sock_select(desc, flags, 1); } else return ctl_error(EINVAL, rbuf, rsize); @@ -8988,6 +8997,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, driver_set_timer(desc->inet.port, timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); + else + INETP(desc)->is_ignored |= INET_IGNORE_READ; } } return ctl_reply(INET_REP_OK, tbuf, 2, rbuf, rsize); diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 59e34eb819..a5294ad84e 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -409,8 +409,10 @@ void sys_tty_reset(int exit_code) #ifdef __tile__ /* Direct malloc to spread memory around the caches of multiple tiles. */ #include <malloc.h> +#if defined(MALLOC_USE_HASH) MALLOC_USE_HASH(1); #endif +#endif #ifdef USE_THREADS diff --git a/erts/emulator/sys/win32/erl_win_dyn_driver.h b/erts/emulator/sys/win32/erl_win_dyn_driver.h index b9a9838a36..4010d939e5 100644 --- a/erts/emulator/sys/win32/erl_win_dyn_driver.h +++ b/erts/emulator/sys/win32/erl_win_dyn_driver.h @@ -82,7 +82,6 @@ WDD_TYPEDEF(int, erl_drv_send_term, (ErlDrvTermData, ErlDrvTermData, ErlDrvTermD WDD_TYPEDEF(int, driver_send_term, (ErlDrvPort, ErlDrvTermData, ErlDrvTermData*, int)); WDD_TYPEDEF(unsigned int, driver_async_port_key, (ErlDrvPort)); WDD_TYPEDEF(long, driver_async, (ErlDrvPort,unsigned int*,void (*)(void*),void*,void (*)(void*))); -WDD_TYPEDEF(int, driver_async_cancel, (unsigned int)); WDD_TYPEDEF(int, driver_lock_driver, (ErlDrvPort)); WDD_TYPEDEF(void *, driver_dl_open, (char *)); WDD_TYPEDEF(void *, driver_dl_sym, (void *, char *)); @@ -200,7 +199,6 @@ typedef struct { WDD_FTYPE(driver_send_term) *driver_send_term; WDD_FTYPE(driver_async_port_key) *driver_async_port_key; WDD_FTYPE(driver_async) *driver_async; - WDD_FTYPE(driver_async_cancel) *driver_async_cancel; WDD_FTYPE(driver_lock_driver) *driver_lock_driver; WDD_FTYPE(driver_dl_open) *driver_dl_open; WDD_FTYPE(driver_dl_sym) *driver_dl_sym; @@ -312,7 +310,6 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks; #define driver_send_term (WinDynDriverCallbacks.driver_send_term) #define driver_async_port_key (WinDynDriverCallbacks.driver_async_port_key) #define driver_async (WinDynDriverCallbacks.driver_async) -#define driver_async_cancel (WinDynDriverCallbacks.driver_async_cancel) #define driver_lock_driver (WinDynDriverCallbacks.driver_lock_driver) #define driver_dl_open (WinDynDriverCallbacks.driver_dl_open) #define driver_dl_sym (WinDynDriverCallbacks.driver_dl_sym) @@ -448,7 +445,6 @@ do { \ ((W).driver_send_term) = driver_send_term; \ ((W).driver_async_port_key) = driver_async_port_key; \ ((W).driver_async) = driver_async; \ -((W).driver_async_cancel) = driver_async_cancel; \ ((W).driver_lock_driver) = driver_lock_driver; \ ((W).driver_dl_open) = driver_dl_open; \ ((W).driver_dl_sym) = driver_dl_sym; \ diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl index bce4278337..a390c536bb 100644 --- a/erts/emulator/test/binary_SUITE.erl +++ b/erts/emulator/test/binary_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2013. All Rights Reserved. +%% Copyright Ericsson AB 1997-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -58,10 +58,10 @@ ordering/1,unaligned_order/1,gc_test/1, bit_sized_binary_sizes/1, otp_6817/1,deep/1,obsolete_funs/1,robustness/1,otp_8117/1, - otp_8180/1, ttb_trap/1]). + otp_8180/1, trapping/1]). %% Internal exports. --export([sleeper/0,ttb_loop/2]). +-export([sleeper/0,trapping_loop/4]). suite() -> [{ct_hooks,[ts_install_cth]}, {timetrap,{minutes,2}}]. @@ -76,7 +76,7 @@ all() -> bad_term_to_binary, more_bad_terms, otp_5484, otp_5933, ordering, unaligned_order, gc_test, bit_sized_binary_sizes, otp_6817, otp_8117, deep, - obsolete_funs, robustness, otp_8180, ttb_trap]. + obsolete_funs, robustness, otp_8180, trapping]. groups() -> []. @@ -506,8 +506,8 @@ external_size(Config) when is_list(Config) -> io:format("Unaligned size: ~p\n", [Sz2]), ?line ?t:fail() end, - ?line erlang:external_size(Bin) =:= erlang:external_size(Bin, [{minor_version, 1}]), - ?line erlang:external_size(Unaligned) =:= erlang:external_size(Unaligned, [{minor_version, 1}]). + true = (erlang:external_size(Bin) =:= erlang:external_size(Bin, [{minor_version, 1}])), + true = (erlang:external_size(Unaligned) =:= erlang:external_size(Unaligned, [{minor_version, 1}])). external_size_1(Term, Size0, Limit) when Size0 < Limit -> case erlang:external_size(Term) of @@ -1241,16 +1241,27 @@ bsbs_1(A) -> Bin = binary_to_term_stress(<<131,$M,5:32,A,0,0,0,0,0>>), BinSize = bit_size(Bin). +%% lists:foldl(_,_,lists:seq(_,_)) with less heap consumption +lists_foldl_seq(Fun, Acc0, N, To) when N =< To -> + Acc1 = Fun(N, Acc0), + lists_foldl_seq(Fun, Acc1, N+1, To); + +lists_foldl_seq(_, Acc, _, _) -> + Acc. + deep(Config) when is_list(Config) -> - ?line deep_roundtrip(lists:foldl(fun(E, A) -> - [E,A] - end, [], lists:seq(1, 1000000))), - ?line deep_roundtrip(lists:foldl(fun(E, A) -> - {E,A} - end, [], lists:seq(1, 1000000))), - ?line deep_roundtrip(lists:foldl(fun(E, A) -> - fun() -> {E,A} end - end, [], lists:seq(1, 1000000))), + deep_roundtrip(lists_foldl_seq(fun(E, A) -> + [E,A] + end, [], 1, 1000000)), + erlang:garbage_collect(), + deep_roundtrip(lists_foldl_seq(fun(E, A) -> + {E,A} + end, [], 1, 1000000)), + erlang:garbage_collect(), + deep_roundtrip(lists_foldl_seq(fun(E, A) -> + fun() -> {E,A} end + end, [], 1, 1000000)), + erlang:garbage_collect(), ok. deep_roundtrip(T) -> @@ -1334,36 +1345,44 @@ run_otp_8180(Name) -> end || Bin <- Bins], ok. -%% Test that exit and GC during term_to_binary trap does not crash. -ttb_trap(Config) when is_list(Config)-> - case erlang:system_info(wordsize) of - N when N < 8 -> - {skipped, "Only on 64bit machines"}; - _ -> - do_ttb_trap(5) - end. +%% Test that exit and GC during trapping term_to_binary and binary_to_term +%% does not crash. +trapping(Config) when is_list(Config)-> + do_trapping(5, term_to_binary, + fun() -> [lists:duplicate(2000000,2000000)] end), + do_trapping(5, binary_to_term, + fun() -> [term_to_binary(lists:duplicate(2000000,2000000))] end). -do_ttb_trap(0) -> +do_trapping(0, _, _) -> ok; -do_ttb_trap(N) -> - Pid = spawn(?MODULE,ttb_loop,[1000,self()]), +do_trapping(N, Bif, ArgFun) -> + io:format("N=~p: Do ~p ~s gc.\n", [N, Bif, case N rem 2 of 0 -> "with"; 1 -> "without" end]), + Pid = spawn(?MODULE,trapping_loop,[Bif, ArgFun, 1000, self()]), receive ok -> ok end, receive after 100 -> ok end, - erlang:garbage_collect(Pid), - receive after 100 -> ok end, + Ref = make_ref(), + case N rem 2 of + 0 -> erlang:garbage_collect(Pid, [{async,Ref}]), + receive after 100 -> ok end; + 1 -> void + end, exit(Pid,kill), + case N rem 2 of + 0 -> receive {garbage_collect, Ref, _} -> ok end; + 1 -> void + end, receive after 1 -> ok end, - do_ttb_trap(N-1). + do_trapping(N-1, Bif, ArgFun). -ttb_loop(N,Pid) -> - Term = lists:duplicate(2000000,2000000), +trapping_loop(Bif, ArgFun, N, Pid) -> + Args = ArgFun(), Pid ! ok, - ttb_loop2(N,Term). -ttb_loop2(0,_T) -> + trapping_loop2(Bif,Args,N). +trapping_loop2(_,_,0) -> ok; -ttb_loop2(N,T) -> - apply(erlang,term_to_binary,[T]), - ttb_loop2(N-1,T). +trapping_loop2(Bif,Args,N) -> + apply(erlang,Bif,Args), + trapping_loop2(Bif, Args, N-1). %% Utilities. diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl index 7087542899..06211406b4 100644 --- a/erts/emulator/test/driver_SUITE.erl +++ b/erts/emulator/test/driver_SUITE.erl @@ -2075,6 +2075,21 @@ thr_msg_blast(Config) when is_list(Config) -> Res end. +-define(IN_RANGE(LoW_, VaLuE_, HiGh_), + case in_range(LoW_, VaLuE_, HiGh_) of + true -> ok; + false -> + case erlang:system_info(lock_checking) of + true -> + ?t:format("~p:~p: Ignore bad sched count due to " + "lock checking~n", + [?MODULE,?LINE]); + false -> + ?t:fail({unexpected_sched_counts, VaLuE_}) + end + end). + + consume_timeslice(Config) when is_list(Config) -> %% %% Verify that erl_drv_consume_timeslice() works. @@ -2131,15 +2146,8 @@ consume_timeslice(Config) when is_list(Config) -> Proc1 ! Go, wait_command_msgs(Port, 10), [{Port, Sprt1}, {Proc1, Sproc1}] = count_pp_sched_stop([Port, Proc1]), - case Sprt1 of - 10 -> - true = in_range(5, Sproc1-10, 7); - _ -> - case erlang:system_info(lock_checking) of - true -> ?t:format("Ignore bad sched count due to lock checking", []); - false -> ?t:fail({unexpected_sched_counts, Sprt1, Sproc1}) - end - end, + ?IN_RANGE(10, Sprt1, 10), + ?IN_RANGE(5, Sproc1-10, 7), "disabled" = port_control(Port, $D, ""), Proc2 = spawn_link(fun () -> @@ -2160,15 +2168,8 @@ consume_timeslice(Config) when is_list(Config) -> Proc2 ! Go, wait_command_msgs(Port, 10), [{Port, Sprt2}, {Proc2, Sproc2}] = count_pp_sched_stop([Port, Proc2]), - case Sprt2 of - 10 -> - true = in_range(1, Sproc2-10, 2); - _ -> - case erlang:system_info(lock_checking) of - true -> ?t:format("Ignore bad sched count due to lock checking", []); - false -> ?t:fail({unexpected_sched_counts, Sprt2, Sproc2}) - end - end, + ?IN_RANGE(10, Sprt2, 10), + ?IN_RANGE(1, Sproc2-10, 2), "enabled" = port_control(Port, $E, ""), Proc3 = spawn_link(fun () -> @@ -2188,15 +2189,8 @@ consume_timeslice(Config) when is_list(Config) -> Proc3 ! Go, wait_command_msgs(Port, 10), [{Port, Sprt3}, {Proc3, Sproc3}] = count_pp_sched_stop([Port, Proc3]), - case Sprt3 of - 10 -> - true = in_range(5, Sproc3-10, 7); - _ -> - case erlang:system_info(lock_checking) of - true -> ?t:format("Ignore bad sched count due to lock checking", []); - false -> ?t:fail({unexpected_sched_counts, Sprt3, Sproc3}) - end - end, + ?IN_RANGE(10, Sprt3, 10), + ?IN_RANGE(5, Sproc3-10, 7), "disabled" = port_control(Port, $D, ""), Proc4 = spawn_link(fun () -> @@ -2216,15 +2210,8 @@ consume_timeslice(Config) when is_list(Config) -> Proc4 ! Go, wait_command_msgs(Port, 10), [{Port, Sprt4}, {Proc4, Sproc4}] = count_pp_sched_stop([Port, Proc4]), - case Sprt4 of - 10 -> - true = in_range(1, Sproc4-10, 2); - _ -> - case erlang:system_info(lock_checking) of - true -> ?t:format("Ignore bad sched count due to lock checking", []); - false -> ?t:fail({unexpected_sched_counts, Sprt4, Sproc4}) - end - end, + ?IN_RANGE(10, Sprt4, 10), + ?IN_RANGE(1, Sproc4-10, 2), SOnl = erlang:system_info(schedulers_online), %% If only one scheduler use port with parallelism set to true, @@ -2272,8 +2259,8 @@ consume_timeslice(Config) when is_list(Config) -> wait_procs_exit([W5, Proc5]), wait_command_msgs(Port2, 10), [{Port2, Sprt5}, {Proc5, Sproc5}] = count_pp_sched_stop([Port2, Proc5]), - true = in_range(2, Sproc5, 3), - true = in_range(7, Sprt5, 20), + ?IN_RANGE(2, Sproc5, 3), + ?IN_RANGE(6, Sprt5, 20), count_pp_sched_start(), "disabled" = port_control(Port2, $D, ""), @@ -2307,8 +2294,8 @@ consume_timeslice(Config) when is_list(Config) -> wait_procs_exit([W6, Proc6]), wait_command_msgs(Port2, 10), [{Port2, Sprt6}, {Proc6, Sproc6}] = count_pp_sched_stop([Port2, Proc6]), - true = in_range(2, Sproc6, 3), - true = in_range(3, Sprt6, 6), + ?IN_RANGE(2, Sproc6, 3), + ?IN_RANGE(2, Sprt6, 6), process_flag(scheduler, 0), @@ -2316,6 +2303,7 @@ consume_timeslice(Config) when is_list(Config) -> receive {Port2, closed} -> ok end, ok. + wait_command_msgs(_, 0) -> ok; wait_command_msgs(Port, N) -> diff --git a/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c b/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c index 9d8bbac231..e2b338f801 100644 --- a/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c +++ b/erts/emulator/test/driver_SUITE_data/ioq_exit_drv.c @@ -277,10 +277,6 @@ static void stop(ErlDrvData drv_data) case IOQ_EXIT_TIMEOUT_ASYNC: driver_cancel_timer(ddp->port); break; - case IOQ_EXIT_READY_ASYNC: - if (ddp->outstanding_async_task) - driver_async_cancel(ddp->async_task); - break; default: break; } diff --git a/erts/emulator/test/driver_SUITE_data/otp_9302_drv.c b/erts/emulator/test/driver_SUITE_data/otp_9302_drv.c index 88df73f696..7c144d20cf 100644 --- a/erts/emulator/test/driver_SUITE_data/otp_9302_drv.c +++ b/erts/emulator/test/driver_SUITE_data/otp_9302_drv.c @@ -227,6 +227,4 @@ static void output(ErlDrvData drv_data, ad[4]->term_data.msg = driver_mk_atom("end_of_jobs"); for (i = 0; i < sizeof(id)/sizeof(id[0]); i++) id[i] = driver_async(data->port, &key, async_invoke, ad[i], driver_free); - if (id[2] > 0) - driver_async_cancel(id[2]); } diff --git a/erts/emulator/test/driver_SUITE_data/sys_info_base_drv.c b/erts/emulator/test/driver_SUITE_data/sys_info_base_drv.c index c22a415c59..e44c7dbd5e 100644 --- a/erts/emulator/test/driver_SUITE_data/sys_info_base_drv.c +++ b/erts/emulator/test/driver_SUITE_data/sys_info_base_drv.c @@ -19,14 +19,14 @@ /* * Author: Rickard Green * - * Description: Driver that fakes driver version 2.0 and tests + * Description: Driver that fakes driver version 3.0 and tests * driver_system_info(). * */ #include "sys_info_drv_impl.h" -#define SYS_INFO_DRV_MAJOR_VSN 2 +#define SYS_INFO_DRV_MAJOR_VSN 3 #define SYS_INFO_DRV_MINOR_VSN 0 #define SYS_INFO_DRV_NAME_STR "sys_info_base_drv" #define SYS_INFO_DRV_NAME sys_info_base_drv diff --git a/erts/emulator/test/driver_SUITE_data/sys_info_prev_drv.c b/erts/emulator/test/driver_SUITE_data/sys_info_prev_drv.c index 815d96cc97..63c69f751c 100644 --- a/erts/emulator/test/driver_SUITE_data/sys_info_prev_drv.c +++ b/erts/emulator/test/driver_SUITE_data/sys_info_prev_drv.c @@ -19,14 +19,14 @@ /* * Author: Rickard Green * - * Description: Driver that fakes driver version 2.0 and tests + * Description: Driver that fakes driver version 3.0 and tests * driver_system_info(). * */ #include "sys_info_drv_impl.h" -#define SYS_INFO_DRV_MAJOR_VSN 2 +#define SYS_INFO_DRV_MAJOR_VSN 3 #define SYS_INFO_DRV_MINOR_VSN 0 #define SYS_INFO_DRV_NAME_STR "sys_info_prev_drv" #define SYS_INFO_DRV_NAME sys_info_prev_drv diff --git a/erts/emulator/test/exception_SUITE.erl b/erts/emulator/test/exception_SUITE.erl index 109cec25cb..09a7a87a9a 100644 --- a/erts/emulator/test/exception_SUITE.erl +++ b/erts/emulator/test/exception_SUITE.erl @@ -589,6 +589,13 @@ line_numbers(Config) when is_list(Config) -> [{file,ModFile},{line,_}]}|_]}} = (catch build_binary2(8, bad_binary)), + <<"abc",357:16>> = build_binary3(<<"abc">>), + {'EXIT',{badarg,[{?MODULE,build_binary3,1, + [{file,"bit_syntax.erl"},{line,72511}]}, + {?MODULE,line_numbers,1, + [{file,ModFile},{line,_}]}|_]}} = + (catch build_binary3(no_binary)), + {'EXIT',{function_clause, [{?MODULE,do_call_abs,[y,y], [{file,"gc_bif.erl"},{line,18}]}, @@ -691,6 +698,10 @@ build_binary2(Size, Bin) -> %Line 72505 id(0), %Line 72506 <<7:Size,Bin/binary>>. %Line 72507 +build_binary3(Bin) -> %Line 72509 + id(0), %Line 72510 + <<Bin/binary,357:16>>. %Line 72511 + -file("gc_bif.erl", 17). do_call_abs(x, Arg) -> %Line 18 abs(Arg). %Line 19 diff --git a/erts/emulator/test/match_spec_SUITE.erl b/erts/emulator/test/match_spec_SUITE.erl index bcc46d78ba..330bef7104 100644 --- a/erts/emulator/test/match_spec_SUITE.erl +++ b/erts/emulator/test/match_spec_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1999-2013. All Rights Reserved. +%% Copyright Ericsson AB 1999-2014. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -25,6 +25,7 @@ trace_control_word/1, silent/1, silent_no_ms/1, silent_test/1, ms_trace2/1, ms_trace3/1, boxed_and_small/1, destructive_in_test_bif/1, guard_exceptions/1, + empty_list/1, unary_plus/1, unary_minus/1, moving_labels/1]). -export([fpe/1]). -export([otp_9422/1]). @@ -60,6 +61,7 @@ all() -> guard_exceptions, unary_plus, unary_minus, fpe, moving_labels, faulty_seq_trace, + empty_list, otp_9422]; true -> [not_run] end. @@ -897,6 +899,11 @@ fpe(Config) when is_list(Config) -> _ -> ok end. +empty_list(Config) when is_list(Config) -> + Val=[{'$1',[], [{message,'$1'},{message,{caller}},{return_trace}]}], + %% Did crash debug VM in faulty assert: + erlang:match_spec_test([],Val,trace). + moving_labels(Config) when is_list(Config) -> %% Force an andalso/orelse construction to be moved by placing it %% in a tuple followed by a constant term. Labels should still diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index 9a70e8646a..affb66289b 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -36,7 +36,7 @@ threading/1, send/1, send2/1, send3/1, send_threaded/1, neg/1, is_checks/1, get_length/1, make_atom/1, make_string/1, reverse_list_test/1, - otp_9668/1, consume_timeslice/1 + otp_9668/1, consume_timeslice/1, dirty_nif/1 ]). -export([many_args_100/100]). @@ -63,7 +63,7 @@ all() -> resource_takeover, threading, send, send2, send3, send_threaded, neg, is_checks, get_length, make_atom, make_string,reverse_list_test, - otp_9668, consume_timeslice + otp_9668, consume_timeslice, dirty_nif ]. groups() -> @@ -1343,6 +1343,20 @@ consume_timeslice(Config) when is_list(Config) -> ok. +dirty_nif(Config) when is_list(Config) -> + try erlang:system_info(dirty_cpu_schedulers) of + N when is_integer(N) -> + ensure_lib_loaded(Config), + Val1 = 42, + Val2 = "Erlang", + Val3 = list_to_binary([Val2, 0]), + {Val1, Val2, Val3} = call_dirty_nif(Val1, Val2, Val3), + ok + catch + error:badarg -> + {skipped,"No dirty scheduler support"} + end. + next_msg(Pid) -> receive M -> M @@ -1472,6 +1486,7 @@ echo_int(_) -> ?nif_stub. type_sizes() -> ?nif_stub. otp_9668_nif(_) -> ?nif_stub. consume_timeslice_nif(_,_) -> ?nif_stub. +call_dirty_nif(_,_,_) -> ?nif_stub. nif_stub_error(Line) -> exit({nif_not_loaded,module,?MODULE,line,Line}). diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index 0c4a9f7e5c..6f902e186d 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -1494,6 +1494,48 @@ static ERL_NIF_TERM consume_timeslice_nif(ErlNifEnv* env, int argc, const ERL_NI } } +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +static ERL_NIF_TERM dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int n; + char s[10]; + ErlNifBinary b; + ERL_NIF_TERM result; + if (enif_have_dirty_schedulers()) { + assert(enif_is_on_dirty_scheduler(env)); + } + assert(argc == 3); + enif_get_int(env, argv[0], &n); + enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1); + enif_inspect_binary(env, argv[2], &b); + result = enif_make_tuple3(env, + enif_make_int(env, n), + enif_make_string(env, s, ERL_NIF_LATIN1), + enif_make_binary(env, &b)); + return enif_schedule_dirty_nif_finalizer(env, result, enif_dirty_nif_finalizer); +} + +static ERL_NIF_TERM call_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + int n; + char s[10]; + ErlNifBinary b; + assert(!enif_is_on_dirty_scheduler(env)); + if (argc != 3) + return enif_make_badarg(env); + if (enif_have_dirty_schedulers()) { + if (enif_get_int(env, argv[0], &n) && + enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1) && + enif_inspect_binary(env, argv[2], &b)) + return enif_schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_CPU_BOUND, dirty_nif, argc, argv); + else + return enif_make_badarg(env); + } else { + return dirty_nif(env, argc, argv); + } +} +#endif + static ErlNifFunc nif_funcs[] = { {"lib_version", 0, lib_version}, @@ -1543,7 +1585,10 @@ static ErlNifFunc nif_funcs[] = {"echo_int", 1, echo_int}, {"type_sizes", 0, type_sizes}, {"otp_9668_nif", 1, otp_9668_nif}, - {"consume_timeslice_nif", 2, consume_timeslice_nif} + {"consume_timeslice_nif", 2, consume_timeslice_nif}, +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT + {"call_dirty_nif", 3, call_dirty_nif}, +#endif }; ERL_NIF_INIT(nif_SUITE,nif_funcs,load,reload,upgrade,unload) diff --git a/erts/emulator/zlib/adler32.c b/erts/emulator/zlib/adler32.c index 4368c31d70..c693a42b7c 100644 --- a/erts/emulator/zlib/adler32.c +++ b/erts/emulator/zlib/adler32.c @@ -1,19 +1,20 @@ /* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2011 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ #ifdef HAVE_CONFIG_H # include "config.h" #endif -#define ZLIB_INTERNAL -#include "zlib.h" +#include "zutil.h" + +#define local static + +local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); -#define BASE 65521UL /* largest prime smaller than 65536 */ +#define BASE 65521 /* largest prime smaller than 65536 */ #define NMAX 5552 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ @@ -23,39 +24,44 @@ #define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); #define DO16(buf) DO8(buf,0); DO8(buf,8); -/* use NO_DIVIDE if your processor does not do division in hardware */ +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ #ifdef NO_DIVIDE -# define MOD(a) \ +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ do { \ - if (a >= (BASE << 16)) a -= (BASE << 16); \ - if (a >= (BASE << 15)) a -= (BASE << 15); \ - if (a >= (BASE << 14)) a -= (BASE << 14); \ - if (a >= (BASE << 13)) a -= (BASE << 13); \ - if (a >= (BASE << 12)) a -= (BASE << 12); \ - if (a >= (BASE << 11)) a -= (BASE << 11); \ - if (a >= (BASE << 10)) a -= (BASE << 10); \ - if (a >= (BASE << 9)) a -= (BASE << 9); \ - if (a >= (BASE << 8)) a -= (BASE << 8); \ - if (a >= (BASE << 7)) a -= (BASE << 7); \ - if (a >= (BASE << 6)) a -= (BASE << 6); \ - if (a >= (BASE << 5)) a -= (BASE << 5); \ - if (a >= (BASE << 4)) a -= (BASE << 4); \ - if (a >= (BASE << 3)) a -= (BASE << 3); \ - if (a >= (BASE << 2)) a -= (BASE << 2); \ - if (a >= (BASE << 1)) a -= (BASE << 1); \ + CHOP(a); \ if (a >= BASE) a -= BASE; \ } while (0) -# define MOD4(a) \ +# define MOD(a) \ do { \ - if (a >= (BASE << 4)) a -= (BASE << 4); \ - if (a >= (BASE << 3)) a -= (BASE << 3); \ - if (a >= (BASE << 2)) a -= (BASE << 2); \ - if (a >= (BASE << 1)) a -= (BASE << 1); \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ if (a >= BASE) a -= BASE; \ } while (0) #else # define MOD(a) a %= BASE -# define MOD4(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE #endif /* ========================================================================= */ @@ -94,7 +100,7 @@ uLong ZEXPORT adler32(adler, buf, len) } if (adler >= BASE) adler -= BASE; - MOD4(sum2); /* only added so many BASE's */ + MOD28(sum2); /* only added so many BASE's */ return adler | (sum2 << 16); } @@ -130,25 +136,47 @@ uLong ZEXPORT adler32(adler, buf, len) } /* ========================================================================= */ -uLong ZEXPORT adler32_combine(adler1, adler2, len2) +local uLong adler32_combine_(adler1, adler2, len2) uLong adler1; uLong adler2; - z_off_t len2; + z_off64_t len2; { unsigned long sum1; unsigned long sum2; unsigned rem; + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + /* the derivation of this formula is left as an exercise for the reader */ - rem = (unsigned)(len2 % BASE); + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; sum1 = adler1 & 0xffff; sum2 = rem * sum1; MOD(sum2); sum1 += (adler2 & 0xffff) + BASE - 1; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; - if (sum1 > BASE) sum1 -= BASE; - if (sum1 > BASE) sum1 -= BASE; - if (sum2 > (BASE << 1)) sum2 -= (BASE << 1); - if (sum2 > BASE) sum2 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; return sum1 | (sum2 << 16); } + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(adler1, adler2, len2) + uLong adler1; + uLong adler2; + z_off64_t len2; +{ + return adler32_combine_(adler1, adler2, len2); +} diff --git a/erts/emulator/zlib/compress.c b/erts/emulator/zlib/compress.c index 28bceb15f8..8ecef0f790 100644 --- a/erts/emulator/zlib/compress.c +++ b/erts/emulator/zlib/compress.c @@ -1,10 +1,8 @@ /* compress.c -- compress a memory buffer - * Copyright (C) 1995-2003 Jean-loup Gailly. + * Copyright (C) 1995-2005 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ #ifdef HAVE_CONFIG_H @@ -34,7 +32,7 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) z_stream stream; int err; - stream.next_in = (Bytef*)source; + stream.next_in = (z_const Bytef *)source; stream.avail_in = (uInt)sourceLen; #ifdef MAXSEG_64K /* Check for source > 64K on 16-bit machine: */ @@ -80,5 +78,6 @@ int ZEXPORT compress (dest, destLen, source, sourceLen) uLong ZEXPORT compressBound (sourceLen) uLong sourceLen; { - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + 11; + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; } diff --git a/erts/emulator/zlib/crc32.c b/erts/emulator/zlib/crc32.c index b9c10bb9b3..ba506d8dd3 100644 --- a/erts/emulator/zlib/crc32.c +++ b/erts/emulator/zlib/crc32.c @@ -1,19 +1,14 @@ /* crc32.c -- compute the CRC-32 of a data stream - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h * * Thanks to Rodney Brown <[email protected]> for his contribution of faster * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing * tables for updating the shift register in one step with three exclusive-ors -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif * instead of four steps with four exclusive-ors. This results in about a * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ /* @@ -22,6 +17,8 @@ of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should first call get_crc_table() to initialize the tables before allowing more than one thread to use crc32(). + + DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. */ #ifdef MAKECRCH @@ -31,35 +28,19 @@ # endif /* !DYNAMIC_CRC_TABLE */ #endif /* MAKECRCH */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + #include "zutil.h" /* for STDC and FAR definitions */ #define local static -/* Find a four-byte integer type for crc32_little() and crc32_big(). */ -#ifndef NOBYFOUR -# ifdef STDC /* need ANSI C limits.h to determine sizes */ -# include <limits.h> -# define BYFOUR -# if (UINT_MAX == 0xffffffffUL) - typedef unsigned int u4; -# else -# if (ULONG_MAX == 0xffffffffUL) - typedef unsigned long u4; -# else -# if (USHRT_MAX == 0xffffffffUL) - typedef unsigned short u4; -# else -# undef BYFOUR /* can't find a four-byte integer type! */ -# endif -# endif -# endif -# endif /* STDC */ -#endif /* !NOBYFOUR */ - /* Definitions for doing the crc four data bytes at a time. */ +#if !defined(NOBYFOUR) && defined(Z_U4) +# define BYFOUR +#endif #ifdef BYFOUR -# define REV(w) (((w)>>24)+(((w)>>8)&0xff00)+ \ - (((w)&0xff00)<<8)+(((w)&0xff)<<24)) local unsigned long crc32_little OF((unsigned long, const unsigned char FAR *, unsigned)); local unsigned long crc32_big OF((unsigned long, @@ -73,14 +54,16 @@ local unsigned long gf2_matrix_times OF((unsigned long *mat, unsigned long vec)); local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); +local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); + #ifdef DYNAMIC_CRC_TABLE local volatile int crc_table_empty = 1; -local unsigned long FAR crc_table[TBLS][256]; +local z_crc_t FAR crc_table[TBLS][256]; local void make_crc_table OF((void)); #ifdef MAKECRCH - local void write_table OF((FILE *, const unsigned long FAR *)); + local void write_table OF((FILE *, const z_crc_t FAR *)); #endif /* MAKECRCH */ /* Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: @@ -110,9 +93,9 @@ local void make_crc_table OF((void)); */ local void make_crc_table() { - unsigned long c; + z_crc_t c; int n, k; - unsigned long poly; /* polynomial exclusive-or pattern */ + z_crc_t poly; /* polynomial exclusive-or pattern */ /* terms of polynomial defining this crc (except x^32): */ static volatile int first = 1; /* flag to limit concurrent making */ static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; @@ -124,13 +107,13 @@ local void make_crc_table() first = 0; /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0UL; - for (n = 0; n < sizeof(p)/sizeof(unsigned char); n++) - poly |= 1UL << (31 - p[n]); + poly = 0; + for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) + poly |= (z_crc_t)1 << (31 - p[n]); /* generate a crc for every 8-bit value */ for (n = 0; n < 256; n++) { - c = (unsigned long)n; + c = (z_crc_t)n; for (k = 0; k < 8; k++) c = c & 1 ? poly ^ (c >> 1) : c >> 1; crc_table[0][n] = c; @@ -141,11 +124,11 @@ local void make_crc_table() and then the byte reversal of those as well as the first table */ for (n = 0; n < 256; n++) { c = crc_table[0][n]; - crc_table[4][n] = REV(c); + crc_table[4][n] = ZSWAP32(c); for (k = 1; k < 4; k++) { c = crc_table[0][c & 0xff] ^ (c >> 8); crc_table[k][n] = c; - crc_table[k + 4][n] = REV(c); + crc_table[k + 4][n] = ZSWAP32(c); } } #endif /* BYFOUR */ @@ -167,7 +150,7 @@ local void make_crc_table() if (out == NULL) return; fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const unsigned long FAR "); + fprintf(out, "local const z_crc_t FAR "); fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); write_table(out, crc_table[0]); # ifdef BYFOUR @@ -187,12 +170,13 @@ local void make_crc_table() #ifdef MAKECRCH local void write_table(out, table) FILE *out; - const unsigned long FAR *table; + const z_crc_t FAR *table; { int n; for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", table[n], + fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", + (unsigned long)(table[n]), n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); } #endif /* MAKECRCH */ @@ -207,13 +191,13 @@ local void write_table(out, table) /* ========================================================================= * This function can be used by asm versions of crc32() */ -const unsigned long FAR * ZEXPORT get_crc_table() +const z_crc_t FAR * ZEXPORT get_crc_table() { #ifdef DYNAMIC_CRC_TABLE if (crc_table_empty) make_crc_table(); #endif /* DYNAMIC_CRC_TABLE */ - return (const unsigned long FAR *)crc_table; + return (const z_crc_t FAR *)crc_table; } /* ========================================================================= */ @@ -224,7 +208,7 @@ const unsigned long FAR * ZEXPORT get_crc_table() unsigned long ZEXPORT crc32(crc, buf, len) unsigned long crc; const unsigned char FAR *buf; - unsigned len; + uInt len; { if (buf == Z_NULL) return 0UL; @@ -235,7 +219,7 @@ unsigned long ZEXPORT crc32(crc, buf, len) #ifdef BYFOUR if (sizeof(void *) == sizeof(ptrdiff_t)) { - u4 endian; + z_crc_t endian; endian = 1; if (*((unsigned char *)(&endian))) @@ -269,17 +253,17 @@ local unsigned long crc32_little(crc, buf, len) const unsigned char FAR *buf; unsigned len; { - register u4 c; - register const u4 FAR *buf4; + register z_crc_t c; + register const z_crc_t FAR *buf4; - c = (u4)crc; + c = (z_crc_t)crc; c = ~c; while (len && ((ptrdiff_t)buf & 3)) { c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); len--; } - buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; while (len >= 32) { DOLIT32; len -= 32; @@ -309,17 +293,17 @@ local unsigned long crc32_big(crc, buf, len) const unsigned char FAR *buf; unsigned len; { - register u4 c; - register const u4 FAR *buf4; + register z_crc_t c; + register const z_crc_t FAR *buf4; - c = REV((u4)crc); + c = ZSWAP32((z_crc_t)crc); c = ~c; while (len && ((ptrdiff_t)buf & 3)) { c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); len--; } - buf4 = (const u4 FAR *)(const void FAR *)buf; + buf4 = (const z_crc_t FAR *)(const void FAR *)buf; buf4--; while (len >= 32) { DOBIG32; @@ -336,7 +320,7 @@ local unsigned long crc32_big(crc, buf, len) c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); } while (--len); c = ~c; - return (unsigned long)(REV(c)); + return (unsigned long)(ZSWAP32(c)); } #endif /* BYFOUR */ @@ -372,22 +356,22 @@ local void gf2_matrix_square(square, mat) } /* ========================================================================= */ -uLong ZEXPORT crc32_combine(crc1, crc2, len2) +local uLong crc32_combine_(crc1, crc2, len2) uLong crc1; uLong crc2; - z_off_t len2; + z_off64_t len2; { int n; unsigned long row; unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - /* degenerate case */ - if (len2 == 0) + /* degenerate case (also disallow negative lengths) */ + if (len2 <= 0) return crc1; /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320L; /* CRC-32 polynomial */ + odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ row = 1; for (n = 1; n < GF2_DIM; n++) { odd[n] = row; @@ -426,3 +410,20 @@ uLong ZEXPORT crc32_combine(crc1, crc2, len2) crc1 ^= crc2; return crc1; } + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} + +uLong ZEXPORT crc32_combine64(crc1, crc2, len2) + uLong crc1; + uLong crc2; + z_off64_t len2; +{ + return crc32_combine_(crc1, crc2, len2); +} diff --git a/erts/emulator/zlib/crc32.h b/erts/emulator/zlib/crc32.h index 49cd69a4c2..9e0c778102 100644 --- a/erts/emulator/zlib/crc32.h +++ b/erts/emulator/zlib/crc32.h @@ -2,9 +2,7 @@ * Generated automatically by crc32.c */ -/* %ExternalCopyright% */ - -local const unsigned long FAR crc_table[TBLS][256] = +local const z_crc_t FAR crc_table[TBLS][256] = { { 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, diff --git a/erts/emulator/zlib/deflate.c b/erts/emulator/zlib/deflate.c index 92f4be57c5..943c26dfb2 100644 --- a/erts/emulator/zlib/deflate.c +++ b/erts/emulator/zlib/deflate.c @@ -1,10 +1,8 @@ /* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* * ALGORITHM * @@ -39,7 +37,7 @@ * REFERENCES * * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in http://www.ietf.org/rfc/rfc1951.txt + * Available in http://tools.ietf.org/html/rfc1951 * * A description of the Rabin and Karp algorithm is given in the book * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. @@ -57,7 +55,7 @@ #include "deflate.h" const char deflate_copyright[] = - " deflate 1.2.3 Copyright 1995-2005 Jean-loup Gailly "; + " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -84,19 +82,18 @@ local block_state deflate_fast OF((deflate_state *s, int flush)); #ifndef FASTEST local block_state deflate_slow OF((deflate_state *s, int flush)); #endif +local block_state deflate_rle OF((deflate_state *s, int flush)); +local block_state deflate_huff OF((deflate_state *s, int flush)); local void lm_init OF((deflate_state *s)); local void putShortMSB OF((deflate_state *s, uInt b)); local void flush_pending OF((z_streamp strm)); local int read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -#ifndef FASTEST #ifdef ASMV void match_init OF((void)); /* asm code initialization */ uInt longest_match OF((deflate_state *s, IPos cur_match)); #else local uInt longest_match OF((deflate_state *s, IPos cur_match)); #endif -#endif -local uInt longest_match_fast OF((deflate_state *s, IPos cur_match)); #ifdef DEBUG local void check_match OF((deflate_state *s, IPos start, IPos match, @@ -115,11 +112,6 @@ local void check_match OF((deflate_state *s, IPos start, IPos match, #endif /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ -#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - /* Values for max_lazy_match, good_match and max_chain_length, depending on * the desired pack level (0..9). The values given below have been tuned to * exclude worst case performance for pathological files. Better values may be @@ -166,6 +158,9 @@ local const config configuration_table[10] = { struct static_tree_desc_s {int dummy;}; /* for buggy compilers */ #endif +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) << 1) - ((f) > 4 ? 9 : 0)) + /* =========================================================================== * Update a hash value with the given input byte * IN assertion: all calls to to UPDATE_HASH are made with consecutive @@ -246,10 +241,19 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, strm->msg = Z_NULL; if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zalloc = zcalloc; strm->opaque = (voidpf)0; +#endif } - if (strm->zfree == (free_func)0) strm->zfree = zcfree; + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif #ifdef FASTEST if (level != 0) level = 1; @@ -293,6 +297,8 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + s->high_water = 0; /* nothing written to s->window yet */ + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); @@ -302,7 +308,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) { s->status = FINISH_STATE; - strm->msg = (char*)ERR_MSG(Z_MEM_ERROR); + strm->msg = ERR_MSG(Z_MEM_ERROR); deflateEnd (strm); return Z_MEM_ERROR; } @@ -323,43 +329,70 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) uInt dictLength; { deflate_state *s; - uInt length = dictLength; - uInt n; - IPos hash_head = 0; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; - if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL || - strm->state->wrap == 2 || - (strm->state->wrap == 1 && strm->state->status != INIT_STATE)) + if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL) return Z_STREAM_ERROR; - s = strm->state; - if (s->wrap) - strm->adler = adler32(strm->adler, dictionary, dictLength); + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; - if (length < MIN_MATCH) return Z_OK; - if (length > MAX_DIST(s)) { - length = MAX_DIST(s); - dictionary += dictLength - length; /* use the tail of the dictionary */ + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; } - zmemcpy(s->window, dictionary, length); - s->strstart = length; - s->block_start = (long)length; - /* Insert all strings in the hash table (except for the last two bytes). - * s->lookahead stays null, so s->ins_h will be recomputed at the next - * call of fill_window. - */ - s->ins_h = s->window[0]; - UPDATE_HASH(s, s->ins_h, s->window[1]); - for (n = 0; n <= length - MIN_MATCH; n++) { - INSERT_STRING(s, n, hash_head); + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); } - if (hash_head) hash_head = 0; /* to make compiler happy */ + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; return Z_OK; } /* ========================================================================= */ -int ZEXPORT deflateReset (strm) +int ZEXPORT deflateResetKeep (strm) z_streamp strm; { deflate_state *s; @@ -389,12 +422,23 @@ int ZEXPORT deflateReset (strm) s->last_flush = Z_NO_FLUSH; _tr_init(s); - lm_init(s); return Z_OK; } /* ========================================================================= */ +int ZEXPORT deflateReset (strm) + z_streamp strm; +{ + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ int ZEXPORT deflateSetHeader (strm, head) z_streamp strm; gz_headerp head; @@ -406,14 +450,42 @@ int ZEXPORT deflateSetHeader (strm, head) } /* ========================================================================= */ +int ZEXPORT deflatePending (strm, pending, bits) + unsigned *pending; + int *bits; + z_streamp strm; +{ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ int ZEXPORT deflatePrime (strm, bits, value) z_streamp strm; int bits; int value; { + deflate_state *s; + int put; + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; - strm->state->bi_valid = bits; - strm->state->bi_buf = (ush)(value & ((1 << bits) - 1)); + s = strm->state; + if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); return Z_OK; } @@ -440,9 +512,12 @@ int ZEXPORT deflateParams(strm, level, strategy) } func = configuration_table[s->level].func; - if (func != configuration_table[level].func && strm->total_in != 0) { + if ((strategy != s->strategy || func != configuration_table[level].func) && + strm->total_in != 0) { /* Flush the last buffer: */ - err = deflate(strm, Z_PARTIAL_FLUSH); + err = deflate(strm, Z_BLOCK); + if (err == Z_BUF_ERROR && s->pending == 0) + err = Z_OK; } if (s->level != level) { s->level = level; @@ -486,33 +561,66 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) * resulting from using fixed blocks instead of stored blocks, which deflate * can emit on compressed data for some combinations of the parameters. * - * This function could be more sophisticated to provide closer upper bounds - * for every combination of windowBits and memLevel, as well as wrap. - * But even the conservative upper bound of about 14% expansion does not - * seem onerous for output buffer allocation. + * This function could be more sophisticated to provide closer upper bounds for + * every combination of windowBits and memLevel. But even the conservative + * upper bound of about 14% expansion does not seem onerous for output buffer + * allocation. */ uLong ZEXPORT deflateBound(strm, sourceLen) z_streamp strm; uLong sourceLen; { deflate_state *s; - uLong destLen; + uLong complen, wraplen; + Bytef *str; - /* conservative upper bound */ - destLen = sourceLen + - ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 11; + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; - /* if can't get parameters, return conservative bound */ + /* if can't get parameters, return conservative bound plus zlib wrapper */ if (strm == Z_NULL || strm->state == Z_NULL) - return destLen; + return complen + 6; - /* if not default parameters, return conservative bound */ + /* compute wrapper length */ s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 4 : 0); + break; + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return conservative bound */ if (s->w_bits != 15 || s->hash_bits != 8 + 7) - return destLen; + return complen + wraplen; /* default settings: return tight bound for that case */ - return compressBound(sourceLen); + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; } /* ========================================================================= @@ -537,19 +645,22 @@ local void putShortMSB (s, b) local void flush_pending(strm) z_streamp strm; { - unsigned len = strm->state->pending; + unsigned len; + deflate_state *s = strm->state; + _tr_flush_bits(s); + len = s->pending; if (len > strm->avail_out) len = strm->avail_out; if (len == 0) return; - zmemcpy(strm->next_out, strm->state->pending_out, len); + zmemcpy(strm->next_out, s->pending_out, len); strm->next_out += len; - strm->state->pending_out += len; + s->pending_out += len; strm->total_out += len; strm->avail_out -= len; - strm->state->pending -= len; - if (strm->state->pending == 0) { - strm->state->pending_out = strm->state->pending_buf; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; } } @@ -562,7 +673,7 @@ int ZEXPORT deflate (strm, flush) deflate_state *s; if (strm == Z_NULL || strm->state == Z_NULL || - flush > Z_FINISH || flush < 0) { + flush > Z_BLOCK || flush < 0) { return Z_STREAM_ERROR; } s = strm->state; @@ -586,7 +697,7 @@ int ZEXPORT deflate (strm, flush) put_byte(s, 31); put_byte(s, 139); put_byte(s, 8); - if (s->gzhead == NULL) { + if (s->gzhead == Z_NULL) { put_byte(s, 0); put_byte(s, 0); put_byte(s, 0); @@ -613,7 +724,7 @@ int ZEXPORT deflate (strm, flush) (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 4 : 0)); put_byte(s, s->gzhead->os & 0xff); - if (s->gzhead->extra != NULL) { + if (s->gzhead->extra != Z_NULL) { put_byte(s, s->gzhead->extra_len & 0xff); put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); } @@ -655,7 +766,7 @@ int ZEXPORT deflate (strm, flush) } #ifdef GZIP if (s->status == EXTRA_STATE) { - if (s->gzhead->extra != NULL) { + if (s->gzhead->extra != Z_NULL) { uInt beg = s->pending; /* start of bytes to update crc */ while (s->gzindex < (s->gzhead->extra_len & 0xffff)) { @@ -683,7 +794,7 @@ int ZEXPORT deflate (strm, flush) s->status = NAME_STATE; } if (s->status == NAME_STATE) { - if (s->gzhead->name != NULL) { + if (s->gzhead->name != Z_NULL) { uInt beg = s->pending; /* start of bytes to update crc */ int val; @@ -714,7 +825,7 @@ int ZEXPORT deflate (strm, flush) s->status = COMMENT_STATE; } if (s->status == COMMENT_STATE) { - if (s->gzhead->comment != NULL) { + if (s->gzhead->comment != Z_NULL) { uInt beg = s->pending; /* start of bytes to update crc */ int val; @@ -776,7 +887,7 @@ int ZEXPORT deflate (strm, flush) * flushes. For repeated and useless calls with Z_FINISH, we keep * returning Z_STREAM_END instead of Z_BUF_ERROR. */ - } else if (strm->avail_in == 0 && flush <= old_flush && + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && flush != Z_FINISH) { ERR_RETURN(strm, Z_BUF_ERROR); } @@ -792,7 +903,9 @@ int ZEXPORT deflate (strm, flush) (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { block_state bstate; - bstate = (*(configuration_table[s->level].func))(s, flush); + bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + (s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush)); if (bstate == finish_started || bstate == finish_done) { s->status = FINISH_STATE; @@ -813,13 +926,18 @@ int ZEXPORT deflate (strm, flush) if (bstate == block_done) { if (flush == Z_PARTIAL_FLUSH) { _tr_align(s); - } else { /* FULL_FLUSH or SYNC_FLUSH */ + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ _tr_stored_block(s, (char*)0, 0L, 0); /* For a full flush, this empty block will be recognized * as a special marker by inflate_sync(). */ if (flush == Z_FULL_FLUSH) { CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } } } flush_pending(strm); @@ -914,12 +1032,12 @@ int ZEXPORT deflateCopy (dest, source) ss = source->state; - zmemcpy(dest, source, sizeof(z_stream)); + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); if (ds == Z_NULL) return Z_MEM_ERROR; dest->state = (struct internal_state FAR *) ds; - zmemcpy(ds, ss, sizeof(deflate_state)); + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); ds->strm = dest; ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); @@ -935,8 +1053,8 @@ int ZEXPORT deflateCopy (dest, source) } /* following zmemcpy do not work for 16-bit MSDOS */ zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - zmemcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); - zmemcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); @@ -970,15 +1088,15 @@ local int read_buf(strm, buf, size) strm->avail_in -= len; + zmemcpy(buf, strm->next_in, len); if (strm->state->wrap == 1) { - strm->adler = adler32(strm->adler, strm->next_in, len); + strm->adler = adler32(strm->adler, buf, len); } #ifdef GZIP else if (strm->state->wrap == 2) { - strm->adler = crc32(strm->adler, strm->next_in, len); + strm->adler = crc32(strm->adler, buf, len); } #endif - zmemcpy(buf, strm->next_in, len); strm->next_in += len; strm->total_in += len; @@ -1005,6 +1123,7 @@ local void lm_init (s) s->strstart = 0; s->block_start = 0L; s->lookahead = 0; + s->insert = 0; s->match_length = s->prev_length = MIN_MATCH-1; s->match_available = 0; s->ins_h = 0; @@ -1172,12 +1291,13 @@ local uInt longest_match(s, cur_match) return s->lookahead; } #endif /* ASMV */ -#endif /* FASTEST */ + +#else /* FASTEST */ /* --------------------------------------------------------------------------- - * Optimized version for level == 1 or strategy == Z_RLE only + * Optimized version for FASTEST only */ -local uInt longest_match_fast(s, cur_match) +local uInt longest_match(s, cur_match) deflate_state *s; IPos cur_match; /* current match */ { @@ -1230,6 +1350,8 @@ local uInt longest_match_fast(s, cur_match) return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; } +#endif /* FASTEST */ + #ifdef DEBUG /* =========================================================================== * Check that the match at match_start is indeed a match. @@ -1276,6 +1398,8 @@ local void fill_window(s) unsigned more; /* Amount of free space at the end of the window. */ uInt wsize = s->w_size; + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + do { more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); @@ -1308,7 +1432,6 @@ local void fill_window(s) later. (Using level 0 permanently is not an optimal usage of zlib, so we don't care about this pathological case.) */ - /* %%% avoid this when Z_RLE */ n = s->hash_size; p = &s->head[n]; do { @@ -1329,7 +1452,7 @@ local void fill_window(s) #endif more += wsize; } - if (s->strm->avail_in == 0) return; + if (s->strm->avail_in == 0) break; /* If there was no sliding: * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && @@ -1348,39 +1471,88 @@ local void fill_window(s) s->lookahead += n; /* Initialize the hash value now that we have some input: */ - if (s->lookahead >= MIN_MATCH) { - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); #if MIN_MATCH != 3 Call UPDATE_HASH() MIN_MATCH-3 more times #endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } } /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, * but this is not important since only literal bytes will be emitted. */ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less. + */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); } /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */ -#define FLUSH_BLOCK_ONLY(s, eof) { \ +#define FLUSH_BLOCK_ONLY(s, last) { \ _tr_flush_block(s, (s->block_start >= 0L ? \ (charf *)&s->window[(unsigned)s->block_start] : \ (charf *)Z_NULL), \ (ulg)((long)s->strstart - s->block_start), \ - (eof)); \ + (last)); \ s->block_start = s->strstart; \ flush_pending(s->strm); \ Tracev((stderr,"[FLUSH]")); \ } /* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, eof) { \ - FLUSH_BLOCK_ONLY(s, eof); \ - if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ } /* =========================================================================== @@ -1439,8 +1611,14 @@ local block_state deflate_stored(s, flush) FLUSH_BLOCK(s, 0); } } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if ((long)s->strstart > s->block_start) + FLUSH_BLOCK(s, 0); + return block_done; } /* =========================================================================== @@ -1454,7 +1632,7 @@ local block_state deflate_fast(s, flush) deflate_state *s; int flush; { - IPos hash_head = NIL; /* head of the hash chain */ + IPos hash_head; /* head of the hash chain */ int bflush; /* set if current block must be flushed */ for (;;) { @@ -1474,6 +1652,7 @@ local block_state deflate_fast(s, flush) /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ + hash_head = NIL; if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } @@ -1486,19 +1665,8 @@ local block_state deflate_fast(s, flush) * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ -#ifdef FASTEST - if ((s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) || - (s->strategy == Z_RLE && s->strstart - hash_head == 1)) { - s->match_length = longest_match_fast (s, hash_head); - } -#else - if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { - s->match_length = longest_match (s, hash_head); - } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { - s->match_length = longest_match_fast (s, hash_head); - } -#endif - /* longest_match() or longest_match_fast() sets match_start */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ } if (s->match_length >= MIN_MATCH) { check_match(s, s->strstart, s->match_start, s->match_length); @@ -1546,8 +1714,14 @@ local block_state deflate_fast(s, flush) } if (bflush) FLUSH_BLOCK(s, 0); } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; } #ifndef FASTEST @@ -1560,7 +1734,7 @@ local block_state deflate_slow(s, flush) deflate_state *s; int flush; { - IPos hash_head = NIL; /* head of hash chain */ + IPos hash_head; /* head of hash chain */ int bflush; /* set if current block must be flushed */ /* Process the input block. */ @@ -1581,6 +1755,7 @@ local block_state deflate_slow(s, flush) /* Insert the string window[strstart .. strstart+2] in the * dictionary, and set hash_head to the head of the hash chain: */ + hash_head = NIL; if (s->lookahead >= MIN_MATCH) { INSERT_STRING(s, s->strstart, hash_head); } @@ -1596,12 +1771,8 @@ local block_state deflate_slow(s, flush) * of window index 0 (in particular we have to avoid a match * of the string with itself at the start of the input file). */ - if (s->strategy != Z_HUFFMAN_ONLY && s->strategy != Z_RLE) { - s->match_length = longest_match (s, hash_head); - } else if (s->strategy == Z_RLE && s->strstart - hash_head == 1) { - s->match_length = longest_match_fast (s, hash_head); - } - /* longest_match() or longest_match_fast() sets match_start */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ if (s->match_length <= 5 && (s->strategy == Z_FILTERED #if TOO_FAR <= 32767 @@ -1674,12 +1845,17 @@ local block_state deflate_slow(s, flush) _tr_tally_lit(s, s->window[s->strstart-1], bflush); s->match_available = 0; } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; } #endif /* FASTEST */ -#if 0 /* =========================================================================== * For Z_RLE, simply look for runs of bytes, generate matches only of distance * one. Do not maintain a hash table. (It will be regenerated if this run of @@ -1689,43 +1865,52 @@ local block_state deflate_rle(s, flush) deflate_state *s; int flush; { - int bflush; /* set if current block must be flushed */ - uInt run; /* length of run */ - uInt max; /* maximum length of run */ - uInt prev; /* byte at distance one to match */ - Bytef *scan; /* scan for end of run */ + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ for (;;) { /* Make sure that we always have enough lookahead, except * at the end of the input file. We need MAX_MATCH bytes - * for the longest encodable run. + * for the longest run, plus one for the unrolled loop. */ - if (s->lookahead < MAX_MATCH) { + if (s->lookahead <= MAX_MATCH) { fill_window(s); - if (s->lookahead < MAX_MATCH && flush == Z_NO_FLUSH) { + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { return need_more; } if (s->lookahead == 0) break; /* flush the current block */ } /* See how many times the previous byte repeats */ - run = 0; - if (s->strstart > 0) { /* if there is a previous byte, that is */ - max = s->lookahead < MAX_MATCH ? s->lookahead : MAX_MATCH; + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { scan = s->window + s->strstart - 1; - prev = *scan++; - do { - if (*scan++ != prev) - break; - } while (++run < max); + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (int)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); } /* Emit match if have run of MIN_MATCH or longer, else emit literal */ - if (run >= MIN_MATCH) { - check_match(s, s->strstart, s->strstart - 1, run); - _tr_tally_dist(s, 1, run - MIN_MATCH, bflush); - s->lookahead -= run; - s->strstart += run; + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; } else { /* No match, output a literal byte */ Tracevv((stderr,"%c", s->window[s->strstart])); @@ -1735,7 +1920,51 @@ local block_state deflate_rle(s, flush) } if (bflush) FLUSH_BLOCK(s, 0); } - FLUSH_BLOCK(s, flush == Z_FINISH); - return flush == Z_FINISH ? finish_done : block_done; + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(s, flush) + deflate_state *s; + int flush; +{ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit (s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->last_lit) + FLUSH_BLOCK(s, 0); + return block_done; } -#endif diff --git a/erts/emulator/zlib/deflate.h b/erts/emulator/zlib/deflate.h index 92b037c9d2..ce0299edd1 100644 --- a/erts/emulator/zlib/deflate.h +++ b/erts/emulator/zlib/deflate.h @@ -1,10 +1,8 @@ /* deflate.h -- internal compression state - * Copyright (C) 1995-2004 Jean-loup Gailly + * Copyright (C) 1995-2012 Jean-loup Gailly * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. @@ -50,6 +48,9 @@ #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */ +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + #define INIT_STATE 42 #define EXTRA_STATE 69 #define NAME_STATE 73 @@ -103,7 +104,7 @@ typedef struct internal_state { int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ gz_headerp gzhead; /* gzip header information to write */ uInt gzindex; /* where in extra, name, or comment */ - Byte method; /* STORED (for zip only) or DEFLATED */ + Byte method; /* can only be DEFLATED */ int last_flush; /* value of flush param for previous deflate call */ /* used by deflate.c: */ @@ -190,7 +191,7 @@ typedef struct internal_state { int nice_match; /* Stop searching when current match exceeds this */ /* used by trees.c: */ - /* Didn't use ct_data typedef below to supress compiler warning */ + /* Didn't use ct_data typedef below to suppress compiler warning */ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ @@ -246,7 +247,7 @@ typedef struct internal_state { ulg opt_len; /* bit length of current block with optimal trees */ ulg static_len; /* bit length of current block with static trees */ uInt matches; /* number of string matches in current block */ - int last_eob_len; /* bit length of EOB code for last block */ + uInt insert; /* bytes at end of window left to insert */ #ifdef DEBUG ulg compressed_len; /* total bit length of compressed file mod 2^32 */ @@ -262,6 +263,13 @@ typedef struct internal_state { * are always zero. */ + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. This is then + * updated to the new high water mark. + */ + } FAR deflate_state; /* Output a byte on the stream. @@ -280,14 +288,19 @@ typedef struct internal_state { * distances are limited to MAX_DIST instead of WSIZE. */ +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + /* in trees.c */ -void _tr_init OF((deflate_state *s)); -int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); -void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); -void _tr_align OF((deflate_state *s)); -void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, - int eof)); +void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); +int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); +void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); +void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); +void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); #define d_code(dist) \ ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) @@ -300,11 +313,11 @@ void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, /* Inline versions of _tr_tally for speed: */ #if defined(GEN_TREES_H) || !defined(STDC) - extern uch _length_code[]; - extern uch _dist_code[]; + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; #else - extern const uch _length_code[]; - extern const uch _dist_code[]; + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; #endif # define _tr_tally_lit(s, c, flush) \ diff --git a/erts/emulator/zlib/example.c b/erts/emulator/zlib/example.c deleted file mode 100644 index ebe828f72d..0000000000 --- a/erts/emulator/zlib/example.c +++ /dev/null @@ -1,570 +0,0 @@ -/* example.c -- usage example of the zlib compression library - * Copyright (C) 1995-2004 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* %ExternalCopyright% */ - -/* @(#) $Id$ */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif -#include <stdio.h> -#include "zlib.h" - -#ifdef STDC -# include <string.h> -# include <stdlib.h> -#endif - -#if defined(VMS) || defined(RISCOS) -# define TESTFILE "foo-gz" -#else -# define TESTFILE "foo.gz" -#endif - -#define CHECK_ERR(err, msg) { \ - if (err != Z_OK) { \ - fprintf(stderr, "%s error: %d\n", msg, err); \ - exit(1); \ - } \ -} - -const char hello[] = "hello, hello!"; -/* "hello world" would be more standard, but the repeated "hello" - * stresses the compression code better, sorry... - */ - -const char dictionary[] = "hello"; -uLong dictId; /* Adler32 value of the dictionary */ - -void test_compress OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_gzio OF((const char *fname, - Byte *uncompr, uLong uncomprLen)); -void test_deflate OF((Byte *compr, uLong comprLen)); -void test_inflate OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_deflate OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_large_inflate OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_flush OF((Byte *compr, uLong *comprLen)); -void test_sync OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -void test_dict_deflate OF((Byte *compr, uLong comprLen)); -void test_dict_inflate OF((Byte *compr, uLong comprLen, - Byte *uncompr, uLong uncomprLen)); -int main OF((int argc, char *argv[])); - -/* =========================================================================== - * Test compress() and uncompress() - */ -void test_compress(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - int err; - uLong len = (uLong)strlen(hello)+1; - - err = compress(compr, &comprLen, (const Bytef*)hello, len); - CHECK_ERR(err, "compress"); - - strcpy((char*)uncompr, "garbage"); - - err = uncompress(uncompr, &uncomprLen, compr, comprLen); - CHECK_ERR(err, "uncompress"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad uncompress\n"); - exit(1); - } else { - printf("uncompress(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test read/write of .gz files - */ -void test_gzio(fname, uncompr, uncomprLen) - const char *fname; /* compressed file name */ - Byte *uncompr; - uLong uncomprLen; -{ -#ifdef NO_GZCOMPRESS - fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); -#else - int err; - int len = (int)strlen(hello)+1; - gzFile file; - z_off_t pos; - - file = gzopen(fname, "wb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - gzputc(file, 'h'); - if (gzputs(file, "ello") != 4) { - fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); - exit(1); - } - if (gzprintf(file, ", %s!", "hello") != 8) { - fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); - exit(1); - } - gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ - gzclose(file); - - file = gzopen(fname, "rb"); - if (file == NULL) { - fprintf(stderr, "gzopen error\n"); - exit(1); - } - strcpy((char*)uncompr, "garbage"); - - if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { - fprintf(stderr, "gzread err: %s\n", gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); - exit(1); - } else { - printf("gzread(): %s\n", (char*)uncompr); - } - - pos = gzseek(file, -8L, SEEK_CUR); - if (pos != 6 || gztell(file) != pos) { - fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", - (long)pos, (long)gztell(file)); - exit(1); - } - - if (gzgetc(file) != ' ') { - fprintf(stderr, "gzgetc error\n"); - exit(1); - } - - if (gzungetc(' ', file) != ' ') { - fprintf(stderr, "gzungetc error\n"); - exit(1); - } - - gzgets(file, (char*)uncompr, (int)uncomprLen); - if (strlen((char*)uncompr) != 7) { /* " hello!" */ - fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); - exit(1); - } - if (strcmp((char*)uncompr, hello + 6)) { - fprintf(stderr, "bad gzgets after gzseek\n"); - exit(1); - } else { - printf("gzgets() after gzseek: %s\n", (char*)uncompr); - } - - gzclose(file); -#endif -} - -/* =========================================================================== - * Test deflate() with small buffers - */ -void test_deflate(compr, comprLen) - Byte *compr; - uLong comprLen; -{ - z_stream c_stream; /* compression stream */ - int err; - uLong len = (uLong)strlen(hello)+1; - - c_stream.zalloc = (alloc_func)0; - c_stream.zfree = (free_func)0; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (Bytef*)hello; - c_stream.next_out = compr; - - while (c_stream.total_in != len && c_stream.total_out < comprLen) { - c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - } - /* Finish the stream, still forcing small buffers: */ - for (;;) { - c_stream.avail_out = 1; - err = deflate(&c_stream, Z_FINISH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "deflate"); - } - - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with small buffers - */ -void test_inflate(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = (alloc_func)0; - d_stream.zfree = (free_func)0; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 0; - d_stream.next_out = uncompr; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { - d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate\n"); - exit(1); - } else { - printf("inflate(): %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Test deflate() with large buffers and dynamic change of compression level - */ -void test_large_deflate(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - z_stream c_stream; /* compression stream */ - int err; - - c_stream.zalloc = (alloc_func)0; - c_stream.zfree = (free_func)0; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_SPEED); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - /* At this point, uncompr is still mostly zeroes, so it should compress - * very well: - */ - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - if (c_stream.avail_in != 0) { - fprintf(stderr, "deflate not greedy\n"); - exit(1); - } - - /* Feed in already compressed data and switch to no compression: */ - deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); - c_stream.next_in = compr; - c_stream.avail_in = (uInt)comprLen/2; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - /* Switch back to compressing mode: */ - deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); - c_stream.next_in = uncompr; - c_stream.avail_in = (uInt)uncomprLen; - err = deflate(&c_stream, Z_NO_FLUSH); - CHECK_ERR(err, "deflate"); - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with large buffers - */ -void test_large_inflate(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = (alloc_func)0; - d_stream.zfree = (free_func)0; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - for (;;) { - d_stream.next_out = uncompr; /* discard the output */ - d_stream.avail_out = (uInt)uncomprLen; - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - CHECK_ERR(err, "large inflate"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (d_stream.total_out != 2*uncomprLen + comprLen/2) { - fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); - exit(1); - } else { - printf("large_inflate(): OK\n"); - } -} - -/* =========================================================================== - * Test deflate() with full flush - */ -void test_flush(compr, comprLen) - Byte *compr; - uLong *comprLen; -{ - z_stream c_stream; /* compression stream */ - int err; - uInt len = (uInt)strlen(hello)+1; - - c_stream.zalloc = (alloc_func)0; - c_stream.zfree = (free_func)0; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - c_stream.next_in = (Bytef*)hello; - c_stream.next_out = compr; - c_stream.avail_in = 3; - c_stream.avail_out = (uInt)*comprLen; - err = deflate(&c_stream, Z_FULL_FLUSH); - CHECK_ERR(err, "deflate"); - - compr[3]++; /* force an error in first compressed block */ - c_stream.avail_in = len - 3; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - CHECK_ERR(err, "deflate"); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); - - *comprLen = c_stream.total_out; -} - -/* =========================================================================== - * Test inflateSync() - */ -void test_sync(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = (alloc_func)0; - d_stream.zfree = (free_func)0; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = 2; /* just read the zlib header */ - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - inflate(&d_stream, Z_NO_FLUSH); - CHECK_ERR(err, "inflate"); - - d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ - err = inflateSync(&d_stream); /* but skip the damaged part */ - CHECK_ERR(err, "inflateSync"); - - err = inflate(&d_stream, Z_FINISH); - if (err != Z_DATA_ERROR) { - fprintf(stderr, "inflate should report DATA_ERROR\n"); - /* Because of incorrect adler32 */ - exit(1); - } - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - printf("after inflateSync(): hel%s\n", (char *)uncompr); -} - -/* =========================================================================== - * Test deflate() with preset dictionary - */ -void test_dict_deflate(compr, comprLen) - Byte *compr; - uLong comprLen; -{ - z_stream c_stream; /* compression stream */ - int err; - - c_stream.zalloc = (alloc_func)0; - c_stream.zfree = (free_func)0; - c_stream.opaque = (voidpf)0; - - err = deflateInit(&c_stream, Z_BEST_COMPRESSION); - CHECK_ERR(err, "deflateInit"); - - err = deflateSetDictionary(&c_stream, - (const Bytef*)dictionary, sizeof(dictionary)); - CHECK_ERR(err, "deflateSetDictionary"); - - dictId = c_stream.adler; - c_stream.next_out = compr; - c_stream.avail_out = (uInt)comprLen; - - c_stream.next_in = (Bytef*)hello; - c_stream.avail_in = (uInt)strlen(hello)+1; - - err = deflate(&c_stream, Z_FINISH); - if (err != Z_STREAM_END) { - fprintf(stderr, "deflate should report Z_STREAM_END\n"); - exit(1); - } - err = deflateEnd(&c_stream); - CHECK_ERR(err, "deflateEnd"); -} - -/* =========================================================================== - * Test inflate() with a preset dictionary - */ -void test_dict_inflate(compr, comprLen, uncompr, uncomprLen) - Byte *compr, *uncompr; - uLong comprLen, uncomprLen; -{ - int err; - z_stream d_stream; /* decompression stream */ - - strcpy((char*)uncompr, "garbage"); - - d_stream.zalloc = (alloc_func)0; - d_stream.zfree = (free_func)0; - d_stream.opaque = (voidpf)0; - - d_stream.next_in = compr; - d_stream.avail_in = (uInt)comprLen; - - err = inflateInit(&d_stream); - CHECK_ERR(err, "inflateInit"); - - d_stream.next_out = uncompr; - d_stream.avail_out = (uInt)uncomprLen; - - for (;;) { - err = inflate(&d_stream, Z_NO_FLUSH); - if (err == Z_STREAM_END) break; - if (err == Z_NEED_DICT) { - if (d_stream.adler != dictId) { - fprintf(stderr, "unexpected dictionary"); - exit(1); - } - err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, - sizeof(dictionary)); - } - CHECK_ERR(err, "inflate with dict"); - } - - err = inflateEnd(&d_stream); - CHECK_ERR(err, "inflateEnd"); - - if (strcmp((char*)uncompr, hello)) { - fprintf(stderr, "bad inflate with dict\n"); - exit(1); - } else { - printf("inflate with dictionary: %s\n", (char *)uncompr); - } -} - -/* =========================================================================== - * Usage: example [output.gz [input.gz]] - */ - -int main(argc, argv) - int argc; - char *argv[]; -{ - Byte *compr, *uncompr; - uLong comprLen = 10000*sizeof(int); /* don't overflow on MSDOS */ - uLong uncomprLen = comprLen; - static const char* myVersion = ZLIB_VERSION; - - if (zlibVersion()[0] != myVersion[0]) { - fprintf(stderr, "incompatible zlib version\n"); - exit(1); - - } else if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) { - fprintf(stderr, "warning: different zlib version\n"); - } - - printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", - ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); - - compr = (Byte*)calloc((uInt)comprLen, 1); - uncompr = (Byte*)calloc((uInt)uncomprLen, 1); - /* compr and uncompr are cleared to avoid reading uninitialized - * data and to ensure that uncompr compresses well. - */ - if (compr == Z_NULL || uncompr == Z_NULL) { - printf("out of memory\n"); - exit(1); - } - test_compress(compr, comprLen, uncompr, uncomprLen); - - test_gzio((argc > 1 ? argv[1] : TESTFILE), - uncompr, uncomprLen); - - test_deflate(compr, comprLen); - test_inflate(compr, comprLen, uncompr, uncomprLen); - - test_large_deflate(compr, comprLen, uncompr, uncomprLen); - test_large_inflate(compr, comprLen, uncompr, uncomprLen); - - test_flush(compr, &comprLen); - test_sync(compr, comprLen, uncompr, uncomprLen); - comprLen = uncomprLen; - - test_dict_deflate(compr, comprLen); - test_dict_inflate(compr, comprLen, uncompr, uncomprLen); - - free(compr); - free(uncompr); - - return 0; -} diff --git a/erts/emulator/zlib/gzguts.h b/erts/emulator/zlib/gzguts.h new file mode 100644 index 0000000000..d87659d031 --- /dev/null +++ b/erts/emulator/zlib/gzguts.h @@ -0,0 +1,209 @@ +/* gzguts.h -- zlib internal header definitions for gz* operations + * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#ifdef _LARGEFILE64_SOURCE +# ifndef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# endif +# ifdef _FILE_OFFSET_BITS +# undef _FILE_OFFSET_BITS +# endif +#endif + +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + +#include <stdio.h> +#include "zlib.h" +#ifdef STDC +# include <string.h> +# include <stdlib.h> +# include <limits.h> +#endif +#include <fcntl.h> + +#ifdef _WIN32 +# include <stddef.h> +#endif + +#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) +# include <io.h> +#endif + +#ifdef WINAPI_FAMILY +# define open _open +# define read _read +# define write _write +# define close _close +#endif + +#ifdef NO_DEFLATE /* for compatibility with old definition */ +# define NO_GZCOMPRESS +#endif + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif + +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS +/* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 +/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) +# define vsnprintf _vsnprintf +# endif +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +# ifdef VMS +# define NO_vsnprintf +# endif +# ifdef __OS400__ +# define NO_vsnprintf +# endif +# ifdef __MVS__ +# define NO_vsnprintf +# endif +#endif + +/* unlike snprintf (which is required in C99, yet still not supported by + Microsoft more than a decade later!), _snprintf does not guarantee null + termination of the result -- however this is only used in gzlib.c where + the result is assured to fit in the space provided */ +#ifdef _MSC_VER +# define snprintf _snprintf +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +/* gz* functions always use library allocation functions */ +#ifndef STDC + extern voidp malloc OF((uInt size)); + extern void free OF((voidpf ptr)); +#endif + +/* get errno and strerror definition */ +#if defined UNDER_CE +# include <windows.h> +# define zstrerror() gz_strwinerror((DWORD)GetLastError()) +#else +# ifndef NO_STRERROR +# include <errno.h> +# define zstrerror() strerror(errno) +# else +# define zstrerror() "stdio error (consult errno)" +# endif +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); +#endif + +/* default memLevel */ +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif + +/* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ +#define GZBUFSIZE 8192 + +/* gzip modes, also provide a little integrity check on the passed structure */ +#define GZ_NONE 0 +#define GZ_READ 7247 +#define GZ_WRITE 31153 +#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ + +/* values for gz_state how */ +#define LOOK 0 /* look for a gzip header */ +#define COPY 1 /* copy input directly */ +#define GZIP 2 /* decompress a gzip stream */ + +/* internal gzip file state data structure */ +typedef struct { + /* exposed contents for gzgetc() macro */ + struct gzFile_s x; /* "x" for exposed */ + /* x.have: number of bytes available at x.next */ + /* x.next: next output data to deliver or write */ + /* x.pos: current position in uncompressed data */ + /* used for both reading and writing */ + int mode; /* see gzip modes above */ + int fd; /* file descriptor */ + char *path; /* path or fd for error messages */ + unsigned size; /* buffer size, zero if not allocated yet */ + unsigned want; /* requested buffer size, default is GZBUFSIZE */ + unsigned char *in; /* input buffer */ + unsigned char *out; /* output buffer (double-sized when reading) */ + int direct; /* 0 if processing gzip, 1 if transparent */ + /* just for reading */ + int how; /* 0: get header, 1: copy, 2: decompress */ + z_off64_t start; /* where the gzip data started, for rewinding */ + int eof; /* true if end of input file reached */ + int past; /* true if read requested past end */ + /* just for writing */ + int level; /* compression level */ + int strategy; /* compression strategy */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +#ifdef INT_MAX +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) +#else +unsigned ZLIB_INTERNAL gz_intmax OF((void)); +# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) +#endif diff --git a/erts/emulator/zlib/inffast.c b/erts/emulator/zlib/inffast.c index eb81884888..5187743fde 100644 --- a/erts/emulator/zlib/inffast.c +++ b/erts/emulator/zlib/inffast.c @@ -1,10 +1,8 @@ /* inffast.c -- fast decoding - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2008, 2010, 2013 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - #ifdef HAVE_CONFIG_H # include "config.h" #endif @@ -69,13 +67,13 @@ requires strm->avail_out >= 258 for each loop to avoid checking for output space. */ -void inflate_fast(strm, start) +void ZLIB_INTERNAL inflate_fast(strm, start) z_streamp strm; unsigned start; /* inflate()'s starting value for strm->avail_out */ { struct inflate_state FAR *state; - unsigned char FAR *in; /* local strm->next_in */ - unsigned char FAR *last; /* while in < last, enough input available */ + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ unsigned char FAR *out; /* local strm->next_out */ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ unsigned char FAR *end; /* while out < end, enough space available */ @@ -84,7 +82,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif unsigned wsize; /* window size or zero if not using window */ unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ + unsigned wnext; /* window write index */ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ unsigned long hold; /* local strm->hold */ unsigned bits; /* local strm->bits */ @@ -92,7 +90,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ code const FAR *dcode; /* local strm->distcode */ unsigned lmask; /* mask for first level of length codes */ unsigned dmask; /* mask for first level of distance codes */ - code this; /* retrieved table entry */ + code here; /* retrieved table entry */ unsigned op; /* code bits, operation, extra bits, or */ /* window position, window bytes to copy */ unsigned len; /* match length, unused bytes */ @@ -111,7 +109,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ #endif wsize = state->wsize; whave = state->whave; - write = state->write; + wnext = state->wnext; window = state->window; hold = state->hold; bits = state->bits; @@ -129,20 +127,20 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(PUP(in)) << bits; bits += 8; } - this = lcode[hold & lmask]; + here = lcode[hold & lmask]; dolen: - op = (unsigned)(this.bits); + op = (unsigned)(here.bits); hold >>= op; bits -= op; - op = (unsigned)(this.op); + op = (unsigned)(here.op); if (op == 0) { /* literal */ - Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", this.val)); - PUP(out) = (unsigned char)(this.val); + "inflate: literal 0x%02x\n", here.val)); + PUP(out) = (unsigned char)(here.val); } else if (op & 16) { /* length base */ - len = (unsigned)(this.val); + len = (unsigned)(here.val); op &= 15; /* number of extra bits */ if (op) { if (bits < op) { @@ -160,14 +158,14 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ hold += (unsigned long)(PUP(in)) << bits; bits += 8; } - this = dcode[hold & dmask]; + here = dcode[hold & dmask]; dodist: - op = (unsigned)(this.bits); + op = (unsigned)(here.bits); hold >>= op; bits -= op; - op = (unsigned)(this.op); + op = (unsigned)(here.op); if (op & 16) { /* distance base */ - dist = (unsigned)(this.val); + dist = (unsigned)(here.val); op &= 15; /* number of extra bits */ if (bits < op) { hold += (unsigned long)(PUP(in)) << bits; @@ -192,12 +190,34 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ if (dist > op) { /* see if copy from window */ op = dist - op; /* distance back in window */ if (op > whave) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + PUP(out) = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + PUP(out) = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + PUP(out) = PUP(from); + } while (--len); + continue; + } +#endif } from = window - OFF; - if (write == 0) { /* very common case */ + if (wnext == 0) { /* very common case */ from += wsize - op; if (op < len) { /* some from window */ len -= op; @@ -207,17 +227,17 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ from = out - dist; /* rest from output */ } } - else if (write < op) { /* wrap around window */ - from += wsize + write - op; - op -= write; + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; if (op < len) { /* some from end of window */ len -= op; do { PUP(out) = PUP(from); } while (--op); from = window - OFF; - if (write < len) { /* some from start of window */ - op = write; + if (wnext < len) { /* some from start of window */ + op = wnext; len -= op; do { PUP(out) = PUP(from); @@ -227,7 +247,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else { /* contiguous in window */ - from += write - op; + from += wnext - op; if (op < len) { /* some from window */ len -= op; do { @@ -264,7 +284,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level distance code */ - this = dcode[this.val + (hold & ((1U << op) - 1))]; + here = dcode[here.val + (hold & ((1U << op) - 1))]; goto dodist; } else { @@ -274,7 +294,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ } } else if ((op & 64) == 0) { /* 2nd level length code */ - this = lcode[this.val + (hold & ((1U << op) - 1))]; + here = lcode[here.val + (hold & ((1U << op) - 1))]; goto dolen; } else if (op & 32) { /* end-of-block */ @@ -310,7 +330,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - Using bit fields for code structure - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and write == 0 + - Three separate decoding do-loops for direct, window, and wnext == 0 - Special case for distance > 1 copies to do overlapped load and store copy - Explicit branch predictions (based on measured branch probabilities) - Deferring match copy and interspersed it with decoding subsequent codes diff --git a/erts/emulator/zlib/inffast.h b/erts/emulator/zlib/inffast.h index 623ed83c08..e5c1aa4ca8 100644 --- a/erts/emulator/zlib/inffast.h +++ b/erts/emulator/zlib/inffast.h @@ -1,13 +1,11 @@ /* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003 Mark Adler + * Copyright (C) 1995-2003, 2010 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. */ -void inflate_fast OF((z_streamp strm, unsigned start)); +void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/erts/emulator/zlib/inffixed.h b/erts/emulator/zlib/inffixed.h index 75ed4b5978..d628327769 100644 --- a/erts/emulator/zlib/inffixed.h +++ b/erts/emulator/zlib/inffixed.h @@ -2,9 +2,9 @@ * Generated automatically by makefixed(). */ - /* WARNING: this file should *not* be used by applications. It - is part of the implementation of the compression library and - is subject to change. Applications should only use zlib.h. + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. */ static const code lenfix[512] = { diff --git a/erts/emulator/zlib/inflate.c b/erts/emulator/zlib/inflate.c index 1764447c66..532330b06b 100644 --- a/erts/emulator/zlib/inflate.c +++ b/erts/emulator/zlib/inflate.c @@ -1,13 +1,8 @@ /* inflate.c -- zlib decompression - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2012 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif /* * Change history: * @@ -50,7 +45,7 @@ * - Rearrange window copies in inflate_fast() for speed and simplification * - Unroll last copy for window match in inflate_fast() * - Use local copies of window variables in inflate_fast() for speed - * - Pull out common write == 0 case for speed in inflate_fast() + * - Pull out common wnext == 0 case for speed in inflate_fast() * - Make op and len in inflate_fast() unsigned for consistency * - Add FAR to lcode and dcode declarations in inflate_fast() * - Simplified bad distance check in inflate_fast() @@ -85,6 +80,9 @@ * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif #include "zutil.h" #include "inftrees.h" #include "inflate.h" @@ -98,14 +96,15 @@ /* function prototypes */ local void fixedtables OF((struct inflate_state FAR *state)); -local int updatewindow OF((z_streamp strm, unsigned out)); +local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, + unsigned copy)); #ifdef BUILDFIXED void makefixed OF((void)); #endif -local unsigned syncsearch OF((unsigned FAR *have, unsigned char FAR *buf, +local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, unsigned len)); -int ZEXPORT inflateReset(strm) +int ZEXPORT inflateResetKeep(strm) z_streamp strm; { struct inflate_state FAR *state; @@ -114,36 +113,71 @@ z_streamp strm; state = (struct inflate_state FAR *)strm->state; strm->total_in = strm->total_out = state->total = 0; strm->msg = Z_NULL; - strm->adler = 1; /* to support ill-conceived Java test suite */ + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; state->mode = HEAD; state->last = 0; state->havedict = 0; state->dmax = 32768U; state->head = Z_NULL; - state->wsize = 0; - state->whave = 0; - state->write = 0; state->hold = 0; state->bits = 0; state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; Tracev((stderr, "inflate: reset\n")); return Z_OK; } -int ZEXPORT inflatePrime(strm, bits, value) +int ZEXPORT inflateReset(strm) z_streamp strm; -int bits; -int value; { struct inflate_state FAR *state; if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; state = (struct inflate_state FAR *)strm->state; - if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += value << state->bits; - state->bits += bits; - return Z_OK; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(strm, windowBits) +z_streamp strm; +int windowBits; +{ + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 1; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); } int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) @@ -152,6 +186,7 @@ int windowBits; const char *version; int stream_size; { + int ret; struct inflate_state FAR *state; if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || @@ -160,33 +195,31 @@ int stream_size; if (strm == Z_NULL) return Z_STREAM_ERROR; strm->msg = Z_NULL; /* in case we return an error */ if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else strm->zalloc = zcalloc; strm->opaque = (voidpf)0; +#endif } - if (strm->zfree == (free_func)0) strm->zfree = zcfree; + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif state = (struct inflate_state FAR *) ZALLOC(strm, 1, sizeof(struct inflate_state)); if (state == Z_NULL) return Z_MEM_ERROR; Tracev((stderr, "inflate: allocated\n")); strm->state = (struct internal_state FAR *)state; - if (windowBits < 0) { - state->wrap = 0; - windowBits = -windowBits; - } - else { - state->wrap = (windowBits >> 4) + 1; -#ifdef GUNZIP - if (windowBits < 48) windowBits &= 15; -#endif - } - if (windowBits < 8 || windowBits > 15) { + state->window = Z_NULL; + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { ZFREE(strm, state); strm->state = Z_NULL; - return Z_STREAM_ERROR; } - state->wbits = (unsigned)windowBits; - state->window = Z_NULL; - return inflateReset(strm); + return ret; } int ZEXPORT inflateInit_(strm, version, stream_size) @@ -197,6 +230,27 @@ int stream_size; return inflateInit2_(strm, DEF_WBITS, version, stream_size); } +int ZEXPORT inflatePrime(strm, bits, value) +z_streamp strm; +int bits; +int value; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += value << state->bits; + state->bits += bits; + return Z_OK; +} + /* Return state with length and distance decoding tables and index sizes set to fixed code decoding. Normally this returns fixed tables from inffixed.h. @@ -291,8 +345,8 @@ void makefixed() low = 0; for (;;) { if ((low % 7) == 0) printf("\n "); - printf("{%u,%u,%d}", state.lencode[low].op, state.lencode[low].bits, - state.lencode[low].val); + printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); if (++low == size) break; putchar(','); } @@ -325,12 +379,13 @@ void makefixed() output will fall in the output data, making match copies simpler and faster. The advantage may be dependent on the size of the processor's data caches. */ -local int updatewindow(strm, out) +local int updatewindow(strm, end, copy) z_streamp strm; -unsigned out; +const Bytef *end; +unsigned copy; { struct inflate_state FAR *state; - unsigned copy, dist; + unsigned dist; state = (struct inflate_state FAR *)strm->state; @@ -345,30 +400,29 @@ unsigned out; /* if window not in use yet, initialize */ if (state->wsize == 0) { state->wsize = 1U << state->wbits; - state->write = 0; + state->wnext = 0; state->whave = 0; } /* copy state->wsize or less output bytes into the circular window */ - copy = out - strm->avail_out; if (copy >= state->wsize) { - zmemcpy(state->window, strm->next_out - state->wsize, state->wsize); - state->write = 0; + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; state->whave = state->wsize; } else { - dist = state->wsize - state->write; + dist = state->wsize - state->wnext; if (dist > copy) dist = copy; - zmemcpy(state->window + state->write, strm->next_out - copy, dist); + zmemcpy(state->window + state->wnext, end - copy, dist); copy -= dist; if (copy) { - zmemcpy(state->window, strm->next_out - copy, copy); - state->write = copy; + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; state->whave = state->wsize; } else { - state->write += dist; - if (state->write == state->wsize) state->write = 0; + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; if (state->whave < state->wsize) state->whave += dist; } } @@ -469,11 +523,6 @@ unsigned out; bits -= bits & 7; \ } while (0) -/* Reverse the bytes in a 32-bit value */ -#define REVERSE(q) \ - ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ - (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) - /* inflate() uses a state machine to process as much input data and generate as much output data as possible before returning. The state machine is @@ -561,7 +610,7 @@ z_streamp strm; int flush; { struct inflate_state FAR *state; - unsigned char FAR *next; /* next input */ + z_const unsigned char FAR *next; /* next input */ unsigned char FAR *put; /* next output */ unsigned have, left; /* available input and output */ unsigned long hold; /* bit buffer */ @@ -569,7 +618,7 @@ int flush; unsigned in, out; /* save starting available input and output */ unsigned copy; /* number of stored or match bytes to copy */ unsigned char FAR *from; /* where to copy match bytes from */ - code this; /* current decoding table entry */ + code here; /* current decoding table entry */ code last; /* parent table entry */ unsigned len; /* length to copy for repeats, bits to drop */ int ret; /* return code */ @@ -624,7 +673,9 @@ int flush; } DROPBITS(4); len = BITS(4) + 8; - if (len > state->wbits) { + if (state->wbits == 0) + state->wbits = len; + else if (len > state->wbits) { strm->msg = (char *)"invalid window size"; state->mode = BAD; break; @@ -765,7 +816,7 @@ int flush; #endif case DICTID: NEEDBITS(32); - strm->adler = state->check = REVERSE(hold); + strm->adler = state->check = ZSWAP32(hold); INITBITS(); state->mode = DICT; case DICT: @@ -776,7 +827,7 @@ int flush; strm->adler = state->check = adler32(0L, Z_NULL, 0); state->mode = TYPE; case TYPE: - if (flush == Z_BLOCK) goto inf_leave; + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; case TYPEDO: if (state->last) { BYTEBITS(); @@ -796,7 +847,11 @@ int flush; fixedtables(state); Tracev((stderr, "inflate: fixed codes block%s\n", state->last ? " (last)" : "")); - state->mode = LEN; /* decode codes */ + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } break; case 2: /* dynamic block */ Tracev((stderr, "inflate: dynamic codes block%s\n", @@ -821,6 +876,9 @@ int flush; Tracev((stderr, "inflate: stored length %u\n", state->length)); INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + case COPY_: state->mode = COPY; case COPY: copy = state->length; @@ -866,7 +924,7 @@ int flush; while (state->have < 19) state->lens[order[state->have++]] = 0; state->next = state->codes; - state->lencode = (code const FAR *)(state->next); + state->lencode = (const code FAR *)(state->next); state->lenbits = 7; ret = inflate_table(CODES, state->lens, 19, &(state->next), &(state->lenbits), state->work); @@ -881,19 +939,18 @@ int flush; case CODELENS: while (state->have < state->nlen + state->ndist) { for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.val < 16) { - NEEDBITS(this.bits); - DROPBITS(this.bits); - state->lens[state->have++] = this.val; + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; } else { - if (this.val == 16) { - NEEDBITS(this.bits + 2); - DROPBITS(this.bits); + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); if (state->have == 0) { strm->msg = (char *)"invalid bit length repeat"; state->mode = BAD; @@ -903,16 +960,16 @@ int flush; copy = 3 + BITS(2); DROPBITS(2); } - else if (this.val == 17) { - NEEDBITS(this.bits + 3); - DROPBITS(this.bits); + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); len = 0; copy = 3 + BITS(3); DROPBITS(3); } else { - NEEDBITS(this.bits + 7); - DROPBITS(this.bits); + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); len = 0; copy = 11 + BITS(7); DROPBITS(7); @@ -930,9 +987,18 @@ int flush; /* handle error breaks in while */ if (state->mode == BAD) break; - /* build code tables */ + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ state->next = state->codes; - state->lencode = (code const FAR *)(state->next); + state->lencode = (const code FAR *)(state->next); state->lenbits = 9; ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), &(state->lenbits), state->work); @@ -941,7 +1007,7 @@ int flush; state->mode = BAD; break; } - state->distcode = (code const FAR *)(state->next); + state->distcode = (const code FAR *)(state->next); state->distbits = 6; ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, &(state->next), &(state->distbits), state->work); @@ -951,88 +1017,102 @@ int flush; break; } Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + case LEN_: state->mode = LEN; case LEN: if (have >= 6 && left >= 258) { RESTORE(); inflate_fast(strm, out); LOAD(); + if (state->mode == TYPE) + state->back = -1; break; } + state->back = 0; for (;;) { - this = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if (this.op && (this.op & 0xf0) == 0) { - last = this; + if (here.op && (here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->lencode[last.val + + here = state->lencode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); + state->back += last.bits; } - DROPBITS(this.bits); - state->length = (unsigned)this.val; - if ((int)(this.op) == 0) { - Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ? + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", this.val)); + "inflate: literal 0x%02x\n", here.val)); state->mode = LIT; break; } - if (this.op & 32) { + if (here.op & 32) { Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; state->mode = TYPE; break; } - if (this.op & 64) { + if (here.op & 64) { strm->msg = (char *)"invalid literal/length code"; state->mode = BAD; break; } - state->extra = (unsigned)(this.op) & 15; + state->extra = (unsigned)(here.op) & 15; state->mode = LENEXT; case LENEXT: if (state->extra) { NEEDBITS(state->extra); state->length += BITS(state->extra); DROPBITS(state->extra); + state->back += state->extra; } Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; state->mode = DIST; case DIST: for (;;) { - this = state->distcode[BITS(state->distbits)]; - if ((unsigned)(this.bits) <= bits) break; + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; PULLBYTE(); } - if ((this.op & 0xf0) == 0) { - last = this; + if ((here.op & 0xf0) == 0) { + last = here; for (;;) { - this = state->distcode[last.val + + here = state->distcode[last.val + (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + this.bits) <= bits) break; + if ((unsigned)(last.bits + here.bits) <= bits) break; PULLBYTE(); } DROPBITS(last.bits); + state->back += last.bits; } - DROPBITS(this.bits); - if (this.op & 64) { + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { strm->msg = (char *)"invalid distance code"; state->mode = BAD; break; } - state->offset = (unsigned)this.val; - state->extra = (unsigned)(this.op) & 15; + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; state->mode = DISTEXT; case DISTEXT: if (state->extra) { NEEDBITS(state->extra); state->offset += BITS(state->extra); DROPBITS(state->extra); + state->back += state->extra; } #ifdef INFLATE_STRICT if (state->offset > state->dmax) { @@ -1041,11 +1121,6 @@ int flush; break; } #endif - if (state->offset > state->whave + out - left) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } Tracevv((stderr, "inflate: distance %u\n", state->offset)); state->mode = MATCH; case MATCH: @@ -1053,12 +1128,32 @@ int flush; copy = out - left; if (state->offset > copy) { /* copy from window */ copy = state->offset - copy; - if (copy > state->write) { - copy -= state->write; + if (copy > state->whave) { + if (state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; from = state->window + (state->wsize - copy); } else - from = state->window + (state->write - copy); + from = state->window + (state->wnext - copy); if (copy > state->length) copy = state->length; } else { /* copy from output */ @@ -1093,7 +1188,7 @@ int flush; #ifdef GUNZIP state->flags ? hold : #endif - REVERSE(hold)) != state->check) { + ZSWAP32(hold)) != state->check) { strm->msg = (char *)"incorrect data check"; state->mode = BAD; break; @@ -1137,8 +1232,9 @@ int flush; */ inf_leave: RESTORE(); - if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) - if (updatewindow(strm, out)) { + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { state->mode = MEM; return Z_MEM_ERROR; } @@ -1151,7 +1247,8 @@ int flush; strm->adler = state->check = UPDATE(state->check, strm->next_out - out, out); strm->data_type = state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0); + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) ret = Z_BUF_ERROR; return ret; @@ -1171,13 +1268,37 @@ z_streamp strm; return Z_OK; } +int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) +z_streamp strm; +Bytef *dictionary; +uInt *dictLength; +{ + struct inflate_state FAR *state; + + /* check state */ + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) z_streamp strm; const Bytef *dictionary; uInt dictLength; { struct inflate_state FAR *state; - unsigned long id; + unsigned long dictid; + int ret; /* check state */ if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; @@ -1185,29 +1306,21 @@ uInt dictLength; if (state->wrap != 0 && state->mode != DICT) return Z_STREAM_ERROR; - /* check for correct dictionary id */ + /* check for correct dictionary identifier */ if (state->mode == DICT) { - id = adler32(0L, Z_NULL, 0); - id = adler32(id, dictionary, dictLength); - if (id != state->check) + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) return Z_DATA_ERROR; } - /* copy dictionary to window */ - if (updatewindow(strm, strm->avail_out)) { + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { state->mode = MEM; return Z_MEM_ERROR; } - if (dictLength > state->wsize) { - zmemcpy(state->window, dictionary + dictLength - state->wsize, - state->wsize); - state->whave = state->wsize; - } - else { - zmemcpy(state->window + state->wsize - dictLength, dictionary, - dictLength); - state->whave = dictLength; - } state->havedict = 1; Tracev((stderr, "inflate: dictionary set\n")); return Z_OK; @@ -1243,7 +1356,7 @@ gz_headerp head; */ local unsigned syncsearch(have, buf, len) unsigned FAR *have; -unsigned char FAR *buf; +const unsigned char FAR *buf; unsigned len; { unsigned got; @@ -1355,8 +1468,8 @@ z_streamp source; } /* copy state */ - zmemcpy(dest, source, sizeof(z_stream)); - zmemcpy(copy, state, sizeof(struct inflate_state)); + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); if (state->lencode >= state->codes && state->lencode <= state->codes + ENOUGH - 1) { copy->lencode = copy->codes + (state->lencode - state->codes); @@ -1371,3 +1484,32 @@ z_streamp source; dest->state = (struct internal_state FAR *)copy; return Z_OK; } + +int ZEXPORT inflateUndermine(strm, subvert) +z_streamp strm; +int subvert; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->sane = !subvert; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + return Z_OK; +#else + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +long ZEXPORT inflateMark(strm) +z_streamp strm; +{ + struct inflate_state FAR *state; + + if (strm == Z_NULL || strm->state == Z_NULL) return -1L << 16; + state = (struct inflate_state FAR *)strm->state; + return ((long)(state->back) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} diff --git a/erts/emulator/zlib/inflate.h b/erts/emulator/zlib/inflate.h index 59164091c5..95f4986d40 100644 --- a/erts/emulator/zlib/inflate.h +++ b/erts/emulator/zlib/inflate.h @@ -1,10 +1,8 @@ /* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2004 Mark Adler + * Copyright (C) 1995-2009 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. @@ -34,11 +32,13 @@ typedef enum { TYPE, /* i: waiting for type bits, including last-flag bit */ TYPEDO, /* i: same, but skip check to exit inflate on new block */ STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ COPY, /* i/o: waiting for input or output to copy stored block */ TABLE, /* i: waiting for dynamic block table lengths */ LENLENS, /* i: waiting for code length code lengths */ CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN, /* i: waiting for length/lit code */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ LENEXT, /* i: waiting for length extra bits */ DIST, /* i: waiting for distance code */ DISTEXT, /* i: waiting for distance extra bits */ @@ -55,19 +55,21 @@ typedef enum { /* State transitions between above modes - - (most modes can go to the BAD or MEM mode -- not shown for clarity) + (most modes can go to BAD or MEM on error -- not shown for clarity) Process header: - HEAD -> (gzip) or (zlib) - (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME - NAME -> COMMENT -> HCRC -> TYPE + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE (zlib) -> DICTID or TYPE DICTID -> DICT -> TYPE + (raw) -> TYPEDO Read deflate blocks: - TYPE -> STORED or TABLE or LEN or CHECK - STORED -> COPY -> TYPE - TABLE -> LENLENS -> CODELENS -> LEN - Read deflate codes: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: LEN -> LENEXT or LIT or TYPE LENEXT -> DIST -> DISTEXT -> MATCH -> LEN LIT -> LEN @@ -75,7 +77,7 @@ typedef enum { CHECK -> LENGTH -> DONE */ -/* state maintained between inflate() calls. Approximately 7K bytes. */ +/* state maintained between inflate() calls. Approximately 10K bytes. */ struct inflate_state { inflate_mode mode; /* current inflate mode */ int last; /* true if processing last block */ @@ -90,7 +92,7 @@ struct inflate_state { unsigned wbits; /* log base 2 of requested window size */ unsigned wsize; /* window size or zero if not using window */ unsigned whave; /* valid bytes in the window */ - unsigned write; /* window write index */ + unsigned wnext; /* window write index */ unsigned char FAR *window; /* allocated sliding window, if needed */ /* bit accumulator */ unsigned long hold; /* input bit accumulator */ @@ -114,4 +116,7 @@ struct inflate_state { unsigned short lens[320]; /* temporary storage for code lengths */ unsigned short work[288]; /* work area for code table building */ code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ }; diff --git a/erts/emulator/zlib/inftrees.c b/erts/emulator/zlib/inftrees.c index 832fe28668..3766fa2646 100644 --- a/erts/emulator/zlib/inftrees.c +++ b/erts/emulator/zlib/inftrees.c @@ -1,10 +1,8 @@ /* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2013 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - #ifdef HAVE_CONFIG_H # include "config.h" #endif @@ -14,7 +12,7 @@ #define MAXBITS 15 const char inflate_copyright[] = - " inflate 1.2.3 Copyright 1995-2005 Mark Adler "; + " inflate 1.2.8 Copyright 1995-2013 Mark Adler "; /* If you use the zlib library in a product, an acknowledgment is welcome in the documentation of your product. If for some reason you cannot @@ -34,7 +32,7 @@ const char inflate_copyright[] = table index bits. It will differ if the request is greater than the longest code or if it is less than the shortest code. */ -int inflate_table(type, lens, codes, table, bits, work) +int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) codetype type; unsigned short FAR *lens; unsigned codes; @@ -55,7 +53,7 @@ unsigned short FAR *work; unsigned fill; /* index for replicating entries */ unsigned low; /* low bits for current root entry */ unsigned mask; /* mask for low root bits */ - code this; /* table entry for duplication */ + code here; /* table entry for duplication */ code FAR *next; /* next available space in table */ const unsigned short FAR *base; /* base value table to use */ const unsigned short FAR *extra; /* extra bits table to use */ @@ -67,7 +65,7 @@ unsigned short FAR *work; 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; static const unsigned short lext[31] = { /* Length codes 257..285 extra */ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78}; static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, @@ -120,15 +118,15 @@ unsigned short FAR *work; if (count[max] != 0) break; if (root > max) root = max; if (max == 0) { /* no symbols to code at all */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)1; - this.val = (unsigned short)0; - *(*table)++ = this; /* make a table to force an error */ - *(*table)++ = this; + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; *bits = 1; return 0; /* no symbols, but wait for decoding to report error */ } - for (min = 1; min <= MAXBITS; min++) + for (min = 1; min < max; min++) if (count[min] != 0) break; if (root < min) root = min; @@ -171,11 +169,10 @@ unsigned short FAR *work; entered in the tables. used keeps track of how many table entries have been allocated from the - provided *table space. It is checked when a LENS table is being made - against the space in *table, ENOUGH, minus the maximum space needed by - the worst case distance code, MAXD. This should never happen, but the - sufficiency of ENOUGH has not been proven exhaustively, hence the check. - This assumes that when type == LENS, bits == 9. + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. sym increments through all symbols, and the loop terminates when all codes of length max, i.e. all codes, have been processed. This @@ -214,24 +211,25 @@ unsigned short FAR *work; mask = used - 1; /* mask for comparing low */ /* check available table space */ - if (type == LENS && used >= ENOUGH - MAXD) + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) return 1; /* process all codes and make table entries */ for (;;) { /* create table entry */ - this.bits = (unsigned char)(len - drop); + here.bits = (unsigned char)(len - drop); if ((int)(work[sym]) < end) { - this.op = (unsigned char)0; - this.val = work[sym]; + here.op = (unsigned char)0; + here.val = work[sym]; } else if ((int)(work[sym]) > end) { - this.op = (unsigned char)(extra[work[sym]]); - this.val = base[work[sym]]; + here.op = (unsigned char)(extra[work[sym]]); + here.val = base[work[sym]]; } else { - this.op = (unsigned char)(32 + 64); /* end of block */ - this.val = 0; + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; } /* replicate for those indices with low len bits equal to huff */ @@ -240,7 +238,7 @@ unsigned short FAR *work; min = fill; /* save offset to next table */ do { fill -= incr; - next[(huff >> drop) + fill] = this; + next[(huff >> drop) + fill] = here; } while (fill != 0); /* backwards increment the len-bit code huff */ @@ -282,7 +280,8 @@ unsigned short FAR *work; /* check for enough space */ used += 1U << curr; - if (type == LENS && used >= ENOUGH - MAXD) + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) return 1; /* point entry in root table to sub-table */ @@ -293,38 +292,14 @@ unsigned short FAR *work; } } - /* - Fill in rest of table for incomplete codes. This loop is similar to the - loop above in incrementing huff for table indices. It is assumed that - len is equal to curr + drop, so there is no loop needed to increment - through high index bits. When the current sub-table is filled, the loop - drops back to the root table to fill in any remaining entries there. - */ - this.op = (unsigned char)64; /* invalid code marker */ - this.bits = (unsigned char)(len - drop); - this.val = (unsigned short)0; - while (huff != 0) { - /* when done with sub-table, drop back to root table */ - if (drop != 0 && (huff & mask) != low) { - drop = 0; - len = root; - next = *table; - this.bits = (unsigned char)len; - } - - /* put invalid code marker in table */ - next[huff >> drop] = this; - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; } /* set return parameters */ diff --git a/erts/emulator/zlib/inftrees.h b/erts/emulator/zlib/inftrees.h index 808100f70a..baa53a0b1a 100644 --- a/erts/emulator/zlib/inftrees.h +++ b/erts/emulator/zlib/inftrees.h @@ -1,10 +1,8 @@ /* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2005 Mark Adler + * Copyright (C) 1995-2005, 2010 Mark Adler * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. @@ -37,21 +35,28 @@ typedef struct { 01000000 - invalid code */ -/* Maximum size of dynamic tree. The maximum found in a long but non- - exhaustive search was 1444 code structures (852 for length/literals - and 592 for distances, the latter actually the result of an - exhaustive search). The true maximum is not known, but the value - below is more than safe. */ -#define ENOUGH 2048 -#define MAXD 592 +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribtution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns returns 852, and "enough 30 6 15" for distance codes returns 592. + The initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. */ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) -/* Type of code to build for inftable() */ +/* Type of code to build for inflate_table() */ typedef enum { CODES, LENS, DISTS } codetype; -extern int inflate_table OF((codetype type, unsigned short FAR *lens, +int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, unsigned codes, code FAR * FAR *table, unsigned FAR *bits, unsigned short FAR *work)); diff --git a/erts/emulator/zlib/trees.c b/erts/emulator/zlib/trees.c index 7d9f77f451..465e944e5b 100644 --- a/erts/emulator/zlib/trees.c +++ b/erts/emulator/zlib/trees.c @@ -1,10 +1,9 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2005 Jean-loup Gailly + * Copyright (C) 1995-2012 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* * ALGORITHM * @@ -78,11 +77,6 @@ local const uch bl_order[BL_CODES] * probability, to avoid transmitting the lengths for unused bit length codes. */ -#define Buf_size (8 * 2*sizeof(char)) -/* Number of bits used within bi_buf. (bi_buf might be implemented on - * more than 16 bits on some systems.) - */ - /* =========================================================================== * Local data. These are initialized only once. */ @@ -155,9 +149,9 @@ local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); local int build_bl_tree OF((deflate_state *s)); local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, int blcodes)); -local void compress_block OF((deflate_state *s, ct_data *ltree, - ct_data *dtree)); -local void set_data_type OF((deflate_state *s)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); local unsigned bi_reverse OF((unsigned value, int length)); local void bi_windup OF((deflate_state *s)); local void bi_flush OF((deflate_state *s)); @@ -208,12 +202,12 @@ local void send_bits(s, value, length) * unused bits in value. */ if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (value << s->bi_valid); + s->bi_buf |= (ush)value << s->bi_valid; put_short(s, s->bi_buf); s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); s->bi_valid += length - Buf_size; } else { - s->bi_buf |= value << s->bi_valid; + s->bi_buf |= (ush)value << s->bi_valid; s->bi_valid += length; } } @@ -223,12 +217,12 @@ local void send_bits(s, value, length) { int len = length;\ if (s->bi_valid > (int)Buf_size - len) {\ int val = value;\ - s->bi_buf |= (val << s->bi_valid);\ + s->bi_buf |= (ush)val << s->bi_valid;\ put_short(s, s->bi_buf);\ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ s->bi_valid += len - Buf_size;\ } else {\ - s->bi_buf |= (value) << s->bi_valid;\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ s->bi_valid += len;\ }\ } @@ -255,11 +249,13 @@ local void tr_static_init() if (static_init_done) return; /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS static_l_desc.static_tree = static_ltree; static_l_desc.extra_bits = extra_lbits; static_d_desc.static_tree = static_dtree; static_d_desc.extra_bits = extra_dbits; static_bl_desc.extra_bits = extra_blbits; +#endif /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; @@ -353,13 +349,14 @@ void gen_trees_header() static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); } - fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n"); + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); for (i = 0; i < DIST_CODE_LEN; i++) { fprintf(header, "%2u%s", _dist_code[i], SEPARATOR(i, DIST_CODE_LEN-1, 20)); } - fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { fprintf(header, "%2u%s", _length_code[i], SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); @@ -384,7 +381,7 @@ void gen_trees_header() /* =========================================================================== * Initialize the tree data structures for a new zlib stream. */ -void _tr_init(s) +void ZLIB_INTERNAL _tr_init(s) deflate_state *s; { tr_static_init(); @@ -400,7 +397,6 @@ void _tr_init(s) s->bi_buf = 0; s->bi_valid = 0; - s->last_eob_len = 8; /* enough lookahead for inflate */ #ifdef DEBUG s->compressed_len = 0L; s->bits_sent = 0L; @@ -869,13 +865,13 @@ local void send_all_trees(s, lcodes, dcodes, blcodes) /* =========================================================================== * Send a stored block */ -void _tr_stored_block(s, buf, stored_len, eof) +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) deflate_state *s; charf *buf; /* input block */ ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ + int last; /* one if this is the last block for a file */ { - send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ #ifdef DEBUG s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; s->compressed_len += (stored_len + 4) << 3; @@ -884,17 +880,19 @@ void _tr_stored_block(s, buf, stored_len, eof) } /* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== * Send one empty static block to give enough lookahead for inflate. * This takes 10 bits, of which 7 may remain in the bit buffer. - * The current inflate code requires 9 bits of lookahead. If the - * last two codes for the previous block (real code plus EOB) were coded - * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode - * the last real code. In this case we send two empty static blocks instead - * of one. (There are no problems if the previous block is stored or fixed.) - * To simplify the code, we assume the worst case of last real code encoded - * on one bit only. */ -void _tr_align(s) +void ZLIB_INTERNAL _tr_align(s) deflate_state *s; { send_bits(s, STATIC_TREES<<1, 3); @@ -903,31 +901,17 @@ void _tr_align(s) s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ #endif bi_flush(s); - /* Of the 10 bits for the empty block, we have already sent - * (10 - bi_valid) bits. The lookahead for the last real code (before - * the EOB of the previous block) was thus at least one plus the length - * of the EOB plus what we have just sent of the empty static block. - */ - if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef DEBUG - s->compressed_len += 10L; -#endif - bi_flush(s); - } - s->last_eob_len = 7; } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static * trees or store, and output the encoded block to the zip file. */ -void _tr_flush_block(s, buf, stored_len, eof) +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) deflate_state *s; charf *buf; /* input block, or NULL if too old */ ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ + int last; /* one if this is the last block for a file */ { ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ int max_blindex = 0; /* index of last bit length code of non zero freq */ @@ -936,8 +920,8 @@ void _tr_flush_block(s, buf, stored_len, eof) if (s->level > 0) { /* Check if the file is binary or text */ - if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) - set_data_type(s); + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); /* Construct the literal and distance trees */ build_tree(s, (tree_desc *)(&(s->l_desc))); @@ -983,23 +967,25 @@ void _tr_flush_block(s, buf, stored_len, eof) * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to * transform a block into a stored block. */ - _tr_stored_block(s, buf, stored_len, eof); + _tr_stored_block(s, buf, stored_len, last); #ifdef FORCE_STATIC } else if (static_lenb >= 0) { /* force static trees */ #else } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { #endif - send_bits(s, (STATIC_TREES<<1)+eof, 3); - compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); #ifdef DEBUG s->compressed_len += 3 + s->static_len; #endif } else { - send_bits(s, (DYN_TREES<<1)+eof, 3); + send_bits(s, (DYN_TREES<<1)+last, 3); send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, max_blindex+1); - compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); #ifdef DEBUG s->compressed_len += 3 + s->opt_len; #endif @@ -1010,21 +996,21 @@ void _tr_flush_block(s, buf, stored_len, eof) */ init_block(s); - if (eof) { + if (last) { bi_windup(s); #ifdef DEBUG s->compressed_len += 7; /* align on byte boundary */ #endif } Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*eof)); + s->compressed_len-7*last)); } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ -int _tr_tally (s, dist, lc) +int ZLIB_INTERNAL _tr_tally (s, dist, lc) deflate_state *s; unsigned dist; /* distance of matched string */ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ @@ -1076,8 +1062,8 @@ int _tr_tally (s, dist, lc) */ local void compress_block(s, ltree, dtree) deflate_state *s; - ct_data *ltree; /* literal tree */ - ct_data *dtree; /* distance tree */ + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ @@ -1119,28 +1105,48 @@ local void compress_block(s, ltree, dtree) } while (lx < s->last_lit); send_code(s, END_BLOCK, ltree); - s->last_eob_len = ltree[END_BLOCK].Len; } /* =========================================================================== - * Set the data type to BINARY or TEXT, using a crude approximation: - * set it to Z_TEXT if all symbols are either printable characters (33 to 255) - * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "black list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). * IN assertion: the fields Freq of dyn_ltree are set. */ -local void set_data_type(s) +local int detect_data_type(s) deflate_state *s; { + /* black_mask is the bit mask of black-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long black_mask = 0xf3ffc07fUL; int n; - for (n = 0; n < 9; n++) + /* Check for non-textual ("black-listed") bytes. */ + for (n = 0; n <= 31; n++, black_mask >>= 1) + if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("white-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) if (s->dyn_ltree[n].Freq != 0) - break; - if (n == 9) - for (n = 14; n < 32; n++) - if (s->dyn_ltree[n].Freq != 0) - break; - s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; + return Z_TEXT; + + /* There are no "black-listed" or "white-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; } /* =========================================================================== @@ -1206,7 +1212,6 @@ local void copy_block(s, buf, len, header) int header; /* true if block header must be written */ { bi_windup(s); /* align on byte boundary */ - s->last_eob_len = 8; /* enough lookahead for inflate */ if (header) { put_short(s, (ush)len); diff --git a/erts/emulator/zlib/trees.h b/erts/emulator/zlib/trees.h index 72facf900f..d35639d82a 100644 --- a/erts/emulator/zlib/trees.h +++ b/erts/emulator/zlib/trees.h @@ -70,7 +70,7 @@ local const ct_data static_dtree[D_CODES] = { {{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} }; -const uch _dist_code[DIST_CODE_LEN] = { +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, @@ -99,7 +99,7 @@ const uch _dist_code[DIST_CODE_LEN] = { 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 }; -const uch _length_code[MAX_MATCH-MIN_MATCH+1]= { +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, diff --git a/erts/emulator/zlib/uncompr.c b/erts/emulator/zlib/uncompr.c index cbc93cb1eb..864d571719 100644 --- a/erts/emulator/zlib/uncompr.c +++ b/erts/emulator/zlib/uncompr.c @@ -1,10 +1,8 @@ /* uncompr.c -- decompress a memory buffer - * Copyright (C) 1995-2003 Jean-loup Gailly. + * Copyright (C) 1995-2003, 2010 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ #ifdef HAVE_CONFIG_H @@ -21,8 +19,6 @@ been saved previously by the compressor and transmitted to the decompressor by some mechanism outside the scope of this compression library.) Upon exit, destLen is the actual size of the compressed buffer. - This function can be used to decompress a whole file at once if the - input file is mmap'ed. uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output @@ -37,7 +33,7 @@ int ZEXPORT uncompress (dest, destLen, source, sourceLen) z_stream stream; int err; - stream.next_in = (Bytef*)source; + stream.next_in = (z_const Bytef *)source; stream.avail_in = (uInt)sourceLen; /* Check for source > 64K on 16-bit machine: */ if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; diff --git a/erts/emulator/zlib/zconf.h b/erts/emulator/zlib/zconf.h index b7979d48d3..9987a77553 100644 --- a/erts/emulator/zlib/zconf.h +++ b/erts/emulator/zlib/zconf.h @@ -1,10 +1,8 @@ /* zconf.h -- configuration of the zlib compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2013 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ #ifndef ZCONF_H @@ -13,52 +11,145 @@ /* * If you *really* need a unique prefix for all types and library functions, * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". */ -#ifdef Z_PREFIX -# define deflateInit_ z_deflateInit_ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 # define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy # define deflateEnd z_deflateEnd -# define inflateInit_ z_inflateInit_ -# define inflate z_inflate -# define inflateEnd z_inflateEnd # define deflateInit2_ z_deflateInit2_ -# define deflateSetDictionary z_deflateSetDictionary -# define deflateCopy z_deflateCopy -# define deflateReset z_deflateReset +# define deflateInit_ z_deflateInit_ # define deflateParams z_deflateParams -# define deflateBound z_deflateBound +# define deflatePending z_deflatePending # define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzvprintf z_gzvprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetHeader z_inflateGetHeader # define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 # define inflateSetDictionary z_inflateSetDictionary +# define inflateGetDictionary z_inflateGetDictionary # define inflateSync z_inflateSync # define inflateSyncPoint z_inflateSyncPoint -# define inflateCopy z_inflateCopy -# define inflateReset z_inflateReset -# define inflateBack z_inflateBack -# define inflateBackEnd z_inflateBackEnd -# define compress z_compress -# define compress2 z_compress2 -# define compressBound z_compressBound -# define uncompress z_uncompress -# define adler32 z_adler32 -# define crc32 z_crc32 -# define get_crc_table z_get_crc_table +# define inflateUndermine z_inflateUndermine +# define inflateResetKeep z_inflateResetKeep +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# endif # define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef # define alloc_func z_alloc_func +# define charf z_charf # define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp # define in_func z_in_func +# define intf z_intf # define out_func z_out_func -# define Byte z_Byte # define uInt z_uInt -# define uLong z_uLong -# define Bytef z_Bytef -# define charf z_charf -# define intf z_intf # define uIntf z_uIntf +# define uLong z_uLong # define uLongf z_uLongf -# define voidpf z_voidpf # define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + #endif #if defined(__MSDOS__) && !defined(MSDOS) @@ -127,6 +218,12 @@ # endif #endif +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + /* Some Mac compilers merge all .h files incorrectly: */ #if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__) # define NO_DUMMY_DECL @@ -173,6 +270,14 @@ # endif #endif +#ifndef Z_ARG /* function prototypes for stdarg */ +# if defined(STDC) || defined(Z_HAVE_STDARG_H) +# define Z_ARG(args) args +# else +# define Z_ARG(args) () +# endif +#endif + /* The following definitions for FAR are needed only for MSDOS mixed * model programming (small or medium model with some far allocations). * This was tested only with MSC; for other MSDOS compilers you may have @@ -286,49 +391,121 @@ typedef uLong FAR uLongf; typedef Byte *voidp; #endif -#if 0 /* HAVE_UNISTD_H -- this line is updated by ./configure */ -# include <sys/types.h> /* for off_t */ -# include <unistd.h> /* for SEEK_* and off_t */ -# ifdef VMS -# include <unixio.h> /* for off_t */ +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ # endif -# define z_off_t off_t #endif -#ifndef SEEK_SET + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H) +# define Z_HAVE_UNISTD_H +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) # define SEEK_SET 0 /* Seek from beginning of file. */ # define SEEK_CUR 1 /* Seek from current position. */ # define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ #endif + #ifndef z_off_t # define z_off_t long #endif -#if defined(__OS400__) -# define NO_vsnprintf -#endif - -#if defined(__MVS__) -# define NO_vsnprintf -# ifdef FAR -# undef FAR +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t # endif #endif /* MVS linker does not support external names larger than 8 bytes */ #if defined(__MVS__) -# pragma map(deflateInit_,"DEIN") -# pragma map(deflateInit2_,"DEIN2") -# pragma map(deflateEnd,"DEEND") -# pragma map(deflateBound,"DEBND") -# pragma map(inflateInit_,"ININ") -# pragma map(inflateInit2_,"ININ2") -# pragma map(inflateEnd,"INEND") -# pragma map(inflateSync,"INSY") -# pragma map(inflateSetDictionary,"INSEDI") -# pragma map(compressBound,"CMBND") -# pragma map(inflate_table,"INTABL") -# pragma map(inflate_fast,"INFA") -# pragma map(inflate_copyright,"INCOPY") + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") #endif #endif /* ZCONF_H */ diff --git a/erts/emulator/zlib/zlib.h b/erts/emulator/zlib/zlib.h index 9209774383..3e0c7672ac 100644 --- a/erts/emulator/zlib/zlib.h +++ b/erts/emulator/zlib/zlib.h @@ -1,7 +1,7 @@ /* zlib.h -- interface of the 'zlib' general purpose compression library - version 1.2.3, July 18th, 2005 + version 1.2.8, April 28th, 2013 - Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler + Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages @@ -24,12 +24,10 @@ The data format used by the zlib library is described by RFCs (Request for - Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt - (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format). + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). */ -/* %ExternalCopyright% */ - #ifndef ZLIB_H #define ZLIB_H @@ -39,41 +37,44 @@ extern "C" { #endif -#define ZLIB_VERSION "1.2.3" -#define ZLIB_VERNUM 0x1230 +#define ZLIB_VERSION "1.2.8" +#define ZLIB_VERNUM 0x1280 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 2 +#define ZLIB_VER_REVISION 8 +#define ZLIB_VER_SUBREVISION 0 /* - The 'zlib' compression library provides in-memory compression and - decompression functions, including integrity checks of the uncompressed - data. This version of the library supports only one compression method - (deflation) but other algorithms will be added later and will have the same - stream interface. - - Compression can be done in a single step if the buffers are large - enough (for example if an input file is mmap'ed), or can be done by - repeated calls of the compression function. In the latter case, the - application must provide more input and/or consume the output + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output (providing more output space) before each call. - The compressed data format used by default by the in-memory functions is + The compressed data format used by default by the in-memory functions is the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped around a deflate stream, which is itself documented in RFC 1951. - The library also supports reading and writing files in gzip (.gz) format + The library also supports reading and writing files in gzip (.gz) format with an interface similar to that of stdio using the functions that start with "gz". The gzip format is different from the zlib format. gzip is a gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. - This library can optionally read and write gzip streams in memory as well. + This library can optionally read and write gzip streams in memory as well. - The zlib format was designed to be compact and fast for use in memory + The zlib format was designed to be compact and fast for use in memory and on communications channels. The gzip format was designed for single- file compression on file systems, has a larger header than zlib to maintain directory information, and uses a different, slower check method than zlib. - The library does not install any signal handler. The decoder checks - the consistency of the compressed data, so the library should never - crash even in case of corrupted input. + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in case of corrupted input. */ typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); @@ -82,15 +83,15 @@ typedef void (*free_func) OF((voidpf opaque, voidpf address)); struct internal_state; typedef struct z_stream_s { - Bytef *next_in; /* next input byte */ + z_const Bytef *next_in; /* next input byte */ uInt avail_in; /* number of bytes available at next_in */ - uLong total_in; /* total nb of input bytes read so far */ + uLong total_in; /* total number of input bytes read so far */ Bytef *next_out; /* next output byte should be put there */ uInt avail_out; /* remaining free space at next_out */ - uLong total_out; /* total nb of bytes output so far */ + uLong total_out; /* total number of bytes output so far */ - char *msg; /* last error message, NULL if no error */ + z_const char *msg; /* last error message, NULL if no error */ struct internal_state FAR *state; /* not visible by applications */ alloc_func zalloc; /* used to allocate the internal state */ @@ -128,45 +129,45 @@ typedef struct gz_header_s { typedef gz_header FAR *gz_headerp; /* - The application must update next_in and avail_in when avail_in has - dropped to zero. It must update next_out and avail_out when avail_out - has dropped to zero. The application must initialize zalloc, zfree and - opaque before calling the init function. All other fields are set by the - compression library and must not be updated by the application. - - The opaque value provided by the application will be passed as the first - parameter for calls of zalloc and zfree. This can be useful for custom - memory management. The compression library attaches no meaning to the + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the opaque value. - zalloc must return Z_NULL if there is not enough memory for the object. + zalloc must return Z_NULL if there is not enough memory for the object. If zlib is used in a multi-threaded application, zalloc and zfree must be thread safe. - On 16-bit systems, the functions zalloc and zfree must be able to allocate - exactly 65536 bytes, but will not be required to allocate more than this - if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, - pointers returned by zalloc for objects of exactly 65536 bytes *must* - have their offset normalized to zero. The default allocation function - provided by this library ensures this (see zutil.c). To reduce memory - requirements and avoid any allocation of 64K objects, at the expense of - compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). - - The fields total_in and total_out can be used for statistics or - progress reports. After compression, total_in holds the total size of - the uncompressed data and may be saved for use in the decompressor - (particularly if the decompressor wants to decompress everything in - a single step). + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use in the decompressor (particularly + if the decompressor wants to decompress everything in a single step). */ /* constants */ #define Z_NO_FLUSH 0 -#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_PARTIAL_FLUSH 1 #define Z_SYNC_FLUSH 2 #define Z_FULL_FLUSH 3 #define Z_FINISH 4 #define Z_BLOCK 5 +#define Z_TREES 6 /* Allowed flush values; see deflate() and inflate() below for details */ #define Z_OK 0 @@ -178,8 +179,8 @@ typedef gz_header FAR *gz_headerp; #define Z_MEM_ERROR (-4) #define Z_BUF_ERROR (-5) #define Z_VERSION_ERROR (-6) -/* Return codes for the compression/decompression functions. Negative - * values are errors, positive values are used for special but normal events. +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. */ #define Z_NO_COMPRESSION 0 @@ -209,119 +210,141 @@ typedef gz_header FAR *gz_headerp; #define zlib_version zlibVersion() /* for compatibility with versions < 1.0.2 */ + /* basic functions */ ZEXTERN const char * ZEXPORT zlibVersion OF((void)); /* The application can compare zlibVersion and ZLIB_VERSION for consistency. - If the first character differs, the library code actually used is - not compatible with the zlib.h header file used by the application. - This check is automatically made by deflateInit and inflateInit. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. */ /* ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level)); - Initializes the internal stream state for compression. The fields - zalloc, zfree and opaque must be initialized before by the caller. - If zalloc and zfree are set to Z_NULL, deflateInit updates them to - use default allocation functions. + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: - 1 gives best speed, 9 gives best compression, 0 gives no compression at - all (the input data is simply copied a block at a time). - Z_DEFAULT_COMPRESSION requests a default compromise between speed and - compression (currently equivalent to level 6). + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). - deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not - enough memory, Z_STREAM_ERROR if level is not a valid compression level, + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible - with the version assumed by the caller (ZLIB_VERSION). - msg is set to null if there is no error message. deflateInit does not - perform any compression: this will be done by deflate(). + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). */ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); /* deflate compresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce some - output latency (reading input without producing any output) except when + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when forced to flush. - The detailed semantics are as follows. deflate performs one or both of the + The detailed semantics are as follows. deflate performs one or both of the following actions: - Compress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not + accordingly. If not all input can be processed (because there is not enough room in the output buffer), next_in and avail_in are updated and processing will resume at this point for the next call of deflate(). - Provide more output starting at next_out and update next_out and avail_out - accordingly. This action is forced if the parameter flush is non zero. + accordingly. This action is forced if the parameter flush is non zero. Forcing flush frequently degrades the compression ratio, so this parameter - should be set only when necessary (in interactive applications). - Some output may be provided even if flush is not set. - - Before the call of deflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating avail_in or avail_out accordingly; avail_out - should never be zero before the call. The application can consume the - compressed output when it wants, for example when the output buffer is full - (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK - and with zero avail_out, it must be called again after making room in the - output buffer because there might be more output pending. + should be set only when necessary (in interactive applications). Some + output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to - decide how much data to accumualte before producing output, in order to + decide how much data to accumulate before producing output, in order to maximize compression. If the parameter flush is set to Z_SYNC_FLUSH, all pending output is flushed to the output buffer and the output is aligned on a byte boundary, so - that the decompressor can get all input data available so far. (In particular - avail_in is zero after the call if enough output space has been provided - before the call.) Flushing may degrade compression for some compression - algorithms and so it should be used only when necessary. + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed code + block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. If flush is set to Z_FULL_FLUSH, all output is flushed as with Z_SYNC_FLUSH, and the compression state is reset so that decompression can restart from this point if previous compressed data has been damaged or if - random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade compression. If deflate returns with avail_out == 0, this function must be called again with the same value of the flush parameter and more output space (updated avail_out), until the flush is complete (deflate returns with non-zero - avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out is greater than six to avoid repeated flush markers due to avail_out == 0 on return. If the parameter flush is set to Z_FINISH, pending input is processed, - pending output is flushed and deflate returns with Z_STREAM_END if there - was enough output space; if deflate returns with Z_OK, this function must be + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space; if deflate returns with Z_OK, this function must be called again with Z_FINISH and more output space (updated avail_out) but no - more input data, until it returns with Z_STREAM_END or an error. After - deflate has returned Z_STREAM_END, the only possible operations on the - stream are deflateReset or deflateEnd. + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the stream + are deflateReset or deflateEnd. Z_FINISH can be used immediately after deflateInit if all the compression - is to be done in a single step. In this case, avail_out must be at least - the value returned by deflateBound (see below). If deflate does not return - Z_STREAM_END, then it must be called again as described above. + is to be done in a single step. In this case, avail_out must be at least the + value returned by deflateBound (see below). Then deflate is guaranteed to + return Z_STREAM_END. If not enough output space is provided, deflate will + not return Z_STREAM_END, and it must be called again as described above. deflate() sets strm->adler to the adler32 checksum of all input read so far (that is, total_in bytes). deflate() may update strm->data_type if it can make a good guess about - the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered - binary. This field is only for information purposes and does not affect - the compression algorithm in any manner. + the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered + binary. This field is only for information purposes and does not affect the + compression algorithm in any manner. deflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if all input has been consumed and all output has been produced (only when flush is set to Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example - if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible - (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not + if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and deflate() can be called again with more input and more output space to continue compressing. */ @@ -330,13 +353,13 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush)); ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); /* All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. + This function discards any unprocessed input and does not flush any pending + output. deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state was inconsistent, Z_DATA_ERROR if the stream was freed - prematurely (some input or output was discarded). In the error case, - msg may be set but then points to a static string (which must not be + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be deallocated). */ @@ -344,10 +367,10 @@ ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm)); /* ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); - Initializes the internal stream state for decompression. The fields + Initializes the internal stream state for decompression. The fields next_in, avail_in, zalloc, zfree and opaque must be initialized before by - the caller. If next_in is not Z_NULL and avail_in is large enough (the exact - value depends on the compression method), inflateInit determines the + the caller. If next_in is not Z_NULL and avail_in is large enough (the + exact value depends on the compression method), inflateInit determines the compression method from the zlib header and allocates all data structures accordingly; otherwise the allocation will be deferred to the first call of inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to @@ -355,95 +378,116 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the - version assumed by the caller. msg is set to null if there is no error - message. inflateInit does not perform any decompression apart from reading - the zlib header if present: this will be done by inflate(). (So next_in and - avail_in may be modified, but next_out and avail_out are unchanged.) + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit() does not process any header information -- that is deferred + until inflate() is called. */ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); /* inflate decompresses as much data as possible, and stops when the input - buffer becomes empty or the output buffer becomes full. It may introduce + buffer becomes empty or the output buffer becomes full. It may introduce some output latency (reading input without producing any output) except when forced to flush. - The detailed semantics are as follows. inflate performs one or both of the + The detailed semantics are as follows. inflate performs one or both of the following actions: - Decompress more input starting at next_in and update next_in and avail_in - accordingly. If not all input can be processed (because there is not - enough room in the output buffer), next_in is updated and processing - will resume at this point for the next call of inflate(). + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing will + resume at this point for the next call of inflate(). - Provide more output starting at next_out and update next_out and avail_out - accordingly. inflate() provides as much output as possible, until there - is no more input data or no more space in the output buffer (see below - about the flush parameter). - - Before the call of inflate(), the application should ensure that at least - one of the actions is possible, by providing more input and/or consuming - more output, and updating the next_* and avail_* values accordingly. - The application can consume the uncompressed output when it wants, for - example when the output buffer is full (avail_out == 0), or after each - call of inflate(). If inflate returns Z_OK and with zero avail_out, it - must be called again after making room in the output buffer because there - might be more output pending. - - The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, - Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much - output as possible to the output buffer. Z_BLOCK requests that inflate() stop - if and when it gets to the next deflate block boundary. When decoding the - zlib or gzip format, this will cause inflate() to return immediately after - the header and before the first block. When doing a raw inflate, inflate() - will go ahead and process the first block, and will return when it gets to - the end of that block, or when it runs out of data. + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. The Z_BLOCK option assists in appending to or combining deflate streams. Also to assist in this, on return inflate() will set strm->data_type to the - number of unused bits in the last byte taken from strm->next_in, plus 64 - if inflate() is currently decoding the last block in the deflate stream, - plus 128 if inflate() returned immediately after decoding an end-of-block - code or decoding the complete header up to just before the first byte of the - deflate stream. The end-of-block will not be indicated until all of the - uncompressed data from that block has been written to strm->next_out. The - number of unused bits may in general be greater than seven, except when - bit 7 of data_type is set, in which case the number of unused bits will be - less than eight. + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. inflate() should normally be called until it returns Z_STREAM_END or an - error. However if all decompression is to be performed in a single step - (a single call of inflate), the parameter flush should be set to - Z_FINISH. In this case all pending input is processed and all pending - output is flushed; avail_out must be large enough to hold all the - uncompressed data. (The size of the uncompressed data may have been saved - by the compressor for this purpose.) The next operation on this stream must - be inflateEnd to deallocate the decompression state. The use of Z_FINISH - is never required, but can be used to inform inflate that a faster approach - may be used for the single inflate() call. + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. In this implementation, inflate() always flushes as much output as possible to the output buffer, and always uses the faster approach on the - first call. So the only effect of the flush parameter in this implementation - is on the return value of inflate(), as noted below, or when it returns early - because Z_BLOCK is used. + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. If a preset dictionary is needed after this call (see inflateSetDictionary - below), inflate sets strm->adler to the adler32 checksum of the dictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise it sets - strm->adler to the adler32 checksum of all output produced so far (that is, + strm->adler to the Adler-32 checksum of all output produced so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described - below. At the end of the stream, inflate() checks that its computed adler32 + below. At the end of the stream, inflate() checks that its computed adler32 checksum is equal to that saved by the compressor and returns Z_STREAM_END only if the checksum is correct. - inflate() will decompress and check either zlib-wrapped or gzip-wrapped - deflate data. The header type is detected automatically. Any information - contained in the gzip header is not retained, so applications that need that - information should instead use raw inflate, see inflateInit2() below, or - inflateBack() and perform their own processing of the gzip header and - trailer. + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained, so applications that need that information should + instead use raw inflate, see inflateInit2() below, or inflateBack() and + perform their own processing of the gzip header and trailer. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + producted so far. The CRC-32 is checked against the gzip trailer. inflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if the end of the compressed data has @@ -451,27 +495,28 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush)); preset dictionary is needed at this point, Z_DATA_ERROR if the input data was corrupted (input stream not conforming to the zlib format or incorrect check value), Z_STREAM_ERROR if the stream structure was inconsistent (for example - if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no progress is possible or if there was not enough room in the - output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and inflate() can be called again with more input and more output space to - continue decompressing. If Z_DATA_ERROR is returned, the application may then - call inflateSync() to look for a good compression block if a partial recovery - of the data is desired. + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is desired. */ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm)); /* All dynamically allocated data structures for this stream are freed. - This function discards any unprocessed input and does not flush any - pending output. + This function discards any unprocessed input and does not flush any pending + output. inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state - was inconsistent. In the error case, msg may be set but then points to a + was inconsistent. In the error case, msg may be set but then points to a static string (which must not be deallocated). */ + /* Advanced functions */ /* @@ -486,55 +531,57 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm, int memLevel, int strategy)); - This is another version of deflateInit with more compression options. The - fields next_in, zalloc, zfree and opaque must be initialized before by - the caller. + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by the + caller. - The method parameter is the compression method. It must be Z_DEFLATED in + The method parameter is the compression method. It must be Z_DEFLATED in this version of the library. The windowBits parameter is the base two logarithm of the window size - (the size of the history buffer). It should be in the range 8..15 for this - version of the library. Larger values of this parameter result in better - compression at the expense of memory usage. The default value is 15 if + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if deflateInit is used instead. - windowBits can also be -8..-15 for raw deflate. In this case, -windowBits - determines the window size. deflate() will then generate raw deflate data + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data with no zlib header or trailer, and will not compute an adler32 check value. - windowBits can also be greater than 15 for optional gzip encoding. Add + windowBits can also be greater than 15 for optional gzip encoding. Add 16 to windowBits to write a simple gzip header and trailer around the - compressed data instead of a zlib wrapper. The gzip header will have no - file name, no extra data, no comment, no modification time (set to zero), - no header crc, and the operating system will be set to 255 (unknown). If a + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to 255 (unknown). If a gzip stream is being written, strm->adler is a crc32 instead of an adler32. The memLevel parameter specifies how much memory should be allocated - for the internal compression state. memLevel=1 uses minimum memory but - is slow and reduces compression ratio; memLevel=9 uses maximum memory - for optimal speed. The default value is 8. See zconf.h for total memory - usage as a function of windowBits and memLevel. + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. - The strategy parameter is used to tune the compression algorithm. Use the + The strategy parameter is used to tune the compression algorithm. Use the value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no string match), or Z_RLE to limit match distances to one (run-length - encoding). Filtered data consists mostly of small values with a somewhat - random distribution. In this case, the compression algorithm is tuned to - compress them better. The effect of Z_FILTERED is to force more Huffman + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman coding and less string matching; it is somewhat intermediate between - Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as - Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy - parameter only affects the compression ratio but not the correctness of the - compressed output even if it is not set appropriately. Z_FIXED prevents the - use of dynamic Huffman codes, allowing for a simpler decoder for special - applications. - - deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid - method). msg is set to null if there is no error message. deflateInit2 does - not perform any compression: this will be done by deflate(). + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). */ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, @@ -542,38 +589,43 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm, uInt dictLength)); /* Initializes the compression dictionary from the given byte sequence - without producing any compressed output. This function must be called - immediately after deflateInit, deflateInit2 or deflateReset, before any - call of deflate. The compressor and decompressor must use exactly the same - dictionary (see inflateSetDictionary). + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). The dictionary should consist of strings (byte sequences) that are likely to be encountered later in the data to be compressed, with the most commonly - used strings preferably put towards the end of the dictionary. Using a + used strings preferably put towards the end of the dictionary. Using a dictionary is most useful when the data to be compressed is short and can be predicted with good accuracy; the data can then be compressed better than with the default empty dictionary. Depending on the size of the compression data structures selected by deflateInit or deflateInit2, a part of the dictionary may in effect be - discarded, for example if the dictionary is larger than the window size in - deflate or deflate2. Thus the strings most likely to be useful should be - put at the end of the dictionary, not at the front. In addition, the - current implementation of deflate will use at most the window size minus - 262 bytes of the provided dictionary. + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. Upon return of this function, strm->adler is set to the adler32 value of the dictionary; the decompressor may later use this value to determine - which dictionary has been used by the compressor. (The adler32 value + which dictionary has been used by the compressor. (The adler32 value applies to the whole dictionary even if only a subset of the dictionary is actually used by the compressor.) If a raw deflate was requested, then the adler32 value is not computed and strm->adler is not set. deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is inconsistent (for example if deflate has already been called for this stream - or if the compression method is bsort). deflateSetDictionary does not - perform any compression: this will be done by deflate(). + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). */ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, @@ -583,26 +635,26 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest, This function can be useful when several compression strategies will be tried, for example when there are several ways of pre-processing the input - data with a filter. The streams that will be discarded should then be freed + data with a filter. The streams that will be discarded should then be freed by calling deflateEnd. Note that deflateCopy duplicates the internal - compression state which can be quite large, so this strategy is slow and - can consume lots of memory. + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and + (such as zalloc being Z_NULL). msg is left unchanged in both source and destination. */ ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm)); /* This function is equivalent to deflateEnd followed by deflateInit, - but does not free and reallocate all the internal compression state. - The stream will keep the same compression level and any other attributes - that may have been set by deflateInit2. + but does not free and reallocate all the internal compression state. The + stream will keep the same compression level and any other attributes that + may have been set by deflateInit2. - deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). */ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, @@ -612,18 +664,18 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, Dynamically update the compression level and compression strategy. The interpretation of level and strategy is as in deflateInit2. This can be used to switch between compression and straight copy of the input data, or - to switch to a different kind of input data requiring a different - strategy. If the compression level is changed, the input available so far - is compressed with the old level (and may be flushed); the new level will - take effect only at the next call of deflate(). + to switch to a different kind of input data requiring a different strategy. + If the compression level is changed, the input available so far is + compressed with the old level (and may be flushed); the new level will take + effect only at the next call of deflate(). Before the call of deflateParams, the stream state must be set as for - a call of deflate(), since the currently available input may have to - be compressed and flushed. In particular, strm->avail_out must be non-zero. + a call of deflate(), since the currently available input may have to be + compressed and flushed. In particular, strm->avail_out must be non-zero. deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source - stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR - if strm->avail_out was zero. + stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if + strm->avail_out was zero. */ ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm, @@ -647,31 +699,53 @@ ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm, uLong sourceLen)); /* deflateBound() returns an upper bound on the compressed size after - deflation of sourceLen bytes. It must be called after deflateInit() - or deflateInit2(). This would be used to allocate an output buffer - for deflation in a single pass, and so would be called before deflate(). + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. */ +ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm, + unsigned *pending, + int *bits)); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm, int bits, int value)); /* deflatePrime() inserts bits in the deflate output stream. The intent - is that this function is used to start off the deflate output with the - bits leftover from a previous deflate stream when appending to it. As such, - this function can only be used for raw deflate, and must be used before the - first deflate() call after a deflateInit2() or deflateReset(). bits must be - less than or equal to 16, and that many of the least significant bits of - value will be inserted in the output. - - deflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent. + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. */ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, gz_headerp head)); /* - deflateSetHeader() provides gzip header information for when a gzip + deflateSetHeader() provides gzip header information for when a gzip stream is requested by deflateInit2(). deflateSetHeader() may be called after deflateInit2() or deflateReset() and before the first call of deflate(). The text, time, os, extra field, name, and comment information @@ -684,11 +758,11 @@ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, 1.3.x) do not support header crc's, and will report that it is a "multi-part gzip file" and give up. - If deflateSetHeader is not used, the default gzip header has text false, + If deflateSetHeader is not used, the default gzip header has text false, the time set to zero, and os set to 255, with no extra, name, or comment fields. The gzip header is returned to the default state by deflateReset(). - deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ @@ -696,43 +770,50 @@ ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm, ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm, int windowBits)); - This is another version of inflateInit with an extra parameter. The + This is another version of inflateInit with an extra parameter. The fields next_in, avail_in, zalloc, zfree and opaque must be initialized before by the caller. The windowBits parameter is the base two logarithm of the maximum window size (the size of the history buffer). It should be in the range 8..15 for - this version of the library. The default value is 15 if inflateInit is used - instead. windowBits must be greater than or equal to the windowBits value + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value provided to deflateInit2() while compressing, or it must be equal to 15 if - deflateInit2() was not used. If a compressed stream with a larger window + deflateInit2() was not used. If a compressed stream with a larger window size is given as input, inflate() will return with the error code Z_DATA_ERROR instead of trying to allocate a larger window. - windowBits can also be -8..-15 for raw inflate. In this case, -windowBits - determines the window size. inflate() will then process raw deflate data, + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, not looking for a zlib or gzip header, not generating a check value, and not - looking for any check values for comparison at the end of the stream. This + looking for any check values for comparison at the end of the stream. This is for use with other formats that use the deflate compressed data format - such as zip. Those formats provide their own check values. If a custom + such as zip. Those formats provide their own check values. If a custom format is developed using the raw deflate format for compressed data, it is recommended that a check value such as an adler32 or a crc32 be applied to the uncompressed data as is done in the zlib, gzip, and zip formats. For - most applications, the zlib format should be used as is. Note that comments + most applications, the zlib format should be used as is. Note that comments above on the use in deflateInit2() applies to the magnitude of windowBits. - windowBits can also be greater than 15 for optional gzip decoding. Add + windowBits can also be greater than 15 for optional gzip decoding. Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection, or add 16 to decode only the gzip format (the zlib format will - return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is - a crc32 instead of an adler32. + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + crc32 instead of an adler32. inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg - is set to null if there is no error message. inflateInit2 does not perform - any decompression apart from reading the zlib header if present: this will - be done by inflate(). (So next_in and avail_in may be modified, but next_out - and avail_out are unchanged.) + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. */ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, @@ -740,36 +821,56 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm, uInt dictLength)); /* Initializes the decompression dictionary from the given uncompressed byte - sequence. This function must be called immediately after a call of inflate, - if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor can be determined from the adler32 value returned by that call of inflate. The compressor and decompressor must use exactly the same dictionary (see - deflateSetDictionary). For raw inflate, this function can be called - immediately after inflateInit2() or inflateReset() and before any call of - inflate() to set the dictionary. The application must insure that the - dictionary that was used for compression is provided. + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a - parameter is invalid (such as NULL dictionary) or the stream state is + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the - expected one (incorrect adler32 value). inflateSetDictionary does not + expected one (incorrect adler32 value). inflateSetDictionary does not perform any decompression: this will be done by subsequent calls of inflate(). */ -ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm, + Bytef *dictionary, + uInt *dictLength)); /* - Skips invalid compressed data until a full flush point (see above the - description of deflate with Z_FULL_FLUSH) can be found, or until all - available input is skipped. No output is provided. + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similary, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ - inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR - if no more input was provided, Z_DATA_ERROR if no flush point has been found, - or Z_STREAM_ERROR if the stream structure was inconsistent. In the success - case, the application may save the current current value of total_in which - indicates where valid compressed data was found. In the error case, the - application may repeatedly call inflateSync, providing more input each time, - until success or end of the input data. +ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm)); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current current value of + total_in which indicates where valid compressed data was found. In the + error case, the application may repeatedly call inflateSync, providing more + input each time, until success or end of the input data. */ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, @@ -784,18 +885,30 @@ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest, inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_STREAM_ERROR if the source stream state was inconsistent - (such as zalloc being NULL). msg is left unchanged in both source and + (such as zalloc being Z_NULL). msg is left unchanged in both source and destination. */ ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm)); /* This function is equivalent to inflateEnd followed by inflateInit, - but does not free and reallocate all the internal decompression state. - The stream will keep attributes that may have been set by inflateInit2. + but does not free and reallocate all the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. - inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source - stream state was inconsistent (such as zalloc or state being NULL). + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm, + int windowBits)); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. */ ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, @@ -803,54 +916,87 @@ ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm, int value)); /* This function inserts bits in the inflate input stream. The intent is - that this function is used to start inflating at a bit position in the - middle of a byte. The provided bits will be used before any bytes are used - from next_in. This function should only be used with raw inflate, and - should be used before the first inflate() call after inflateInit2() or - inflateReset(). bits must be less than or equal to 16, and that many of the - least significant bits of value will be inserted in the input. - - inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ +ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm)); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above or -1 << 16 if the provided + source stream state was inconsistent. +*/ + ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm, gz_headerp head)); /* - inflateGetHeader() requests that gzip header information be stored in the + inflateGetHeader() requests that gzip header information be stored in the provided gz_header structure. inflateGetHeader() may be called after inflateInit2() or inflateReset(), and before the first call of inflate(). As inflate() processes the gzip stream, head->done is zero until the header is completed, at which time head->done is set to one. If a zlib stream is being decoded, then head->done is set to -1 to indicate that there will be - no gzip header information forthcoming. Note that Z_BLOCK can be used to - force inflate() to return immediately after header processing is complete - and before any actual data is decompressed. + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. - The text, time, xflags, and os fields are filled in with the gzip header + The text, time, xflags, and os fields are filled in with the gzip header contents. hcrc is set to true if there is a header CRC. (The header CRC - was valid if done is set to one.) If extra is not Z_NULL, then extra_max + was valid if done is set to one.) If extra is not Z_NULL, then extra_max contains the maximum number of bytes to write to extra. Once done is true, extra_len contains the actual extra field length, and extra contains the extra field, or that field truncated if extra_max is less than extra_len. If name is not Z_NULL, then up to name_max characters are written there, terminated with a zero unless the length is greater than name_max. If comment is not Z_NULL, then up to comm_max characters are written there, - terminated with a zero unless the length is greater than comm_max. When - any of extra, name, or comment are not Z_NULL and the respective field is - not present in the header, then that field is set to Z_NULL to signal its + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its absence. This allows the use of deflateSetHeader() with the returned structure to duplicate the header. However if those fields are set to allocated memory, then the application will need to save those pointers elsewhere so that they can be eventually freed. - If inflateGetHeader is not used, then the header information is simply + If inflateGetHeader is not used, then the header information is simply discarded. The header is always checked for validity, including the header CRC if present. inflateReset() will reset the process to discard the header information. The application would need to call inflateGetHeader() again to retrieve the header from the next gzip stream. - inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. */ @@ -871,12 +1017,13 @@ ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits, See inflateBack() for the usage of these routines. inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of - the paramaters are invalid, Z_MEM_ERROR if the internal state could not - be allocated, or Z_VERSION_ERROR if the version of the library does not - match the version of the header file. + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. */ -typedef unsigned (*in_func) OF((void FAR *, unsigned char FAR * FAR *)); +typedef unsigned (*in_func) OF((void FAR *, + z_const unsigned char FAR * FAR *)); typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned)); ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, @@ -884,24 +1031,25 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, out_func out, void FAR *out_desc)); /* inflateBack() does a raw inflate with a single call using a call-back - interface for input and output. This is more efficient than inflate() for - file i/o applications in that it avoids copying between the output and the - sliding window by simply making the window itself the output buffer. This - function trusts the application to not change the output buffer passed by - the output function, at least until inflateBack() returns. + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. inflateBackInit() must be called first to allocate the internal state and to initialize the state with the user-provided window buffer. inflateBack() may then be used multiple times to inflate a complete, raw - deflate stream with each call. inflateBackEnd() is then called to free - the allocated state. + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. A raw deflate stream is one with no zlib or gzip header or trailer. This routine would normally be used in a utility that reads zip or gzip files and writes out uncompressed files. The utility would decode the - header and process the trailer on its own, hence this routine expects - only the raw deflate stream to decompress. This is different from the - normal behavior of inflate(), which expects either a zlib or gzip header and + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the normal + behavior of inflate(), which expects either a zlib or gzip header and trailer around the deflate stream. inflateBack() uses two subroutines supplied by the caller that are then @@ -927,7 +1075,7 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in must also be initialized, and then if strm->avail_in is not zero, input will - initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. The in_desc and out_desc parameters of inflateBack() is passed as the first parameter of in() and out() respectively when they are called. These @@ -937,15 +1085,15 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm, On return, inflateBack() will set strm->next_in and strm->avail_in to pass back any unused input that was provided by the last in() call. The return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR - if in() or out() returned an error, Z_DATA_ERROR if there was a format - error in the deflate stream (in which case strm->msg is set to indicate the - nature of the error), or Z_STREAM_ERROR if the stream was not properly - initialized. In the case of Z_BUF_ERROR, an input or output error can be - distinguished using strm->next_in which will be Z_NULL only if in() returned - an error. If strm->next is not Z_NULL, then the Z_BUF_ERROR was due to - out() returning non-zero. (in() will always be called before out(), so - strm->next_in is assured to be defined if out() returns non-zero.) Note - that inflateBack() cannot return Z_OK. + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. */ ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm)); @@ -997,27 +1145,27 @@ ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void)); 27-31: 0 (reserved) */ +#ifndef Z_SOLO /* utility functions */ /* - The following utility functions are implemented on top of the - basic stream-oriented functions. To simplify the interface, some - default options are assumed (compression level and memory usage, - standard memory allocation functions). The source code of these - utility functions can easily be modified if you need special options. + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. */ ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)); /* Compresses the source buffer into the destination buffer. sourceLen is - the byte length of the source buffer. Upon entry, destLen is the total - size of the destination buffer, which must be at least the value returned - by compressBound(sourceLen). Upon exit, destLen is the actual size of the + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the compressed buffer. - This function can be used to compress a whole file at once if the - input file is mmap'ed. + compress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output buffer. @@ -1027,11 +1175,11 @@ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen, int level)); /* - Compresses the source buffer into the destination buffer. The level + Compresses the source buffer into the destination buffer. The level parameter has the same meaning as in deflateInit. sourceLen is the byte - length of the source buffer. Upon entry, destLen is the total size of the + length of the source buffer. Upon entry, destLen is the total size of the destination buffer, which must be at least the value returned by - compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressBound(sourceLen). Upon exit, destLen is the actual size of the compressed buffer. compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough @@ -1042,159 +1190,255 @@ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen, ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen)); /* compressBound() returns an upper bound on the compressed size after - compress() or compress2() on sourceLen bytes. It would be used before - a compress() or compress2() call to allocate the destination buffer. + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. */ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen, const Bytef *source, uLong sourceLen)); /* Decompresses the source buffer into the destination buffer. sourceLen is - the byte length of the source buffer. Upon entry, destLen is the total - size of the destination buffer, which must be large enough to hold the - entire uncompressed data. (The size of the uncompressed data must have - been saved previously by the compressor and transmitted to the decompressor - by some mechanism outside the scope of this compression library.) - Upon exit, destLen is the actual size of the compressed buffer. - This function can be used to decompress a whole file at once if the - input file is mmap'ed. + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed buffer. uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if there was not enough room in the output - buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. */ + /* gzip file access functions */ -typedef voidp gzFile; +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ -ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); /* - Opens a gzip (.gz) file for reading or writing. The mode parameter - is as in fopen ("rb" or "wb") but can also include a compression level - ("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for - Huffman only compression as in "wb1h", or 'R' for run-length encoding - as in "wb1R". (See the description of deflateInit2 for more information - about the strategy parameter.) +ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode)); + + Opens a gzip (.gz) file for reading or writing. The mode parameter is as + in fopen ("rb" or "wb") but can also include a compression level ("wb9") or + a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only + compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F' + for fixed code compression as in "wb9F". (See the description of + deflateInit2 for more information about the strategy parameter.) 'T' will + request transparent writing or appending with no compression and not using + the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. gzopen can be used to read a file which is not in gzip format; in this - case gzread will directly read from the file without decompression. + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ - gzopen returns NULL if the file could not be opened or if there was - insufficient memory to allocate the (de)compression state; errno - can be checked to distinguish the two cases (if errno is zero, the - zlib error is Z_MEM_ERROR). */ +ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +/* + gzdopen associates a gzFile with the file descriptor fd. File descriptors + are obtained from calls like open, dup, creat, pipe or fileno (if the file + has been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ -ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode)); +ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size)); /* - gzdopen() associates a gzFile with the file descriptor fd. File - descriptors are obtained from calls like open, dup, creat, pipe or - fileno (in the file has been previously opened with fopen). - The mode parameter is as in gzopen. - The next call of gzclose on the returned gzFile will also close the - file descriptor fd, just like fclose(fdopen(fd), mode) closes the file - descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode). - gzdopen returns NULL if there was insufficient memory to allocate - the (de)compression state. + Set the internal buffer size used by this library's functions. The + default buffer size is 8192 bytes. This function must be called after + gzopen() or gzdopen(), and before any other calls that read or write the + file. The buffer memory allocation is always deferred to the first read or + write. Two buffers are allocated, either both of the specified size when + writing, or one of the specified size and the other twice that size when + reading. A larger buffer size of, for example, 64K or 128K bytes will + noticeably increase the speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. */ ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy)); /* - Dynamically update the compression level or strategy. See the description + Dynamically update the compression level or strategy. See the description of deflateInit2 for the meaning of these parameters. + gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not opened for writing. */ -ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); +ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len)); /* - Reads the given number of uncompressed bytes from the compressed file. - If the input file was not in gzip format, gzread copies the given number - of bytes into the buffer. - gzread returns the number of uncompressed bytes actually read (0 for - end of file, -1 for error). */ + Reads the given number of uncompressed bytes from the compressed file. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. +*/ -ZEXTERN int ZEXPORT gzwrite OF((gzFile file, - voidpc buf, unsigned len)); +ZEXTERN int ZEXPORT gzwrite OF((gzFile file, + voidpc buf, unsigned len)); /* Writes the given number of uncompressed bytes into the compressed file. - gzwrite returns the number of uncompressed bytes actually written - (0 in case of error). + gzwrite returns the number of uncompressed bytes written or 0 in case of + error. */ -ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...)); +ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...)); /* - Converts, formats, and writes the args to the compressed file under - control of the format string, as in fprintf. gzprintf returns the number of - uncompressed bytes actually written (0 in case of error). The number of - uncompressed bytes written is limited to 4095. The caller should assure that - this limit is not exceeded. If it is exceeded, then gzprintf() will return - return an error (0) with nothing written. In this case, there may also be a - buffer overflow with unpredictable consequences, which is possible only if - zlib was compiled with the insecure functions sprintf() or vsprintf() - because the secure snprintf() or vsnprintf() functions were not available. + Converts, formats, and writes the arguments to the compressed file under + control of the format string, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or 0 in case of error. The number of + uncompressed bytes written is limited to 8191, or one less than the buffer + size given to gzbuffer(). The caller should assure that this limit is not + exceeded. If it is exceeded, then gzprintf() will return an error (0) with + nothing written. In this case, there may also be a buffer overflow with + unpredictable consequences, which is possible only if zlib was compiled with + the insecure functions sprintf() or vsprintf() because the secure snprintf() + or vsnprintf() functions were not available. This can be determined using + zlibCompileFlags(). */ ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s)); /* - Writes the given null-terminated string to the compressed file, excluding + Writes the given null-terminated string to the compressed file, excluding the terminating null character. - gzputs returns the number of characters written, or -1 in case of error. + + gzputs returns the number of characters written, or -1 in case of error. */ ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len)); /* - Reads bytes from the compressed file until len-1 characters are read, or - a newline character is read and transferred to buf, or an end-of-file - condition is encountered. The string is then terminated with a null - character. - gzgets returns buf, or Z_NULL in case of error. + Reads bytes from the compressed file until len-1 characters are read, or a + newline character is read and transferred to buf, or an end-of-file + condition is encountered. If any characters are read or if len == 1, the + string is terminated with a null character. If no characters are read due + to an end-of-file or len < 1, then the buffer is left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. */ -ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); +ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c)); /* - Writes c, converted to an unsigned char, into the compressed file. - gzputc returns the value that was written, or -1 in case of error. + Writes c, converted to an unsigned char, into the compressed file. gzputc + returns the value that was written, or -1 in case of error. */ -ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); +ZEXTERN int ZEXPORT gzgetc OF((gzFile file)); /* - Reads one byte from the compressed file. gzgetc returns this byte - or -1 in case of end of file or error. + Reads one byte from the compressed file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. */ -ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); +ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file)); /* - Push one character back onto the stream to be read again later. - Only one character of push-back is allowed. gzungetc() returns the - character pushed, or -1 on failure. gzungetc() will fail if a - character has been pushed but not read yet, or if c is -1. The pushed - character will be discarded if the stream is repositioned with gzseek() - or gzrewind(). + Push one character back onto the stream to be read as the first character + on the next read. At least one character of push-back is allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). */ -ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); +ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush)); /* - Flushes all pending output into the compressed file. The parameter - flush is as in the deflate() function. The return value is the zlib - error number (see function gzerror below). gzflush returns Z_OK if - the flush parameter is Z_FINISH and all output could be flushed. - gzflush should be called only when strictly necessary because it can - degrade compression. + Flushes all pending output into the compressed file. The parameter flush + is as in the deflate() function. The return value is the zlib error number + (see function gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatented gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. */ -ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, - z_off_t offset, int whence)); /* - Sets the starting position for the next gzread or gzwrite on the - given compressed file. The offset represents a number of bytes in the - uncompressed data stream. The whence parameter is defined as in lseek(2); +ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file, + z_off_t offset, int whence)); + + Sets the starting position for the next gzread or gzwrite on the given + compressed file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); the value SEEK_END is not supported. + If the file is opened for reading, this function is emulated but can be - extremely slow. If the file is opened for writing, only forward seeks are + extremely slow. If the file is opened for writing, only forward seeks are supported; gzseek then compresses a sequence of zeroes up to the new starting position. - gzseek returns the resulting offset location as measured in bytes from + gzseek returns the resulting offset location as measured in bytes from the beginning of the uncompressed stream, or -1 in case of error, in particular if the file is opened for writing and the new starting position would be before the current position. @@ -1204,68 +1448,134 @@ ZEXTERN int ZEXPORT gzrewind OF((gzFile file)); /* Rewinds the given file. This function is supported only for reading. - gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET) */ +/* ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file)); + + Returns the starting position for the next gzread or gzwrite on the given + compressed file. This position represents a number of bytes in the + uncompressed data stream, and is zero when starting, even if appending or + reading a gzip stream from the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + /* - Returns the starting position for the next gzread or gzwrite on the - given compressed file. This position represents a number of bytes in the - uncompressed data stream. +ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file)); - gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) + Returns the current offset in the file being read or written. This offset + includes the count of bytes that precede the gzip stream, for example when + appending or when using gzdopen() for reading. When reading, the offset + does not include as yet unused buffered input. This information can be used + for a progress indicator. On error, gzoffset() returns -1. */ ZEXTERN int ZEXPORT gzeof OF((gzFile file)); /* - Returns 1 when EOF has previously been detected reading the given - input stream, otherwise zero. + Returns true (1) if the end-of-file indicator has been set while reading, + false (0) otherwise. Note that the end-of-file indicator is set only if the + read tried to go past the end of the input, but came up short. Therefore, + just like feof(), gzeof() may return false even if there is no more data to + read, in the event that the last read request was for the exact number of + bytes remaining in the input file. This will happen if the input file size + is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. */ ZEXTERN int ZEXPORT gzdirect OF((gzFile file)); /* - Returns 1 if file is being read directly without decompression, otherwise - zero. + Returns true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) */ ZEXTERN int ZEXPORT gzclose OF((gzFile file)); /* - Flushes all pending output if necessary, closes the compressed file - and deallocates all the (de)compression state. The return value is the zlib - error number (see function gzerror below). + Flushes all pending output if necessary, closes the compressed file and + deallocates the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r OF((gzFile file)); +ZEXTERN int ZEXPORT gzclose_w OF((gzFile file)); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. */ ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum)); /* - Returns the error message for the last error which occurred on the - given compressed file. errnum is set to zlib error number. If an - error occurred in the file system and not in the compression library, - errnum is set to Z_ERRNO and the application may consult errno - to get the exact error code. + Returns the error message for the last error which occurred on the given + compressed file. errnum is set to zlib error number. If an error occurred + in the file system and not in the compression library, errnum is set to + Z_ERRNO and the application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. */ ZEXTERN void ZEXPORT gzclearerr OF((gzFile file)); /* - Clears the error and end-of-file flags for file. This is analogous to the - clearerr() function in stdio. This is useful for continuing to read a gzip + Clears the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip file that is being written concurrently. */ +#endif /* !Z_SOLO */ + /* checksum functions */ /* These functions are not related to compression but are exported - anyway because they might be useful in applications using the - compression library. + anyway because they might be useful in applications using the compression + library. */ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); /* Update a running Adler-32 checksum with the bytes buf[0..len-1] and - return the updated checksum. If buf is NULL, this function returns - the required initial value for the checksum. - An Adler-32 checksum is almost as reliable as a CRC32 but can be computed - much faster. Usage example: + return the updated checksum. If buf is Z_NULL, this function returns the + required initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. + + Usage example: uLong adler = adler32(0L, Z_NULL, 0); @@ -1275,21 +1585,25 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len)); if (adler != original_adler) error(); */ +/* ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2, z_off_t len2)); -/* + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of - seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. */ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); /* Update a running CRC-32 with the bytes buf[0..len-1] and return the - updated CRC-32. If buf is NULL, this function returns the required initial - value for the for the crc. Pre- and post-conditioning (one's complement) is + updated CRC-32. If buf is Z_NULL, this function returns the required + initial value for the crc. Pre- and post-conditioning (one's complement) is performed within this function so it shouldn't be done by the application. + Usage example: uLong crc = crc32(0L, Z_NULL, 0); @@ -1300,9 +1614,9 @@ ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)); if (crc != original_crc) error(); */ +/* ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2)); -/* Combine two CRC-32 check values into one. For two sequences of bytes, seq1 and seq2 with lengths len1 and len2, CRC-32 check values were calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 @@ -1331,26 +1645,121 @@ ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits, const char *version, int stream_size)); #define deflateInit(strm, level) \ - deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream)) + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) #define inflateInit(strm) \ - inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream)) + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) #define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ - (strategy), ZLIB_VERSION, sizeof(z_stream)) + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) #define inflateInit2(strm, windowBits) \ - inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream)) + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) #define inflateBackInit(strm, windowBits, window) \ inflateBackInit_((strm), (windowBits), (window), \ - ZLIB_VERSION, sizeof(z_stream)) + ZLIB_VERSION, (int)sizeof(z_stream)) + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g)) +#endif +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); + ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t)); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *)); + ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int)); + ZEXTERN z_off_t ZEXPORT gztell OF((gzFile)); + ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile)); + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); +#endif + +#else /* Z_SOLO */ + ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t)); + +#endif /* !Z_SOLO */ + +/* hack for buggy compilers */ #if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL) - struct internal_state {int dummy;}; /* hack for buggy compilers */ + struct internal_state {int dummy;}; #endif +/* undocumented functions */ ZEXTERN const char * ZEXPORT zError OF((int)); -ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z)); -ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp)); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void)); +ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int)); +ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp)); +ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp)); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path, + const char *mode)); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file, + const char *format, + va_list va)); +# endif +#endif #ifdef __cplusplus } diff --git a/erts/emulator/zlib/zutil.c b/erts/emulator/zlib/zutil.c index fa5b43126a..27a8af4a2b 100644 --- a/erts/emulator/zlib/zutil.c +++ b/erts/emulator/zlib/zutil.c @@ -1,22 +1,23 @@ /* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* @(#) $Id$ */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "zutil.h" +#ifndef Z_SOLO +# include "gzguts.h" +#endif #ifndef NO_DUMMY_DECL struct internal_state {int dummy;}; /* for buggy compilers */ #endif -const char * const z_errmsg[10] = { +z_const char * const z_errmsg[10] = { "need dictionary", /* Z_NEED_DICT 2 */ "stream end", /* Z_STREAM_END 1 */ "", /* Z_OK 0 */ @@ -39,25 +40,25 @@ uLong ZEXPORT zlibCompileFlags() uLong flags; flags = 0; - switch (sizeof(uInt)) { + switch ((int)(sizeof(uInt))) { case 2: break; case 4: flags += 1; break; case 8: flags += 2; break; default: flags += 3; } - switch (sizeof(uLong)) { + switch ((int)(sizeof(uLong))) { case 2: break; case 4: flags += 1 << 2; break; case 8: flags += 2 << 2; break; default: flags += 3 << 2; } - switch (sizeof(voidpf)) { + switch ((int)(sizeof(voidpf))) { case 2: break; case 4: flags += 1 << 4; break; case 8: flags += 2 << 4; break; default: flags += 3 << 4; } - switch (sizeof(z_off_t)) { + switch ((int)(sizeof(z_off_t))) { case 2: break; case 4: flags += 1 << 6; break; case 8: flags += 2 << 6; break; @@ -90,27 +91,27 @@ uLong ZEXPORT zlibCompileFlags() #ifdef FASTEST flags += 1L << 21; #endif -#ifdef STDC +#if defined(STDC) || defined(Z_HAVE_STDARG_H) # ifdef NO_vsnprintf - flags += 1L << 25; + flags += 1L << 25; # ifdef HAS_vsprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # else # ifdef HAS_vsnprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # endif #else - flags += 1L << 24; + flags += 1L << 24; # ifdef NO_snprintf - flags += 1L << 25; + flags += 1L << 25; # ifdef HAS_sprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # else # ifdef HAS_snprintf_void - flags += 1L << 26; + flags += 1L << 26; # endif # endif #endif @@ -122,9 +123,9 @@ uLong ZEXPORT zlibCompileFlags() # ifndef verbose # define verbose 0 # endif -int z_verbose = verbose; +int ZLIB_INTERNAL z_verbose = verbose; -void z_error (m) +void ZLIB_INTERNAL z_error (m) char *m; { fprintf(stderr, "%s\n", m); @@ -151,7 +152,7 @@ const char * ZEXPORT zError(err) #ifndef HAVE_MEMCPY -void zmemcpy(dest, source, len) +void ZLIB_INTERNAL zmemcpy(dest, source, len) Bytef* dest; const Bytef* source; uInt len; @@ -162,7 +163,7 @@ void zmemcpy(dest, source, len) } while (--len != 0); } -int zmemcmp(s1, s2, len) +int ZLIB_INTERNAL zmemcmp(s1, s2, len) const Bytef* s1; const Bytef* s2; uInt len; @@ -175,7 +176,7 @@ int zmemcmp(s1, s2, len) return 0; } -void zmemzero(dest, len) +void ZLIB_INTERNAL zmemzero(dest, len) Bytef* dest; uInt len; { @@ -186,6 +187,7 @@ void zmemzero(dest, len) } #endif +#ifndef Z_SOLO #ifdef SYS16BIT @@ -218,7 +220,7 @@ local ptr_table table[MAX_PTR]; * a protected system like OS/2. Use Microsoft C instead. */ -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) { voidpf buf = opaque; /* just to make some compilers happy */ ulg bsize = (ulg)items*size; @@ -242,7 +244,7 @@ voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) return buf; } -void zcfree (voidpf opaque, voidpf ptr) +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) { int n; if (*(ush*)&ptr != 0) { /* object < 64K */ @@ -277,13 +279,13 @@ void zcfree (voidpf opaque, voidpf ptr) # define _hfree hfree #endif -voidpf zcalloc (voidpf opaque, unsigned items, unsigned size) +voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) { if (opaque) opaque = 0; /* to make compiler happy */ return _halloc((long)items, size); } -void zcfree (voidpf opaque, voidpf ptr) +void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) { if (opaque) opaque = 0; /* to make compiler happy */ _hfree(ptr); @@ -302,26 +304,24 @@ extern voidp calloc OF((uInt items, uInt size)); extern void free OF((voidpf ptr)); #endif -extern void* sys_alloc(unsigned); -extern void* sys_free(void *); - -voidpf zcalloc (opaque, items, size) +voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) voidpf opaque; unsigned items; unsigned size; { - unsigned sz = items * size; - voidpf* ptr = (voidpf) sys_alloc(sz); if (opaque) items += size - size; /* make compiler happy */ - return ptr; + return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) : + (voidpf)calloc(items, size); } -void zcfree (opaque, ptr) +void ZLIB_INTERNAL zcfree (opaque, ptr) voidpf opaque; voidpf ptr; { - sys_free(ptr); + free(ptr); if (opaque) return; /* make compiler happy */ } #endif /* MY_ZCALLOC */ + +#endif /* !Z_SOLO */ diff --git a/erts/emulator/zlib/zutil.h b/erts/emulator/zlib/zutil.h index a8872e1c88..24ab06b1cf 100644 --- a/erts/emulator/zlib/zutil.h +++ b/erts/emulator/zlib/zutil.h @@ -1,10 +1,8 @@ /* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2005 Jean-loup Gailly. + * Copyright (C) 1995-2013 Jean-loup Gailly. * For conditions of distribution and use, see copyright notice in zlib.h */ -/* %ExternalCopyright% */ - /* WARNING: this file should *not* be used by applications. It is part of the implementation of the compression library and is subject to change. Applications should only use zlib.h. @@ -15,30 +13,24 @@ #ifndef ZUTIL_H #define ZUTIL_H -#define ZLIB_INTERNAL +#ifdef HAVE_HIDDEN +# define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) +#else +# define ZLIB_INTERNAL +#endif + #include "zlib.h" -#ifdef STDC -# ifndef _WIN32_WCE +#if defined(STDC) && !defined(Z_SOLO) +# if !(defined(_WIN32_WCE) && defined(_MSC_VER)) # include <stddef.h> # endif # include <string.h> # include <stdlib.h> #endif -#ifdef NO_ERRNO_H -# ifdef _WIN32_WCE - /* The Microsoft C Run-Time Library for Windows CE doesn't have - * errno. We define it as a global variable to simplify porting. - * Its value is always 0 and should not be used. We rename it to - * avoid conflict with other libraries that use the same workaround. - */ -# define errno z_errno -# endif - extern int errno; -#else -# ifndef _WIN32_WCE -# include <errno.h> -# endif + +#ifdef Z_SOLO + typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ #endif #ifndef local @@ -52,13 +44,13 @@ typedef unsigned short ush; typedef ush FAR ushf; typedef unsigned long ulg; -extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* (size given to avoid silly warnings with Visual C++) */ #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] #define ERR_RETURN(strm,err) \ - return (strm->msg = (char*)ERR_MSG(err), (err)) + return (strm->msg = ERR_MSG(err), (err)) /* To be used only when the state is known to be valid */ /* common constants */ @@ -90,16 +82,18 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) # define OS_CODE 0x00 -# if defined(__TURBOC__) || defined(__BORLANDC__) -# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) - /* Allow compilation with ANSI keywords only enabled */ - void _Cdecl farfree( void *block ); - void *_Cdecl farmalloc( unsigned long nbytes ); -# else -# include <alloc.h> +# ifndef Z_SOLO +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include <alloc.h> +# endif +# else /* MSC or DJGPP */ +# include <malloc.h> # endif -# else /* MSC or DJGPP */ -# include <malloc.h> # endif #endif @@ -119,18 +113,20 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #ifdef OS2 # define OS_CODE 0x06 -# ifdef M_I86 - #include <malloc.h> +# if defined(M_I86) && !defined(Z_SOLO) +# include <malloc.h> # endif #endif #if defined(MACOS) || defined(TARGET_OS_MAC) # define OS_CODE 0x07 -# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include <unix.h> /* for fdopen */ -# else -# ifndef fdopen -# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef Z_SOLO +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include <unix.h> /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif # endif # endif #endif @@ -142,7 +138,6 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ #ifdef WIN32 # ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ # define OS_CODE 0x0b -# define F_OPEN(name, mode) _wfopen((WCHAR *)(name), (WCHAR *)(mode)) /* Unicode */ # endif #endif @@ -154,7 +149,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ # define fdopen(fd,mode) NULL /* No fdopen() */ #endif -#if (defined(_MSC_VER) && (_MSC_VER > 600)) +#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX # if defined(_WIN32_WCE) # define fdopen(fd,mode) NULL /* No fdopen() */ # ifndef _PTRDIFF_T_DEFINED @@ -166,6 +161,19 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ # endif #endif +#if defined(__BORLANDC__) && !defined(MSDOS) + #pragma warn -8004 + #pragma warn -8008 + #pragma warn -8066 +#endif + +/* provide prototypes for these when building zlib without LFS */ +#if !defined(_WIN32) && \ + (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) + ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); + ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); +#endif + /* common defaults */ #ifndef OS_CODE @@ -178,40 +186,7 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ /* functions */ -#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif -#if defined(__CYGWIN__) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif -#ifndef HAVE_VSNPRINTF -# ifdef MSDOS - /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), - but for now we just assume it doesn't. */ -# define NO_vsnprintf -# endif -# ifdef __TURBOC__ -# define NO_vsnprintf -# endif -# ifdef WIN32 - /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ -# if !defined(vsnprintf) && !defined(NO_vsnprintf) -# define vsnprintf _vsnprintf -# endif -# endif -# ifdef __SASC -# define NO_vsnprintf -# endif -#endif -#ifdef VMS -# define NO_vsnprintf -#endif - -#if defined(pyr) +#if defined(pyr) || defined(Z_SOLO) # define NO_MEMCPY #endif #if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) @@ -235,16 +210,16 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ # define zmemzero(dest, len) memset(dest, 0, len) # endif #else - extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); - extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); - extern void zmemzero OF((Bytef* dest, uInt len)); + void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); #endif /* Diagnostic functions */ #ifdef DEBUG # include <stdio.h> - extern int z_verbose; - extern void z_error OF((char *m)); + extern int ZLIB_INTERNAL z_verbose; + extern void ZLIB_INTERNAL z_error OF((char *m)); # define Assert(cond,msg) {if(!(cond)) z_error(msg);} # define Trace(x) {if (z_verbose>=0) fprintf x ;} # define Tracev(x) {if (z_verbose>0) fprintf x ;} @@ -260,13 +235,19 @@ extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ # define Tracecv(c,x) #endif - -voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); -void zcfree OF((voidpf opaque, voidpf ptr)); +#ifndef Z_SOLO + voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, + unsigned size)); + void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); +#endif #define ZALLOC(strm, items, size) \ (*((strm)->zalloc))((strm)->opaque, (items), (size)) #define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) #define TRY_FREE(s, p) {if (p) ZFREE(s, p);} +/* Reverse the bytes in a 32-bit value */ +#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + #endif /* ZUTIL_H */ diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index c30203c632..2cf7280ebc 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -135,6 +135,7 @@ static char *pluss_val_switches[] = { "ws", "ss", "pp", + "ub", NULL }; /* +h arguments with values */ @@ -828,7 +829,17 @@ int main(int argc, char **argv) if (argv[i][2] == 'P') { if (argv[i][3] != '\0') goto the_default; - } else if (argv[i][2] != '\0') + } +#ifdef ERTS_DIRTY_SCHEDULERS + else if (argv[i][2] == 'D') { + char* type = argv[i]+3; + if (strcmp(type, "cpu") != 0 && + strcmp(type, "Pcpu") != 0 && + strcmp(type, "io") != 0) + goto the_default; + } +#endif + else if (argv[i][2] != '\0') goto the_default; if (i+1 >= argc) usage(argv[i]); diff --git a/erts/etc/unix/cerl.src b/erts/etc/unix/cerl.src index be8343e87e..78fefbea55 100644 --- a/erts/etc/unix/cerl.src +++ b/erts/etc/unix/cerl.src @@ -86,6 +86,7 @@ run_valgrind=no # Default rootdir ROOTDIR=%SRC_ROOTDIR% BINDIR="$ROOTDIR/bin/`$ROOTDIR/erts/autoconf/config.guess`" +TARGET=%TARGET% #BINDIR="$ROOTDIR/bin/%TARGET%" PROGNAME=$ROOTDIR/bin/cerl EMU=beam @@ -248,6 +249,12 @@ while [ $# -gt 0 ]; do done +if [ ! -f $BINDIR/erlexec -a -f $ROOTDIR/bin/$TARGET/erlexec ]; then + # We are in a strange target (I'm looking at you openbsd) where + # TARGET != config.guess + BINDIR=$ROOTDIR/bin/$TARGET +fi + PATH=$BINDIR:$ROOTDIR/bin:$PATH EXEC=$BINDIR/erlexec diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 73887931cc..8520d58f47 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -652,7 +652,7 @@ end define etp-ct-atom-1 # Args: int # -# Determines if integer is a atom first character +# Determines if integer is an atom first character # # Non-reentrant # Returns: $etp_ct_atom @@ -1278,6 +1278,250 @@ document etpf-stackdump %--------------------------------------------------------------------------- end +define etp-heapdump +# Args: Process* +# +# Non-reentrant + etp-heapdump-1 ($arg0)->heap ($arg0)->htop +end + +document etp-heapdump +%--------------------------------------------------------------------------- +% etp-heapdump Process* +% +% Take an Process* and print a heapdump for the process heap. +%--------------------------------------------------------------------------- +end + +define etp-heapdump-old +# Args: Process* +# +# Non-reentrant + etp-heapdump-1 ($arg0)->old_heap ($arg0)->old_htop +end + +document etp-heapdump +%--------------------------------------------------------------------------- +% etp-heapdump-old Process* +% +% Take an Process* and print a heapdump for the process old heap (gen-heap). +%--------------------------------------------------------------------------- +end + + +define etp-heapdump-1 +# Args: Eterm* heap, Eterm* htop +# +# Non-reentrant + set $etp_heapdump_heap = (Eterm*)($arg0) + set $etp_heapdump_p = (Eterm*)($arg0) + set $etp_heapdump_end = (Eterm*)($arg1) + set $etp_heapdump_skips = 0 + printf "%% heapdump (%u):\n", $etp_heapdump_end-$etp_heapdump_p + while $etp_heapdump_p < $etp_heapdump_end + set $etp_heapdump_ix = 0 + printf " %p: ", $etp_heapdump_p + while $etp_heapdump_p < $etp_heapdump_end && $etp_heapdump_ix < 8 + if ($etp_heapdump_skips > 0) + printf "| 0x%08x ", ($etp_heapdump_p) + set $etp_heapdump_skips-- + else + etp-term-dump $etp_heapdump_p[0] + end + set $etp_heapdump_p++ + set $etp_heapdump_ix++ + end + printf "\n" + end +end + + +define etp-term-dump +# Args: Eterm + if (($arg0) & 0x3) == 0 + etp-term-dump-header ($arg0) + else + if (($arg0) & 0x3) == 1 + # Cons pointer + set $etp_term_dump_cons_p = ((Eterm*)(($arg0) & ~0x3)) + if $etp_term_dump_cons_p > $etp_heapdump_heap && $etp_term_dump_cons_p < $etp_heapdump_end + printf "| C:0x%08x ", $etp_term_dump_cons_p + #printf "| C: --> %5d ", $etp_heapdump_p - $etp_term_dump_cons_p - 1 + else + printf "| C:0x%08x ", $etp_term_dump_cons_p + end + else + if (($arg0) & 0x3) == 2 + # Box pointer + printf "| B:0x%08x ", ($arg0) + else + if (($arg0) & 0x3) == 3 + # immediate + etp-term-dump-immediate ($arg0) + else + printf "| U:0x%08x ", ($arg0) + end + end + end + end +end + +define etp-term-dump-immediate +# Args: immediate term + if (($arg0) & 0xF) == 0xf + # Fixnum + etp-ct-printable-1 ((long)((Sint)($arg0)>>4)) + if $etp_ct_printable + if $etp_ct_printable < 0 + printf "| I: %c (%3ld) ", (long)((Sint)($arg0)>>4), (long)((Sint)($arg0)>>4) + else + printf "| I: \\%c (%3ld) ", (long)((Sint)($arg0)>>4), (long)((Sint)($arg0)>>4) + end + else + printf "| I:%10ld ", (long)((Sint)($arg0)>>4) + end + else + if (($arg0) & 0xF) == 0x3 + etp-term-dump-pid ($arg0) + else + if (($arg0) & 0xF) == 0x7 + printf "| port:0x%05x ", ($arg0) + else + # Immediate2 - 0xB + if (($arg0) & 0x3f) == 0x0b + etp-term-dump-atom ($arg0) + else + if (($arg0) & 0x3f) == 0x1b + printf "| #Catch<%06d> ", ($arg0)>>6 + else + if (($arg0) == $etp_nil) + printf "| [] (NIL) " + else + printf "| I:0x%08x ", ($arg0) + end + end + end + end + end + end +end + +define etp-term-dump-atom +# Args: atom term + set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF] + set $etp_atom_1_i = ($etp_atom_1_ap)->len + set $etp_atom_1_p = ($etp_atom_1_ap)->name + set $etp_atom_1_quote = 1 + set $etp_atom_indent = 13 + + if ($etp_atom_1_i < 11) + if ($etp_atom_1_i > 0) + etp-ct-atom-1 (*$etp_atom_1_p) + if $etp_ct_atom + set $etp_atom_indent = 13 + else + set $etp_atom_indent = 11 + end + end + # perform indentation + printf "|" + while ($etp_atom_1_i < $etp_atom_indent) + printf " " + set $etp_atom_1_i++ + end + set $etp_atom_1_i = ($etp_atom_1_ap)->len + # Check if atom has to be quoted + if ($etp_atom_1_i > 0) + etp-ct-atom-1 (*$etp_atom_1_p) + if $etp_ct_atom + # Atom start character + set $etp_atom_1_p++ + set $etp_atom_1_i-- + set $etp_atom_1_quote = 0 + else + set $etp_atom_1_i = 0 + end + end + while $etp_atom_1_i > 0 + etp-ct-name-1 (*$etp_atom_1_p) + if $etp_ct_name + # Name character + set $etp_atom_1_p++ + set $etp_atom_1_i-- + else + set $etp_atom_1_quote = 1 + set $etp_atom_1_i = 0 + end + end + # Print the atom + if $etp_atom_1_quote + printf "'" + end + set $etp_atom_1_i = ($etp_atom_1_ap)->len + set $etp_atom_1_p = ($etp_atom_1_ap)->name + while $etp_atom_1_i > 0 + etp-char-1 (*$etp_atom_1_p) '\'' + set $etp_atom_1_p++ + set $etp_atom_1_i-- + end + if $etp_atom_1_quote + printf "'" + end + printf " " + else + printf "| A:0x%08x ", ($arg0) + end +end + +define etp-term-dump-pid +# Args: Eterm pid +# +# Non-reentrant +# + set $etp_pid_1 = (Eterm)($arg0) + if ($etp_pid_1 & 0xF) == 0x3 + if (etp_arch_bits == 64 && etp_halfword == 0) + if (etp_big_endian) + set $etp_pid_data = (unsigned) ((((Uint64) $etp_pid_1) >> 36) & 0x0fffffff) + else + set $etp_pid_data = (unsigned) ((((Uint64) $etp_pid_1) >> 4) & 0x0fffffff) + end + else + set $etp_pid_data = (unsigned) (((((Uint32) $etp_pid_1) >> 4) & ~erts_proc.r.o.pix_mask) | ((((Uint32) $etp_pid_1) >> (erts_proc.r.o.pix_cl_shift + 4)) & erts_proc.r.o.pix_cl_mask) | (((((Uint32) $etp_pid_1) >> 4) & erts_proc.r.o.pix_cli_mask) << erts_proc.r.o.pix_cli_shift)) + end + # Internal pid + printf "| <0.%04u.%03u> ", $etp_pid_data & 0x7fff, ($etp_pid_data >> 15) & 0x1fff + else + printf "| #NotPid<%#x> ", ($arg0) + end +end + +define etp-term-dump-header +# Args: Header term + if (($arg0) & 0x3f) == 0 + printf "| H:%4d-tuple ", ($arg0) >> 6 + else + set $etp_heapdump_skips = ($arg0) >> 6 + if ((($arg0) & 0x3f) == 0x18) + printf "| H: float %3d ", ($arg0) >> 6 + else + if ((($arg0) & 0x3f) == 0x28) + # sub-binary + printf "| H: sub-bin " + else + if ((($arg0) & 0x3f) == 0x8) + # pos-bignum + printf "| H:bignum %3u ", ($arg0) >> 6 + else + printf "| header %5d ", ($arg0) >> 6 + end + end + end + end +end + + + define etp-pid2pix-1 # Args: Eterm # @@ -1445,7 +1689,7 @@ define etp-process-info # Args: Process* # printf " Pid: " - etp-1 $arg0->common.id + etp-1 ($arg0)->common.id printf "\n State: " etp-proc-state $arg0 if $proxy_process != 0 @@ -1523,11 +1767,104 @@ end document etp-processes %--------------------------------------------------------------------------- % etp-processes -% +% % Print misc info about all processes %--------------------------------------------------------------------------- end +define etp-processes-memory + if (!erts_initialized) + printf "No processes, since system isn't initialized!\n" + else + set $proc_ix = 0 + printf "--- (%ld processes in wheel)\n", erts_proc.r.o.max + while $proc_ix < erts_proc.r.o.max + set $proc = (Process *) *((UWord *) &erts_proc.r.o.tab[$proc_ix]) + if ($proc != ((Process *) 0) && $proc != &erts_invalid_process) + etp-process-memory-info $proc + end + set $proc_ix++ + end + printf "---\n", + end +end + +document etp-processes-memory +%--------------------------------------------------------------------------- +% etp-processes-memory +% +% Print memory info about all processes +%--------------------------------------------------------------------------- +end + +define etp-process-memory-info +# Args: Process* +# + if ((*(((Uint32 *) &(((Process *) $arg0)->state)))) & 0x400000) + set $proxy_process = 1 + else + set $proxy_process = 0 + end + printf " " + etp-1 $arg0->common.id + printf ": (Process *) %p ", $arg0 + if $proxy_process != 0 + printf "(Process *) %p ", $arg0 + printf " *** PROXY process struct *** refer to next: \n" + etp-pid2proc-1 $arg0->common.id + printf " -" + etp-process-memory-info $proc + else + printf " [Heap: %5ld", $arg0->heap_sz + if ($arg0->old_heap) + printf " | %5ld", $arg0->old_hend - $arg0->old_heap + else + printf " | none " + end + printf "] [Mbuf: %5ld", $arg0->mbuf_sz + if (etp_smp_compiled) + printf " | %3ld (%3ld | %3ld)", ($arg0->msg.len + $arg0->msg_inq.len), $arg0->msg.len, $arg0->msg_inq.len + else + printf " | %3ld", $arg0->msg.len + end + printf "] " + if ($arg0->i) + printf " I: " + etp-cp-1 $arg0->i + printf " " + end + + if ($arg0->current) + etp-1 $arg0->current[0] + printf ":" + etp-1 $arg0->current[1] + printf "/%d ", $arg0->current[2] + end + + if (*(((Uint32 *) &(((Process *) $arg0)->state))) & 0x4) == 0 + if ($arg0->common.u.alive.reg) + etp-1 $arg0->common.u.alive.reg->name + printf " " + end + end + + if ($arg0->cp) + printf " CP: " + etp-cp-1 $arg0->cp + printf " " + end + printf "\n" + end +end + +document etp-process-memory-info +%--------------------------------------------------------------------------- +% etp-process-memory-info Process* +% +% Print memory info about process +%--------------------------------------------------------------------------- +end + define etp-port-id2pix-1 # Args: Eterm # diff --git a/erts/include/erl_native_features_config.h.in b/erts/include/erl_native_features_config.h.in new file mode 100644 index 0000000000..d1674cb256 --- /dev/null +++ b/erts/include/erl_native_features_config.h.in @@ -0,0 +1,21 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2011. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* Dirty scheduler support */ +#undef ERL_NIF_DIRTY_SCHEDULER_SUPPORT diff --git a/erts/lib_src/Makefile.in b/erts/lib_src/Makefile.in index 4f0a5e5202..cf1aef518a 100644 --- a/erts/lib_src/Makefile.in +++ b/erts/lib_src/Makefile.in @@ -458,6 +458,7 @@ RELSYSDIR = $(RELEASE_PATH)/erts-$(VSN) RELEASE_INCLUDES= \ $(ERTS_INCL)/erl_memory_trace_parser.h \ $(ERTS_INCL)/$(TARGET)/erl_int_sizes_config.h \ + $(ERTS_INCL)/$(TARGET)/erl_native_features_config.h \ $(ERTS_INCL)/erl_fixed_size_int_types.h RELEASE_LIBS=$(ERTS_LIBS) diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam Binary files differindex 3c77d6ae0f..f38377647c 100644 --- a/erts/preloaded/ebin/erlang.beam +++ b/erts/preloaded/ebin/erlang.beam diff --git a/erts/preloaded/ebin/zlib.beam b/erts/preloaded/ebin/zlib.beam Binary files differindex 5b51280838..ffd80f51ba 100644 --- a/erts/preloaded/ebin/zlib.beam +++ b/erts/preloaded/ebin/zlib.beam diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index f99d5bfdd0..ee5bd3e515 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -2090,6 +2090,10 @@ subtract(_,_) -> (cpu_topology, CpuTopology) -> OldCpuTopology when CpuTopology :: cpu_topology(), OldCpuTopology :: cpu_topology(); + (dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline) -> + OldDirtyCPUSchedulersOnline when + DirtyCPUSchedulersOnline :: pos_integer(), + OldDirtyCPUSchedulersOnline :: pos_integer(); (fullsweep_after, Number) -> OldNumber when Number :: non_neg_integer(), OldNumber :: non_neg_integer(); @@ -2220,6 +2224,9 @@ tuple_to_list(_Tuple) -> CpuTopology :: cpu_topology(); (creation) -> integer(); (debug_compiled) -> boolean(); + (dirty_cpu_schedulers) -> non_neg_integer(); + (dirty_cpu_schedulers_online) -> non_neg_integer(); + (dirty_io_schedulers) -> non_neg_integer(); (dist) -> binary(); (dist_buf_busy_limit) -> non_neg_integer(); (dist_ctrl) -> {Node :: node(), diff --git a/erts/preloaded/src/zlib.erl b/erts/preloaded/src/zlib.erl index 3d85533b80..df7b2e6198 100644 --- a/erts/preloaded/src/zlib.erl +++ b/erts/preloaded/src/zlib.erl @@ -47,6 +47,7 @@ %% compresssion strategy -define(Z_FILTERED, 1). -define(Z_HUFFMAN_ONLY, 2). +-define(Z_RLE, 3). -define(Z_DEFAULT_STRATEGY, 0). %% deflate compression method @@ -125,7 +126,7 @@ -type zmethod() :: 'deflated'. -type zwindowbits() :: -15..-9 | 9..47. -type zmemlevel() :: 1..9. --type zstrategy() :: 'default' | 'filtered' | 'huffman_only'. +-type zstrategy() :: 'default' | 'filtered' | 'huffman_only' | 'rle'. %%------------------------------------------------------------------------ @@ -486,6 +487,7 @@ arg_level(_) -> erlang:error(badarg). arg_strategy(filtered) -> ?Z_FILTERED; arg_strategy(huffman_only) -> ?Z_HUFFMAN_ONLY; +arg_strategy(rle) -> ?Z_RLE; arg_strategy(default) -> ?Z_DEFAULT_STRATEGY; arg_strategy(_) -> erlang:error(badarg). diff --git a/erts/test/erlc_SUITE.erl b/erts/test/erlc_SUITE.erl index ed7a43c7e7..5002836954 100644 --- a/erts/test/erlc_SUITE.erl +++ b/erts/test/erlc_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2012. All Rights Reserved. +%% Copyright Ericsson AB 1997-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -334,7 +334,7 @@ make_dep_options(Config) -> run(Config, Cmd0, Name, Options, Expect) -> Cmd = Cmd0 ++ " " ++ Options ++ " " ++ Name, - io:format("~s", [Cmd]), + io:format("~ts", [Cmd]), Result = run_command(Config, Cmd), verify_result(Result, Expect). @@ -356,7 +356,7 @@ split([], Current, Lines) -> split([], [], [lists:reverse(Current)|Lines]). match_messages([Msg|Rest1], [Regexp|Rest2]) -> - case re:run(Msg, Regexp, [{capture,none}]) of + case re:run(Msg, Regexp, [{capture,none}, unicode]) of match -> ok; nomatch -> @@ -398,7 +398,7 @@ run_command(Config, Cmd) -> TmpDir = filename:join(?config(priv_dir, Config), "tmp"), file:make_dir(TmpDir), {RunFile, Run, Script} = run_command(TmpDir, os:type(), Cmd), - ok = file:write_file(filename:join(TmpDir, RunFile), Script), + ok = file:write_file(filename:join(TmpDir, RunFile), unicode:characters_to_binary(Script)), os:cmd(Run). run_command(Dir, {win32, _}, Cmd) -> diff --git a/erts/test/erlexec_SUITE.erl b/erts/test/erlexec_SUITE.erl index 0dfe6c2e5f..f5ea8f160a 100644 --- a/erts/test/erlexec_SUITE.erl +++ b/erts/test/erlexec_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2007-2011. All Rights Reserved. +%% Copyright Ericsson AB 2007-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -433,10 +433,10 @@ verify_not_args(Xs, Ys) -> Xs). emu_args(CmdLineArgs) -> - io:format("CmdLineArgs = ~s~n", [CmdLineArgs]), + io:format("CmdLineArgs = ~ts~n", [CmdLineArgs]), {ok,[[Erl]]} = init:get_argument(progname), EmuCL = os:cmd(Erl ++ " -emu_args_exit " ++ CmdLineArgs), - io:format("EmuCL = ~s", [EmuCL]), + io:format("EmuCL = ~ts", [EmuCL]), split_emu_clt(string:tokens(EmuCL, [$ ,$\t,$\n,$\r])). split_emu_clt(EmuCLT) -> diff --git a/erts/test/z_SUITE.erl b/erts/test/z_SUITE.erl index da72b18f05..056561d3db 100644 --- a/erts/test/z_SUITE.erl +++ b/erts/test/z_SUITE.erl @@ -116,7 +116,7 @@ find_cerl(false) -> end; find_cerl(DBTop) -> case catch filelib:wildcard(filename:join([DBTop, - "otp_src_R*", + "otp_src_*", "bin", "cerl"])) of [Cerl | _ ] -> @@ -242,7 +242,7 @@ dump_core(#core_search_conf{ cerl = Cerl }, Core) -> _ -> os:cmd(Cerl ++ " -dump " ++ Core) end, - ct:log("~s~n~n~s",[Core,Dump]). + ct:log("~ts~n~n~ts",[Core,Dump]). format_core(Conf, {ignore, Core}) -> |