From a2ed83ab92c6267f5d5d459e69347c7db7e9ab48 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Thu, 27 Dec 2012 12:45:26 +0000 Subject: Fix efile_drv crash when using async thread pool When using the async thread pool and compressed files, when an efile driver port instance is shutdown, the efile_drv stop callback closes the file descriptor (a gzFile instance actually) - this is dangerous if at the same time there's an async thread performing an operation against the file, for example calling invoke_read(), which can result in a segmentation fault, or calling invoke_close() which double closes the gzFile and this in turn causes 2 consecutive calls to driver_free() against same gzFile instance (resulting in later unexpected crashes in erl_bestfit_alloc.c for example). The following test program makes the emulator crash when using the async thread pool: -module(t2). -export([t/1]). t(N) -> file:delete("foo.bar"), % Use of 'compressed' option, for creating/writing the file, % is irrelevant. It only matters when opening it later for % reads - a non-compressed file open with the 'compressed' % option goes through an internal gzFile handle (instead of % a plain integer fd), just like a compressed file. %{ok, Fd} = file:open("foo.bar", [raw, write, binary]), {ok, Fd} = file:open("foo.bar", [raw, write, binary, compressed]), ok = file:write(Fd, <<"qwerty">>), ok = file:close(Fd), Pid = spawn_link(fun() -> process_flag(trap_exit, true), loop(N) end), Ref = erlang:monitor(process, Pid), receive {'DOWN', Ref, _, _, _} -> ok end. loop(0) -> ok; loop(N) -> Server = self(), Pid = spawn(fun() -> {ok, Fd} = file:open("foo.bar", [read, raw, binary, compressed]), Server ! continue, % Comment the file:read/2 call to make the file:close/1 call much % more likely to crash or end up causing efile_drv to close twice % the fd (gzFile), which will make the emulator crash later in the % best fit allocator (erl_bestfit_alloc.c). _ = file:read(Fd, 5), file:close(Fd) end), receive continue -> ok end, exit(Pid, shutdown), loop(N - 1). Running this test when using the async thread pool: shell> erl +A 4 Erlang R15B03 (erts-5.9.3.1) [source] [64-bit] [smp:4:4] [async-threads:4] [hipe] [kernel-poll:false] Eshell V5.9.3.1 (abort with ^G) 1> c(t2). {ok,t2} 2> t2:t(500000). Segmentation fault (core dumped) When not using the async thread pool, there are no issues: shell> erl Erlang R15B03 (erts-5.9.3.1) [source] [64-bit] [smp:4:4] [async-threads:0] [hipe] [kernel-poll:false] Eshell V5.9.3.1 (abort with ^G) 1> c(t2). {ok,t2} 2> t2:t(500000). ok 3> An example stack trace when the crash happens because there's an ongoing read operation is: Thread 1 (Thread 0x7f021cf2c700 (LWP 10687)): #0 updatewindow (strm=0x2691bf8, out=5) at zlib/inflate.c:338 #1 0x00000000005a2ba0 in inflate (strm=0x2691bf8, flush=0) at zlib/inflate.c:1141 #2 0x000000000055c46a in erts_gzread (file=0x2691bf8, buf=0x7f0215b29e80, len=5) at drivers/common/gzio.c:523 #3 0x00000000005849ef in invoke_read (data=0x26b2228) at drivers/common/efile_drv.c:1114 #4 0x000000000050adcb in async_main (arg=0x7f021bf5cf40) at beam/erl_async.c:488 #5 0x00000000005c21a0 in thr_wrapper (vtwd=0x7fff69c6ff10) at pthread/ethread.c:106 #6 0x00007f021c573e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 #7 0x00007f021c097cbd in clone () from /lib/x86_64-linux-gnu/libc.so.6 #8 0x0000000000000000 in ?? () And when there's an ongoing close operation when the driver is stopped: Thread 1 (Thread 0x7fe5f5654700 (LWP 747)): #0 0x0000000000459b64 in bf_unlink_free_block (block=0x10b2a70, allctr=, flags=) at beam/erl_bestfit_alloc.c:792 #1 bf_unlink_free_block (flags=0, block=0x10b2a70, allctr=0x873380) at beam/erl_bestfit_alloc.c:822 #2 bf_get_free_block (allctr=0x873380, size=, cand_blk=, cand_size=, flags=0) at beam/erl_bestfit_alloc.c:869 #3 0x000000000044f0dd in mbc_alloc_block (alcu_flgsp=, blk_szp=, size=200, allctr=0x873380) at beam/erl_alloc_util.c:1198 #4 mbc_alloc (allctr=0x873380, size=200) at beam/erl_alloc_util.c:1345 #5 0x000000000045449b in do_erts_alcu_alloc (size=200, extra=0x873380, type=165) at beam/erl_alloc_util.c:3442 #6 erts_alcu_alloc_thr_pref (type=165, extra=, size=192) at beam/erl_alloc_util.c:3520 #7 0x000000000055c0bf in gz_open (mode=0x5d98b2 "rb", path=0x1103418 "foo.bar") at drivers/common/gzio.c:164 #8 erts_gzopen (path=0x1103418 "foo.bar", mode=0x5d98b2 "rb") at drivers/common/gzio.c:307 #9 0x0000000000584e47 in invoke_open (data=0x1103330) at drivers/common/efile_drv.c:1857 #10 0x000000000050adcb in async_main (arg=0x7fe5f698af80) at beam/erl_async.c:488 --- lib/kernel/test/file_SUITE.erl | 57 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) (limited to 'lib/kernel') diff --git a/lib/kernel/test/file_SUITE.erl b/lib/kernel/test/file_SUITE.erl index 914f0d6127..f34341f561 100644 --- a/lib/kernel/test/file_SUITE.erl +++ b/lib/kernel/test/file_SUITE.erl @@ -60,7 +60,8 @@ -export([ read_not_really_compressed/1, read_compressed_cooked/1, read_compressed_cooked_binary/1, read_cooked_tar_problem/1, - write_compressed/1, compress_errors/1, catenated_gzips/1]). + write_compressed/1, compress_errors/1, catenated_gzips/1, + compress_async_crash/1]). -export([ make_link/1, read_link_info_for_non_link/1, symlinks/1]). @@ -135,7 +136,8 @@ groups() -> {compression, [], [read_compressed_cooked, read_compressed_cooked_binary, read_cooked_tar_problem, read_not_really_compressed, - write_compressed, compress_errors, catenated_gzips]}, + write_compressed, compress_errors, catenated_gzips, + compress_async_crash]}, {links, [], [make_link, read_link_info_for_non_link, symlinks]}]. @@ -2312,6 +2314,57 @@ compress_errors(Config) when is_list(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +compress_async_crash(suite) -> []; +compress_async_crash(doc) -> []; +compress_async_crash(Config) when is_list(Config) -> + ?line DataDir = ?config(data_dir, Config), + ?line Path = filename:join(DataDir, "test.gz"), + ExpectedData = <<"qwerty">>, + + ?line _ = ?FILE_MODULE:delete(Path), + ?line {ok, Fd} = ?FILE_MODULE:open(Path, [write, binary, compressed]), + ?line ok = ?FILE_MODULE:write(Fd, ExpectedData), + ?line ok = ?FILE_MODULE:close(Fd), + + % Test that when using async thread pool, the emulator doesn't crash + % when the efile port driver is stopped while a compressed file operation + % is in progress (being carried by an async thread). + ?line ok = compress_async_crash_loop(10000, Path, ExpectedData), + ?line ok = ?FILE_MODULE:delete(Path), + ok. + +compress_async_crash_loop(0, _Path, _ExpectedData) -> + ok; +compress_async_crash_loop(N, Path, ExpectedData) -> + Parent = self(), + {Pid, Ref} = spawn_monitor( + fun() -> + ?line {ok, Fd} = ?FILE_MODULE:open( + Path, [read, compressed, raw, binary]), + Len = byte_size(ExpectedData), + Parent ! {self(), continue}, + ?line {ok, ExpectedData} = ?FILE_MODULE:read(Fd, Len), + ?line ok = ?FILE_MODULE:close(Fd), + receive foobar -> ok end + end), + receive + {Pid, continue} -> + exit(Pid, shutdown), + receive + {'DOWN', Ref, _, _, Reason} -> + ?line shutdown = Reason + end; + {'DOWN', Ref, _, _, Reason2} -> + test_server:fail({worker_exited, Reason2}) + after 60000 -> + exit(Pid, shutdown), + erlang:demonitor(Ref, [flush]), + test_server:fail(worker_timeout) + end, + compress_async_crash_loop(N - 1, Path, ExpectedData). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + altname(doc) -> "Test the file:altname/1 function"; altname(suite) -> -- cgit v1.2.3