aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/test/system_info_SUITE.erl
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2010-09-15 22:14:51 +0200
committerRickard Green <[email protected]>2011-11-13 20:39:30 +0100
commita67e91e658bdbba24fcc3c79b06fdf10ff830bc9 (patch)
tree07f9e6b1fd715d516d2571521307fe1b9d7c3948 /erts/emulator/test/system_info_SUITE.erl
parent55358c54778ead444e51f565d00175ba887ef182 (diff)
downloadotp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.tar.gz
otp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.tar.bz2
otp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.zip
Optimize memory allocation
A number of memory allocation optimizations have been implemented. Most optimizations reduce contention caused by synchronization between threads during allocation and deallocation of memory. Most notably: * Synchronization of memory management in scheduler specific allocator instances has been rewritten to use lock-free synchronization. * Synchronization of memory management in scheduler specific pre-allocators has been rewritten to use lock-free synchronization. * The 'mseg_alloc' memory segment allocator now use scheduler specific instances instead of one instance. Apart from reducing contention this also ensures that memory allocators always create memory segments on the local NUMA node on a NUMA system.
Diffstat (limited to 'erts/emulator/test/system_info_SUITE.erl')
-rw-r--r--erts/emulator/test/system_info_SUITE.erl313
1 files changed, 311 insertions, 2 deletions
diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl
index 9b782b35a2..0350eb671d 100644
--- a/erts/emulator/test/system_info_SUITE.erl
+++ b/erts/emulator/test/system_info_SUITE.erl
@@ -37,7 +37,7 @@
init_per_group/2,end_per_group/2,
init_per_testcase/2, end_per_testcase/2]).
--export([process_count/1, system_version/1, misc_smoke_tests/1, heap_size/1, wordsize/1]).
+-export([process_count/1, system_version/1, misc_smoke_tests/1, heap_size/1, wordsize/1, memory/1]).
-define(DEFAULT_TIMEOUT, ?t:minutes(2)).
@@ -45,7 +45,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[process_count, system_version, misc_smoke_tests,
- heap_size, wordsize].
+ heap_size, wordsize, memory].
groups() ->
[].
@@ -187,3 +187,312 @@ wordsize(Config) when is_list(Config) ->
Other ->
exit({unexpected_wordsizes,Other})
end.
+
+memory(doc) -> ["Verify that erlang:memory/0 and memory results in crashdump produce are similar"];
+memory(Config) when is_list(Config) ->
+ %%
+ %% Verify that erlang:memory/0 and memory results in
+ %% crashdump produce are similar.
+ %%
+ %% erlang:memory/0 requests information from each scheduler
+ %% thread and puts the information together in erlang code
+ %% (erlang.erl).
+ %%
+ %% When a crash dump is written we cannot use the
+ %% erlang:memory/0 implementation. The crashdump implementation
+ %% is a pure C implementation inspecting all allocator instances
+ %% after the system has been blocked (erts_memory() in erl_alloc.c).
+ %%
+ %% Since we got two implementations, modifications can easily
+ %% cause them to produce different results.
+ %%
+ %% erts_debug:get_internal_state(memory) blocks the system and
+ %% execute the same code as the crash dump writing uses.
+ %%
+
+ erts_debug:set_internal_state(available_internal_state, true),
+ %% Use a large heap size on the controling process in
+ %% order to avoid changes in its heap size during
+ %% comparisons.
+ MinHeapSize = process_flag(min_heap_size, 1024*1024),
+ Prio = process_flag(priority, max),
+ try
+ erlang:memory(), %% first call will init stat atoms
+ garbage_collect(), %% blow up heap
+ memory_test(Config)
+ catch
+ error:notsup -> {skipped, "erlang:memory() not supported"}
+ after
+ process_flag(min_heap_size, MinHeapSize),
+ process_flag(priority, Prio),
+ catch erts_debug:set_internal_state(available_internal_state, false)
+ end.
+
+memory_test(_Config) ->
+
+ MWs = spawn_mem_workers(),
+
+ DPs = mem_workers_call(MWs,
+ fun () ->
+ mapn(fun (_) ->
+ spawn(fun () ->
+ receive
+ after infinity ->
+ ok
+ end
+ end)
+ end,
+ 1000 div erlang:system_info(schedulers_online))
+ end,
+ []),
+ cmp_memory(MWs, "spawn procs"),
+
+ Ps = lists:flatten(DPs),
+
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) -> link(P) end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "link procs"),
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) -> unlink(P) end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "unlink procs"),
+
+ DMs = mem_workers_call(MWs,
+ fun () ->
+ lists:map(fun (P) ->
+ monitor(process, P)
+ end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "monitor procs"),
+ Ms = lists:flatten(DMs),
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (M) ->
+ demonitor(M)
+ end, Ms)
+ end,
+ []),
+ cmp_memory(MWs, "demonitor procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) ->
+ P ! {a, "message", make_ref()}
+ end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "message procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ Mons = lists:map(fun (P) ->
+ exit(P, kill),
+ monitor(process, P)
+ end,
+ Ps),
+ lists:foreach(fun (Mon) ->
+ receive
+ {'DOWN', Mon, _, _, _} -> ok
+ end
+ end,
+ Mons)
+ end, []),
+ cmp_memory(MWs, "kill procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ put(binary_data,
+ mapn(fun (_) -> list_to_binary(lists:duplicate(256,$?)) end, 100))
+ end,
+ []),
+
+ cmp_memory(MWs, "store binary data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ put(binary_data, false),
+ garbage_collect()
+ end,
+ []),
+ cmp_memory(MWs, "release binary data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ list_to_atom("an ugly atom "++integer_to_list(erlang:system_info(scheduler_id))),
+ list_to_atom("another ugly atom "++integer_to_list(erlang:system_info(scheduler_id))),
+ list_to_atom("yet another ugly atom "++integer_to_list(erlang:system_info(scheduler_id)))
+ end,
+ []),
+ cmp_memory(MWs, "new atoms"),
+
+
+ mem_workers_call(MWs,
+ fun () ->
+ T = ets:new(?MODULE, []),
+ ets:insert(T, {gurka, lists:seq(1,10000)}),
+ ets:insert(T, {banan, lists:seq(1,1024)}),
+ ets:insert(T, {appelsin, make_ref()}),
+ put(ets_id, T)
+ end,
+ []),
+ cmp_memory(MWs, "store ets data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ ets:delete(get(ets_id)),
+ put(ets_id, false)
+ end,
+ []),
+ cmp_memory(MWs, "remove ets data"),
+
+ lists:foreach(fun (MW) ->
+ unlink(MW),
+ Mon = monitor(process, MW),
+ exit(MW, kill),
+ receive
+ {'DOWN', Mon, _, _, _} -> ok
+ end
+ end,
+ MWs),
+ ok.
+
+mem_worker() ->
+ receive
+ {call, From, Fun, Args} ->
+ From ! {reply, self(), apply(Fun, Args)},
+ mem_worker();
+ {cast, _From, Fun, Args} ->
+ apply(Fun, Args),
+ mem_worker()
+ end.
+
+mem_workers_call(MWs, Fun, Args) ->
+ lists:foreach(fun (MW) ->
+ MW ! {call, self(), Fun, Args}
+ end,
+ MWs),
+ lists:map(fun (MW) ->
+ receive
+ {reply, MW, Res} ->
+ Res
+ end
+ end,
+ MWs).
+
+mem_workers_cast(MWs, Fun, Args) ->
+ lists:foreach(fun (MW) ->
+ MW ! {cast, self(), Fun, Args}
+ end,
+ MWs).
+
+spawn_mem_workers() ->
+ spawn_mem_workers(erlang:system_info(schedulers_online)).
+
+spawn_mem_workers(0) ->
+ [];
+spawn_mem_workers(N) ->
+ [spawn_opt(fun () -> mem_worker() end,
+ [{scheduler, N rem erlang:system_info(schedulers_online) + 1},
+ link]) | spawn_mem_workers(N-1)].
+
+
+
+mem_get(X, Mem) ->
+ case lists:keyfind(X, 1, Mem) of
+ {X, Val} -> Val;
+ false -> false
+ end.
+
+cmp_memory(What, Mem1, Mem2, 1) ->
+ R1 = mem_get(What, Mem1),
+ R2 = mem_get(What, Mem2),
+ true = R1 == R2;
+cmp_memory(What, Mem1, Mem2, RelDiff) ->
+ %% We allow RealDiff diff
+ R1 = mem_get(What, Mem1),
+ R2 = mem_get(What, Mem2),
+ case R1 == R2 of
+ true ->
+ ok;
+ false ->
+ case R1 > R2 of
+ true ->
+ true = R2*RelDiff > R1;
+ false ->
+ true = R1*RelDiff > R2
+ end
+ end.
+
+pos_int(Val) when Val >= 0 ->
+ Val;
+pos_int(Val) ->
+ exit({not_pos_int, Val}).
+
+check_sane_memory(Mem) ->
+ Tot = pos_int(mem_get(total, Mem)),
+ Proc = pos_int(mem_get(processes, Mem)),
+ ProcUsed = pos_int(mem_get(processes_used, Mem)),
+ Sys = pos_int(mem_get(system, Mem)),
+ Atom = pos_int(mem_get(atom, Mem)),
+ AtomUsed = pos_int(mem_get(atom_used, Mem)),
+ Bin = pos_int(mem_get(binary, Mem)),
+ Code = pos_int(mem_get(code, Mem)),
+ Ets = pos_int(mem_get(ets, Mem)),
+
+ Tot = Proc + Sys,
+ true = Sys > Atom + Bin + Code + Ets,
+ true = Proc >= ProcUsed,
+ true = Atom >= AtomUsed,
+
+ case mem_get(maximum, Mem) of
+ false -> ok;
+ Max -> true = pos_int(Max) >= Tot
+ end,
+ ok.
+
+cmp_memory(MWs, Str) ->
+ erlang:display(Str),
+ lists:foreach(fun (MW) -> garbage_collect(MW) end, MWs),
+ garbage_collect(),
+ erts_debug:set_internal_state(wait, deallocations),
+
+ EDM = erts_debug:get_internal_state(memory),
+ EM = erlang:memory(),
+
+ io:format("~s:~n"
+ "erlang:memory() = ~p~n"
+ "crash dump memory = ~p~n",
+ [Str, EM, EDM]),
+
+ ?line check_sane_memory(EM),
+ ?line check_sane_memory(EDM),
+
+ %% We expect these to always give us exactly the same result
+
+ ?line cmp_memory(atom, EM, EDM, 1),
+ ?line cmp_memory(atom_used, EM, EDM, 1),
+ ?line cmp_memory(binary, EM, EDM, 1),
+ ?line cmp_memory(code, EM, EDM, 1),
+ ?line cmp_memory(ets, EM, EDM, 1),
+
+ %% Total, processes, processes_used, and system will seldom
+ %% give us exactly the same result since the two readings
+ %% aren't taken atomically.
+
+ ?line cmp_memory(total, EM, EDM, 1.05),
+ ?line cmp_memory(processes, EM, EDM, 1.05),
+ ?line cmp_memory(processes_used, EM, EDM, 1.05),
+ ?line cmp_memory(system, EM, EDM, 1.05),
+
+ ok.
+
+mapn(_Fun, 0) ->
+ [];
+mapn(Fun, N) ->
+ [Fun(N) | mapn(Fun, N-1)].