From f2c4f6f83deecba0c2527e520f0f18fba7d84815 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Wed, 26 Sep 2018 11:55:01 +0200 Subject: erts: Implement fragmentation of distrubution messages --- erts/emulator/test/distribution_SUITE.erl | 182 +++++++++++++++++++++++++++- erts/emulator/test/erts_test_utils.erl | 31 ++++- erts/emulator/test/node_container_SUITE.erl | 23 +--- 3 files changed, 210 insertions(+), 26 deletions(-) (limited to 'erts/emulator/test') diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 885c66331c..5e268c748f 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -62,7 +62,8 @@ bad_dist_ext_control/1, bad_dist_ext_connection_id/1, bad_dist_ext_size/1, - start_epmd_false/1, epmd_module/1]). + start_epmd_false/1, epmd_module/1, + bad_dist_fragments/1]). %% Internal exports. -export([sender/3, receiver2/2, dummy_waiter/0, dead_process/0, @@ -90,7 +91,7 @@ all() -> dist_parallel_send, atom_roundtrip, unicode_atom_roundtrip, atom_roundtrip_r16b, contended_atom_cache_entry, contended_unicode_atom_cache_entry, - bad_dist_structure, {group, bad_dist_ext}, + {group, bad_dist}, {group, bad_dist_ext}, start_epmd_false, epmd_module]. groups() -> @@ -100,6 +101,8 @@ groups() -> {trap_bif, [], [trap_bif_1, trap_bif_2, trap_bif_3]}, {dist_auto_connect, [], [dist_auto_connect_never, dist_auto_connect_once]}, + {bad_dist, [], + [bad_dist_structure, bad_dist_fragments]}, {bad_dist_ext, [], [bad_dist_ext_receive, bad_dist_ext_process_info, bad_dist_ext_size, @@ -1382,6 +1385,15 @@ get_conflicting_unicode_atoms(CIX, N) -> -define(DOP_DEMONITOR_P, 20). -define(DOP_MONITOR_P_EXIT, 21). +-define(DOP_SEND_SENDER, 22). +-define(DOP_SEND_SENDER_TT, 23). + +-define(DOP_PAYLOAD_EXIT, 24). +-define(DOP_PAYLOAD_EXIT_TT, 25). +-define(DOP_PAYLOAD_EXIT2, 26). +-define(DOP_PAYLOAD_EXIT2_TT, 27). +-define(DOP_PAYLOAD_MONITOR_P_EXIT, 28). + start_monitor(Offender,P) -> Parent = self(), Q = spawn(Offender, @@ -1515,7 +1527,145 @@ bad_dist_structure(Config) when is_list(Config) -> stop_node(Victim), ok. +%% Test various dist fragmentation errors +bad_dist_fragments(Config) when is_list(Config) -> + ct:timetrap({seconds, 15}), + + {ok, Offender} = start_node(bad_dist_fragment_offender), + {ok, Victim} = start_node(bad_dist_fragment_victim), + Msg = iolist_to_binary(dmsg_ext(lists:duplicate(255,255))), + + start_node_monitors([Offender,Victim]), + Parent = self(), + P = spawn(Victim, + fun () -> + process_flag(trap_exit,true), + Parent ! {self(), started}, + receive check_msgs -> ok end, + bad_dist_struct_check_msgs([one, + two]), + Parent ! {self(), messages_checked}, + receive done -> ok end + end), + receive {P, started} -> ok end, + pong = rpc:call(Victim, net_adm, ping, [Offender]), + verify_up(Offender, Victim), + true = lists:member(Offender, rpc:call(Victim, erlang, nodes, [])), + start_monitor(Offender,P), + P ! one, + + start_monitor(Offender,P), + send_bad_fragments(Offender, Victim, P,{?DOP_SEND,?COOKIE,P},3, + [{frg, 1, binary:part(Msg, 10,byte_size(Msg)-10)}]), + + start_monitor(Offender,P), + send_bad_fragments(Offender, Victim, P,{?DOP_SEND,?COOKIE,P},3, + [{hdr, 3, binary:part(Msg, 0,10)}, + {frg, 1, binary:part(Msg, 10,byte_size(Msg)-10)}]), + + start_monitor(Offender,P), + send_bad_fragments(Offender, Victim, P,{?DOP_SEND,?COOKIE,P},3, + [{hdr, 3, binary:part(Msg, 0,10)}, + {hdr, 3, binary:part(Msg, 0,10)}]), + + start_monitor(Offender,P), + send_bad_fragments(Offender, Victim, P,{?DOP_SEND,?COOKIE,P,broken},3, + [{hdr, 1, binary:part(Msg, 10,byte_size(Msg)-10)}]), + + start_monitor(Offender,P), + send_bad_fragments(Offender, Victim, P,{?DOP_SEND,?COOKIE,P},3, + [{hdr, 3, binary:part(Msg, 10,byte_size(Msg)-10)}, + close]), + + start_monitor(Offender,P), + ExitVictim = spawn(Victim, fun() -> receive ok -> ok end end), + send_bad_fragments(Offender, Victim, P,{?DOP_PAYLOAD_EXIT,P,ExitVictim},2, + [{hdr, 1, [131]}]), + + start_monitor(Offender,P), + Exit2Victim = spawn(Victim, fun() -> receive ok -> ok end end), + send_bad_fragments(Offender, Victim, P,{?DOP_PAYLOAD_EXIT2,P,ExitVictim},2, + [{hdr, 1, [132]}]), + + start_monitor(Offender,P), + DownVictim = spawn(Victim, fun() -> receive ok -> ok end end), + DownRef = erlang:monitor(process, DownVictim), + send_bad_fragments(Offender, Victim, P,{?DOP_PAYLOAD_MONITOR_P_EXIT,P,DownVictim,DownRef},2, + [{hdr, 1, [133]}]), + + P ! two, + P ! check_msgs, + receive + {P, messages_checked} -> ok + after 5000 -> + exit(victim_is_dead) + end, + + {message_queue_len, 0} + = rpc:call(Victim, erlang, process_info, [P, message_queue_len]), + + unlink(P), + P ! done, + stop_node(Offender), + stop_node(Victim), + ok. + +dmsg_frag_hdr(Frag) -> + dmsg_frag_hdr(erlang:phash2(self()), Frag). +dmsg_frag_hdr(Seq, Frag) -> + [131, $E, uint64_be(Seq), uint64_be(Frag), 0]. + +dmsg_frag(Frag) -> + dmsg_frag(erlang:phash2(self()), Frag). +dmsg_frag(Seq, Frag) -> + [131, $F, uint64_be(Seq), uint64_be(Frag)]. + +send_bad_fragments(Offender,VictimNode,Victim,Ctrl,WhereToPutSelf,Fragments) -> + Parent = self(), + Done = make_ref(), + ct:pal("Send: ~p",[Fragments]), + spawn_link(Offender, + fun () -> + Node = node(Victim), + pong = net_adm:ping(Node), + erlang:monitor_node(Node, true), + DCtrl = dctrl(Node), + Ctrl1 = case WhereToPutSelf of + 0 -> + Ctrl; + N when N > 0 -> + setelement(N,Ctrl,self()) + end, + + FragData = [case Type of + hdr -> + [dmsg_frag_hdr(FragId), + dmsg_ext(Ctrl1), FragPayload]; + frg -> + [dmsg_frag(FragId), FragPayload] + end || {Type, FragId, FragPayload} <- Fragments], + + receive {nodedown, Node} -> exit("premature nodedown") + after 10 -> ok + end, + + [ dctrl_send(DCtrl, D) || D <- FragData ], + [ erlang:port_close(DCtrl) || close <- Fragments], + + receive {nodedown, Node} -> ok + after 5000 -> exit("missing nodedown") + end, + Parent ! {FragData,Done} + end), + receive + {WhatSent,Done} -> + io:format("Offender sent ~p~n",[WhatSent]), + verify_nc(VictimNode), + ok + after 7000 -> + exit(unable_to_send) + end. bad_dist_ext_receive(Config) when is_list(Config) -> {ok, Offender} = start_node(bad_dist_ext_receive_offender), @@ -2124,8 +2274,25 @@ start_node(Config, Args, Rel) when is_list(Config), is_list(Rel) -> start_node(Name, Args, Rel). stop_node(Node) -> + verify_nc(Node), test_server:stop_node(Node). +verify_nc(Node) -> + P = self(), + Ref = make_ref(), + spawn(Node, + fun() -> + R = erts_test_utils:check_node_dist(fun(E) -> E end), + P ! {Ref, R} + end), + receive + {Ref, ok} -> + ok; + {Ref, Error} -> + ct:log("~s",[Error]), + ct:fail(failed_nc_refc_check) + end. + freeze_node(Node, MS) -> Own = 300, DoingIt = make_ref(), @@ -2485,6 +2652,17 @@ mk_ref({NodeNameExt, Creation}, Numbers) when is_integer(Creation), exit({unexpected_binary_to_term_result, Other}) end. +uint64_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 64 -> + [(Uint bsr 56) band 16#ff, + (Uint bsr 48) band 16#ff, + (Uint bsr 40) band 16#ff, + (Uint bsr 32) band 16#ff, + (Uint bsr 24) band 16#ff, + (Uint bsr 16) band 16#ff, + (Uint bsr 8) band 16#ff, + Uint band 16#ff]; +uint64_be(Uint) -> + exit({badarg, uint64_be, [Uint]}). uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 -> [(Uint bsr 24) band 16#ff, diff --git a/erts/emulator/test/erts_test_utils.erl b/erts/emulator/test/erts_test_utils.erl index ac2f2435be..0c3ef3e0fc 100644 --- a/erts/emulator/test/erts_test_utils.erl +++ b/erts/emulator/test/erts_test_utils.erl @@ -27,6 +27,7 @@ -export([mk_ext_pid/3, mk_ext_port/2, mk_ext_ref/2, + available_internal_state/1, check_node_dist/0, check_node_dist/1, check_node_dist/3]). @@ -157,6 +158,21 @@ mk_ext_ref({NodeName, Creation}, Numbers) when is_list(NodeName), end. +available_internal_state(Bool) when Bool == true; Bool == false -> + case {Bool, + (catch erts_debug:get_internal_state(available_internal_state))} of + {true, true} -> + true; + {false, true} -> + erts_debug:set_internal_state(available_internal_state, false), + true; + {true, _} -> + erts_debug:set_internal_state(available_internal_state, true), + false; + {false, _} -> + false + end. + %% %% Check reference counters for node- and dist entries. @@ -168,16 +184,21 @@ check_node_dist() -> end). check_node_dist(Fail) -> + AIS = available_internal_state(true), + [erlang:garbage_collect(P) || P <- erlang:processes()], {{node_references, NodeRefs}, {dist_references, DistRefs}} = erts_debug:get_internal_state(node_and_dist_references), - check_node_dist(Fail, NodeRefs, DistRefs). - - + R = check_node_dist(Fail, NodeRefs, DistRefs), + available_internal_state(AIS), + R. check_node_dist(Fail, NodeRefs, DistRefs) -> - check_nd_refc({node(),erlang:system_info(creation)}, - NodeRefs, DistRefs, Fail). + AIS = available_internal_state(true), + R = check_nd_refc({node(),erlang:system_info(creation)}, + NodeRefs, DistRefs, Fail), + available_internal_state(AIS), + R. check_nd_refc({ThisNodeName, ThisCreation}, NodeRefs, DistRefs, Fail) -> diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl index b3d8f9584d..ca5c93f535 100644 --- a/erts/emulator/test/node_container_SUITE.erl +++ b/erts/emulator/test/node_container_SUITE.erl @@ -71,25 +71,10 @@ init_per_suite(Config) -> end_per_suite(_Config) -> erts_debug:set_internal_state(available_internal_state, true), erts_debug:set_internal_state(node_tab_delayed_delete, -1), %% restore original value - available_internal_state(false). - -available_internal_state(Bool) when Bool == true; Bool == false -> - case {Bool, - (catch erts_debug:get_internal_state(available_internal_state))} of - {true, true} -> - true; - {false, true} -> - erts_debug:set_internal_state(available_internal_state, false), - true; - {true, _} -> - erts_debug:set_internal_state(available_internal_state, true), - false; - {false, _} -> - false - end. + erts_test_util:available_internal_state(false). init_per_testcase(_Case, Config) when is_list(Config) -> - available_internal_state(true), + erts_test_util:available_internal_state(true), Config. end_per_testcase(_Case, Config) when is_list(Config) -> @@ -928,9 +913,9 @@ id(X) -> -define(ND_REFS, erts_debug:get_internal_state(node_and_dist_references)). node_container_refc_check(Node) when is_atom(Node) -> - AIS = available_internal_state(true), + AIS = erts_test_util:available_internal_state(true), nc_refc_check(Node), - available_internal_state(AIS). + erts_test_util:available_internal_state(AIS). nc_refc_check(Node) when is_atom(Node) -> Ref = make_ref(), -- cgit v1.2.3 From 6493f5e396c7528ef6696ce440619550edb5c6ff Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 25 Jan 2019 16:29:26 +0100 Subject: erts: Add distr testcases for fragmentation --- erts/emulator/test/distribution_SUITE.erl | 156 +++++++++++++++++++++++++++- erts/emulator/test/node_container_SUITE.erl | 8 +- 2 files changed, 156 insertions(+), 8 deletions(-) (limited to 'erts/emulator/test') diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl index 5e268c748f..4f70b51aa0 100644 --- a/erts/emulator/test/distribution_SUITE.erl +++ b/erts/emulator/test/distribution_SUITE.erl @@ -63,7 +63,11 @@ bad_dist_ext_connection_id/1, bad_dist_ext_size/1, start_epmd_false/1, epmd_module/1, - bad_dist_fragments/1]). + bad_dist_fragments/1, + message_latency_large_message/1, + message_latency_large_link_exit/1, + message_latency_large_monitor_exit/1, + message_latency_large_exit2/1]). %% Internal exports. -export([sender/3, receiver2/2, dummy_waiter/0, dead_process/0, @@ -91,6 +95,7 @@ all() -> dist_parallel_send, atom_roundtrip, unicode_atom_roundtrip, atom_roundtrip_r16b, contended_atom_cache_entry, contended_unicode_atom_cache_entry, + {group, message_latency}, {group, bad_dist}, {group, bad_dist_ext}, start_epmd_false, epmd_module]. @@ -106,7 +111,13 @@ groups() -> {bad_dist_ext, [], [bad_dist_ext_receive, bad_dist_ext_process_info, bad_dist_ext_size, - bad_dist_ext_control, bad_dist_ext_connection_id]}]. + bad_dist_ext_control, bad_dist_ext_connection_id]}, + {message_latency, [], + [message_latency_large_message, + message_latency_large_link_exit, + message_latency_large_monitor_exit, + message_latency_large_exit2]} + ]. %% Tests pinging a node in different ways. ping(Config) when is_list(Config) -> @@ -571,10 +582,20 @@ do_busy_test(Node, Fun) -> %% Don't match arity; it is different in debug and %% optimized emulator [{status, suspended}, - {current_function, {erlang, bif_return_trap, _}}] = Pinfo, + {current_function, {Mod, Func, _}}] = Pinfo, + if + Mod =:= erlang andalso Func =:= bif_return_trap -> + true; + Mod =:= erts_internal andalso Func =:= dsend_continue_trap -> + true; + true -> + ct:fail({incorrect, pinfo, Pinfo}) + end, receive {'DOWN', M, process, P, Reason} -> io:format("~p died with exit reason ~p~n", [P, Reason]), + verify_nc(node()), + verify_nc(Node), normal = Reason end end. @@ -934,7 +955,9 @@ dist_auto_connect_never(Config) when is_list(Config) -> ok; {do_dist_auto_connect, Error} -> {error, Error}; - Other -> + %% The io:formats in dos_dist_auto_connect will + %% generate port output messages that are ok + Other when not is_port(element(1, Other))-> {error, Other} after 32000 -> timeout @@ -1367,6 +1390,131 @@ get_conflicting_unicode_atoms(CIX, N) -> get_conflicting_unicode_atoms(CIX, N) end. + +%% The message_latency_large tests that small distribution messages are +%% not blocked by other large distribution messages. Basically it tests +%% that fragmentation of distribution messages works. +message_latency_large_message(Config) when is_list(Config) -> + measure_latency_large_message(?FUNCTION_NAME, fun(Dropper, Payload) -> Dropper ! Payload end). + +message_latency_large_exit2(Config) when is_list(Config) -> + measure_latency_large_message(?FUNCTION_NAME, fun erlang:exit/2). + +message_latency_large_link_exit(Config) when is_list(Config) -> + message_latency_large_exit(?FUNCTION_NAME, fun erlang:link/1). + +message_latency_large_monitor_exit(Config) when is_list(Config) -> + message_latency_large_exit(?FUNCTION_NAME, fun(Dropper) -> + Dropper ! {monitor, self()}, + receive ok -> ok end + end). + +message_latency_large_exit(Nodename, ReasonFun) -> + measure_latency_large_message( + Nodename, + fun(Dropper, Payload) -> + Pid = spawn(fun() -> + receive go -> ok end, + ReasonFun(Dropper), + exit(Payload) + end), + + FlushTrace = fun F() -> + receive + {trace, Pid, _, _} = M -> + F() + after 0 -> + ok + end + end, + + erlang:trace(Pid, true, [exiting]), + Pid ! go, + receive + {trace, Pid, out_exited, 0} -> + FlushTrace() + end + end). + +measure_latency_large_message(Nodename, DataFun) -> + + erlang:system_monitor(self(), [busy_dist_port]), + + {ok, N} = start_node(Nodename), + + Dropper = spawn(N, fun F() -> + process_flag(trap_exit, true), + receive + {monitor,Pid} -> + erlang:monitor(process, Pid), + Pid ! ok; + _ -> ok + end, + F() + end), + + Echo = spawn(N, fun F() -> receive {From, Msg} -> From ! Msg, F() end end), + + %% Test 32 MB and 320 MB and test the latency difference of sent messages + Payloads = [{I, <<0:(I * 32 * 1024 * 1024 * 8)>>} || I <- [1,10]], + + IndexTimes = [{I, measure_latency(DataFun, Dropper, Echo, P)} + || {I, P} <- Payloads], + + Times = [ Time || {_I, Time} <- IndexTimes], + + ct:pal("~p",[IndexTimes]), + + case {lists:max(Times), lists:min(Times)} of + {Max, Min} when Max * 0.25 > Min -> + ct:fail({incorrect_latency, IndexTimes}); + _ -> + ok + end. + +measure_latency(DataFun, Dropper, Echo, Payload) -> + + flush(), + + Senders = [spawn_monitor( + fun F() -> + DataFun(Dropper, Payload), + receive + die -> ok + after 0 -> + F() + end + end) || _ <- lists:seq(1,2)], + + [receive + {monitor, _Sender, busy_dist_port, _Info} = M -> + ok + end || _ <- lists:seq(1,10)], + + {TS, _} = + timer:tc(fun() -> + [begin + Echo ! {self(), hello}, + receive hello -> ok end + end || _ <- lists:seq(1,100)] + end), + [begin + Sender ! die, + receive + {'DOWN', Ref, process, _, _} -> + ok + end + end || {Sender, Ref} <- Senders], + TS. + +flush() -> + receive + _ -> + flush() + after 0 -> + ok + end. + -define(COOKIE, ''). -define(DOP_LINK, 1). -define(DOP_SEND, 2). diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl index ca5c93f535..ef4635a6f5 100644 --- a/erts/emulator/test/node_container_SUITE.erl +++ b/erts/emulator/test/node_container_SUITE.erl @@ -71,10 +71,10 @@ init_per_suite(Config) -> end_per_suite(_Config) -> erts_debug:set_internal_state(available_internal_state, true), erts_debug:set_internal_state(node_tab_delayed_delete, -1), %% restore original value - erts_test_util:available_internal_state(false). + erts_test_utils:available_internal_state(false). init_per_testcase(_Case, Config) when is_list(Config) -> - erts_test_util:available_internal_state(true), + erts_test_utils:available_internal_state(true), Config. end_per_testcase(_Case, Config) when is_list(Config) -> @@ -913,9 +913,9 @@ id(X) -> -define(ND_REFS, erts_debug:get_internal_state(node_and_dist_references)). node_container_refc_check(Node) when is_atom(Node) -> - AIS = erts_test_util:available_internal_state(true), + AIS = erts_test_utils:available_internal_state(true), nc_refc_check(Node), - erts_test_util:available_internal_state(AIS). + erts_test_utils:available_internal_state(AIS). nc_refc_check(Node) when is_atom(Node) -> Ref = make_ref(), -- cgit v1.2.3