aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSiri Hansen <[email protected]>2015-09-24 12:57:42 +0200
committerSiri Hansen <[email protected]>2015-09-24 13:13:55 +0200
commitfa096962df681f39b1dfe4191f0f3ecc177d906c (patch)
tree4ff521e1f0069bf846e6bfa6d134183545da9b5d
parentc2fd9dc8a249a4cb093e9133fd3a3cf0d6711ff5 (diff)
downloadotp-fa096962df681f39b1dfe4191f0f3ecc177d906c.tar.gz
otp-fa096962df681f39b1dfe4191f0f3ecc177d906c.tar.bz2
otp-fa096962df681f39b1dfe4191f0f3ecc177d906c.zip
Flush timeout message from message queue when canceling timer
In ct_netconfc, if a timer expired 'at the same time' as the server sent the rpc-reply, the timeout message might already be in the client's message queue when the client removed the timer ref from its 'pending' list. This caused a crash in the client since the timer ref could no longer be found when handling the timeout message. This commit fixes the problem by always flushing the timeout message from the message queue when canceling a timer.
-rw-r--r--lib/common_test/src/ct_netconfc.erl28
-rw-r--r--lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl23
2 files changed, 41 insertions, 10 deletions
diff --git a/lib/common_test/src/ct_netconfc.erl b/lib/common_test/src/ct_netconfc.erl
index 0de7bf03af..05977e5649 100644
--- a/lib/common_test/src/ct_netconfc.erl
+++ b/lib/common_test/src/ct_netconfc.erl
@@ -1272,6 +1272,14 @@ set_request_timer(T) ->
{ok,TRef} = timer:send_after(T,{Ref,timeout}),
{Ref,TRef}.
+%%%-----------------------------------------------------------------
+cancel_request_timer(undefined,undefined) ->
+ ok;
+cancel_request_timer(Ref,TRef) ->
+ _ = timer:cancel(TRef),
+ receive {Ref,timeout} -> ok
+ after 0 -> ok
+ end.
%%%-----------------------------------------------------------------
client_hello(Options) when is_list(Options) ->
@@ -1404,9 +1412,9 @@ handle_error(Reason, State) ->
Pending ->
%% Assuming the first request gets the
%% first answer
- P=#pending{tref=TRef,caller=Caller} =
+ P=#pending{tref=TRef,ref=Ref,caller=Caller} =
lists:last(Pending),
- _ = timer:cancel(TRef),
+ cancel_request_timer(Ref,TRef),
Reason1 = {failed_to_parse_received_data,Reason},
ct_gen_conn:return(Caller,{error,Reason1}),
lists:delete(P,Pending)
@@ -1492,8 +1500,8 @@ decode({Tag,Attrs,_}=E, #state{connection=Connection,pending=Pending}=State) ->
{error,Reason} ->
{noreply,State#state{hello_status = {error,Reason}}}
end;
- #pending{tref=TRef,caller=Caller} ->
- _ = timer:cancel(TRef),
+ #pending{tref=TRef,ref=Ref,caller=Caller} ->
+ cancel_request_timer(Ref,TRef),
case decode_hello(E) of
{ok,SessionId,Capabilities} ->
ct_gen_conn:return(Caller,ok),
@@ -1519,9 +1527,8 @@ decode({Tag,Attrs,_}=E, #state{connection=Connection,pending=Pending}=State) ->
%% there is just one pending that matches (i.e. has
%% undefined msg_id and op)
case [P || P = #pending{msg_id=undefined,op=undefined} <- Pending] of
- [#pending{tref=TRef,
- caller=Caller}] ->
- _ = timer:cancel(TRef),
+ [#pending{tref=TRef,ref=Ref,caller=Caller}] ->
+ cancel_request_timer(Ref,TRef),
ct_gen_conn:return(Caller,E),
{noreply,State#state{pending=[]}};
_ ->
@@ -1542,8 +1549,8 @@ get_msg_id(Attrs) ->
decode_rpc_reply(MsgId,{_,Attrs,Content0}=E,#state{pending=Pending} = State) ->
case lists:keytake(MsgId,#pending.msg_id,Pending) of
- {value, #pending{tref=TRef,op=Op,caller=Caller}, Pending1} ->
- _ = timer:cancel(TRef),
+ {value, #pending{tref=TRef,ref=Ref,op=Op,caller=Caller}, Pending1} ->
+ cancel_request_timer(Ref,TRef),
Content = forward_xmlns_attr(Attrs,Content0),
{CallerReply,{ServerReply,State2}} =
do_decode_rpc_reply(Op,Content,State#state{pending=Pending1}),
@@ -1555,10 +1562,11 @@ decode_rpc_reply(MsgId,{_,Attrs,Content0}=E,#state{pending=Pending} = State) ->
%% pending that matches (i.e. has undefined msg_id and op)
case [P || P = #pending{msg_id=undefined,op=undefined} <- Pending] of
[#pending{tref=TRef,
+ ref=Ref,
msg_id=undefined,
op=undefined,
caller=Caller}] ->
- _ = timer:cancel(TRef),
+ cancel_request_timer(Ref,TRef),
ct_gen_conn:return(Caller,E),
{noreply,State#state{pending=[]}};
_ ->
diff --git a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl
index 64ebfbc463..aaa0723488 100644
--- a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl
+++ b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl
@@ -73,6 +73,7 @@ all() ->
timeout_close_session,
get,
timeout_get,
+ flush_timeout_get,
get_xpath,
get_config,
get_config_xpath,
@@ -360,6 +361,28 @@ timeout_get(Config) ->
?ok = ct_netconfc:close_session(Client),
ok.
+%% Test OTP-13008 "ct_netconfc crash when receiving unknown timeout"
+%% If the timer expires "at the same time" as the rpc reply is
+%% received, the timeout message might already be sent when the timer
+%% is cancelled. This test checks that the timeout message is flushed
+%% from the message queue. If it isn't, the client crashes and the
+%% session can not be closed afterwards.
+%% Note that we can only hope that the test case triggers the problem
+%% every now and then, as it is very timing dependent...
+flush_timeout_get(Config) ->
+ DataDir = ?config(data_dir,Config),
+ {ok,Client} = open_success(DataDir),
+ Data = [{server,[{xmlns,"myns"}],[{name,[],["myserver"]}]}],
+ ?NS:expect_reply('get',{data,Data}),
+ timer:sleep(1000),
+ case ct_netconfc:get(Client,{server,[{xmlns,"myns"}],[]},1) of
+ {error,timeout} -> ok; % problem not triggered
+ {ok,Data} -> ok % problem possibly triggered
+ end,
+ ?NS:expect_do_reply('close-session',close,ok),
+ ?ok = ct_netconfc:close_session(Client),
+ ok.
+
get_xpath(Config) ->
DataDir = ?config(data_dir,Config),
{ok,Client} = open_success(DataDir),