diff options
author | Siri Hansen <[email protected]> | 2015-09-24 12:57:42 +0200 |
---|---|---|
committer | Siri Hansen <[email protected]> | 2015-09-24 13:13:55 +0200 |
commit | fa096962df681f39b1dfe4191f0f3ecc177d906c (patch) | |
tree | 4ff521e1f0069bf846e6bfa6d134183545da9b5d | |
parent | c2fd9dc8a249a4cb093e9133fd3a3cf0d6711ff5 (diff) | |
download | otp-fa096962df681f39b1dfe4191f0f3ecc177d906c.tar.gz otp-fa096962df681f39b1dfe4191f0f3ecc177d906c.tar.bz2 otp-fa096962df681f39b1dfe4191f0f3ecc177d906c.zip |
Flush timeout message from message queue when canceling timer
In ct_netconfc, if a timer expired 'at the same time' as the server
sent the rpc-reply, the timeout message might already be in the
client's message queue when the client removed the timer ref from its
'pending' list. This caused a crash in the client since the timer ref
could no longer be found when handling the timeout message.
This commit fixes the problem by always flushing the timeout message
from the message queue when canceling a timer.
-rw-r--r-- | lib/common_test/src/ct_netconfc.erl | 28 | ||||
-rw-r--r-- | lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl | 23 |
2 files changed, 41 insertions, 10 deletions
diff --git a/lib/common_test/src/ct_netconfc.erl b/lib/common_test/src/ct_netconfc.erl index 0de7bf03af..05977e5649 100644 --- a/lib/common_test/src/ct_netconfc.erl +++ b/lib/common_test/src/ct_netconfc.erl @@ -1272,6 +1272,14 @@ set_request_timer(T) -> {ok,TRef} = timer:send_after(T,{Ref,timeout}), {Ref,TRef}. +%%%----------------------------------------------------------------- +cancel_request_timer(undefined,undefined) -> + ok; +cancel_request_timer(Ref,TRef) -> + _ = timer:cancel(TRef), + receive {Ref,timeout} -> ok + after 0 -> ok + end. %%%----------------------------------------------------------------- client_hello(Options) when is_list(Options) -> @@ -1404,9 +1412,9 @@ handle_error(Reason, State) -> Pending -> %% Assuming the first request gets the %% first answer - P=#pending{tref=TRef,caller=Caller} = + P=#pending{tref=TRef,ref=Ref,caller=Caller} = lists:last(Pending), - _ = timer:cancel(TRef), + cancel_request_timer(Ref,TRef), Reason1 = {failed_to_parse_received_data,Reason}, ct_gen_conn:return(Caller,{error,Reason1}), lists:delete(P,Pending) @@ -1492,8 +1500,8 @@ decode({Tag,Attrs,_}=E, #state{connection=Connection,pending=Pending}=State) -> {error,Reason} -> {noreply,State#state{hello_status = {error,Reason}}} end; - #pending{tref=TRef,caller=Caller} -> - _ = timer:cancel(TRef), + #pending{tref=TRef,ref=Ref,caller=Caller} -> + cancel_request_timer(Ref,TRef), case decode_hello(E) of {ok,SessionId,Capabilities} -> ct_gen_conn:return(Caller,ok), @@ -1519,9 +1527,8 @@ decode({Tag,Attrs,_}=E, #state{connection=Connection,pending=Pending}=State) -> %% there is just one pending that matches (i.e. has %% undefined msg_id and op) case [P || P = #pending{msg_id=undefined,op=undefined} <- Pending] of - [#pending{tref=TRef, - caller=Caller}] -> - _ = timer:cancel(TRef), + [#pending{tref=TRef,ref=Ref,caller=Caller}] -> + cancel_request_timer(Ref,TRef), ct_gen_conn:return(Caller,E), {noreply,State#state{pending=[]}}; _ -> @@ -1542,8 +1549,8 @@ get_msg_id(Attrs) -> decode_rpc_reply(MsgId,{_,Attrs,Content0}=E,#state{pending=Pending} = State) -> case lists:keytake(MsgId,#pending.msg_id,Pending) of - {value, #pending{tref=TRef,op=Op,caller=Caller}, Pending1} -> - _ = timer:cancel(TRef), + {value, #pending{tref=TRef,ref=Ref,op=Op,caller=Caller}, Pending1} -> + cancel_request_timer(Ref,TRef), Content = forward_xmlns_attr(Attrs,Content0), {CallerReply,{ServerReply,State2}} = do_decode_rpc_reply(Op,Content,State#state{pending=Pending1}), @@ -1555,10 +1562,11 @@ decode_rpc_reply(MsgId,{_,Attrs,Content0}=E,#state{pending=Pending} = State) -> %% pending that matches (i.e. has undefined msg_id and op) case [P || P = #pending{msg_id=undefined,op=undefined} <- Pending] of [#pending{tref=TRef, + ref=Ref, msg_id=undefined, op=undefined, caller=Caller}] -> - _ = timer:cancel(TRef), + cancel_request_timer(Ref,TRef), ct_gen_conn:return(Caller,E), {noreply,State#state{pending=[]}}; _ -> diff --git a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl index 64ebfbc463..aaa0723488 100644 --- a/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl +++ b/lib/common_test/test/ct_netconfc_SUITE_data/netconfc1_SUITE.erl @@ -73,6 +73,7 @@ all() -> timeout_close_session, get, timeout_get, + flush_timeout_get, get_xpath, get_config, get_config_xpath, @@ -360,6 +361,28 @@ timeout_get(Config) -> ?ok = ct_netconfc:close_session(Client), ok. +%% Test OTP-13008 "ct_netconfc crash when receiving unknown timeout" +%% If the timer expires "at the same time" as the rpc reply is +%% received, the timeout message might already be sent when the timer +%% is cancelled. This test checks that the timeout message is flushed +%% from the message queue. If it isn't, the client crashes and the +%% session can not be closed afterwards. +%% Note that we can only hope that the test case triggers the problem +%% every now and then, as it is very timing dependent... +flush_timeout_get(Config) -> + DataDir = ?config(data_dir,Config), + {ok,Client} = open_success(DataDir), + Data = [{server,[{xmlns,"myns"}],[{name,[],["myserver"]}]}], + ?NS:expect_reply('get',{data,Data}), + timer:sleep(1000), + case ct_netconfc:get(Client,{server,[{xmlns,"myns"}],[]},1) of + {error,timeout} -> ok; % problem not triggered + {ok,Data} -> ok % problem possibly triggered + end, + ?NS:expect_do_reply('close-session',close,ok), + ?ok = ct_netconfc:close_session(Client), + ok. + get_xpath(Config) -> DataDir = ?config(data_dir,Config), {ok,Client} = open_success(DataDir), |