From 0bb261b730eff06cca524ea9999ff035585ef5b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 28 Sep 2018 05:57:47 +0200 Subject: Fix rare bug in binary matching (again) 2e40d8d1c51a attempted fix a bug in binary matching, but it only fixed the bug for the minimized test case. This commit removes the previous fix and fixes the bug in a more effective way. See the comments in the new code in `sys_core_bsm` for an explanation. This commit restores the optimizations in string.erl and dets_v9.erl that the previous fix disabled. I have not found any code where this commit will disable optimizations when they are actually safe. There are some changes to the code in ssl_cipher.erl in that some bs_start_match2 instruction did not reuse the binary register for the match context, but the delayed sub binary optimizations was never applied to the code in the first place. https://bugs.erlang.org/browse/ERL-689 --- lib/compiler/src/beam_bsm.erl | 13 +------- lib/compiler/src/sys_core_bsm.erl | 62 ++++++++++++++++++++++++++++++++++++ lib/compiler/test/bs_match_SUITE.erl | 38 ++++++++++++++++++++-- 3 files changed, 98 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl index abc6e96c85..1c8e0e9854 100644 --- a/lib/compiler/src/beam_bsm.erl +++ b/lib/compiler/src/beam_bsm.erl @@ -310,18 +310,7 @@ btb_reaches_match_2([{test,bs_start_match2,{f,F},Live,[Bin,_],Ctx}|Is], end; btb_reaches_match_2([{test,_,{f,F},Ss}=I|Is], Regs, D0) -> btb_ensure_not_used(Ss, I, Regs), - D1 = btb_follow_branch(F, Regs, D0), - D = case Is of - [{bs_context_to_binary,_}|_] -> - %% bs_context_to_binary following a test instruction - %% probably needs the current position to be saved as - %% the new start position, but we can't be sure. - %% Therefore, conservatively disable the optimization - %% (instead of forcing a saving of the position). - D1#btb{must_save=true,must_not_save=true}; - _ -> - D1 - end, + D = btb_follow_branch(F, Regs, D0), btb_reaches_match_1(Is, Regs, D); btb_reaches_match_2([{test,_,{f,F},_,Ss,_}=I|Is], Regs, D0) -> btb_ensure_not_used(Ss, I, Regs), diff --git a/lib/compiler/src/sys_core_bsm.erl b/lib/compiler/src/sys_core_bsm.erl index d7b26c3a56..62657933ee 100644 --- a/lib/compiler/src/sys_core_bsm.erl +++ b/lib/compiler/src/sys_core_bsm.erl @@ -44,6 +44,14 @@ function([{#c_var{name={F,Arity}}=Name,B0}|Fs], FsAcc, Ws0) -> {B,Ws} -> function(Fs, [{Name,B}|FsAcc], Ws) catch + throw:unsafe_bs_context_to_binary -> + %% Unsafe bs_context_to_binary (in the sense that the + %% contents of the binary will probably be wrong). + %% Disable binary optimizations for the entire function. + %% We don't generate an INFO message, because this happens + %% very infrequently and it would be hard to explain in + %% a comprehensible way in an INFO message. + function(Fs, [{Name,B0}|FsAcc], Ws0); Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [F,Arity]), erlang:raise(Class, Error, Stack) @@ -116,12 +124,66 @@ move_from_col(Pos, L) -> [Col|First] ++ Rest. bsm_do_an([#c_var{name=Vname}=V0|Vs0], Cs0, Case) -> + bsm_inner_context_to_binary(Cs0), Cs = bsm_do_an_var(Vname, Cs0), V = bsm_annotate_for_reuse(V0), Vs = core_lib:make_values([V|Vs0]), Case#c_case{arg=Vs,clauses=Cs}; bsm_do_an(_Vs, _Cs, Case) -> Case. +bsm_inner_context_to_binary([#c_clause{body=B}|Cs]) -> + %% Consider: + %% + %% foo(<>) -> %Line 1 + %% case {Data, Length} of %Line 2 + %% {_, 0} -> Data; %Line 3 + %% {<<...>>, 4} -> ... %Line 4 + %% end. + %% + %% No sub binary will be created for Data in line 1. The match + %% context will be passed on to the `case` in line 2. In line 3, + %% this pass inserts a `bs_context_to_binary` instruction to + %% convert the match context representing Data to a binary before + %% returning it. The problem is that the binary created will be + %% the original binary (including the matched out Length field), + %% not the tail of the binary as it is supposed to be. + %% + %% Here follows a heuristic to disable the binary optimizations + %% for the entire function if this code kind of code is found. + + case cerl_trees:free_variables(B) of + [] -> + %% Since there are no free variables in the body of + %% this clause, there can't be any troublesome + %% bs_context_to_binary instructions. + bsm_inner_context_to_binary(Cs); + [_|_]=Free -> + %% One of the free variables could refer to a match context + %% created by the outer binary match. + F = fun(#c_primop{name=#c_literal{val=bs_context_to_binary}, + args=[#c_var{name=V}]}, _) -> + case member(V, Free) of + true -> + %% This bs_context_to_binary instruction will + %% make a binary of the match context from an + %% outer binary match. It is very likely that + %% the contents of the binary will be wrong + %% (the original binary as opposed to only + %% the tail binary). + throw(unsafe_bs_context_to_binary); + false -> + %% Safe. This bs_context_to_binary instruction + %% will make a binary from a match context + %% defined in the body of the clause. + ok + end; + (_, _) -> + ok + end, + cerl_trees:fold(F, ok, B) + end; +bsm_inner_context_to_binary([]) -> ok. + bsm_do_an_var(V, [#c_clause{pats=[P|_],guard=G,body=B0}=C0|Cs]) -> case P of #c_var{name=VarName} -> diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index 7814738449..e97dbac8a6 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -1690,30 +1690,62 @@ non_opt_eq([], <<>>) -> %% ERL-689 -erl_689(Config) -> +erl_689(_Config) -> {{0, 0, 0}, <<>>} = do_erl_689_1(<<0>>, ?MODULE), {{2018, 8, 7}, <<>>} = do_erl_689_1(<<4,2018:16/little,8,7>>, ?MODULE), {{0, 0, 0}, <<>>} = do_erl_689_2(?MODULE, <<0>>), {{2018, 8, 7}, <<>>} = do_erl_689_2(?MODULE, <<4,2018:16/little,8,7>>), ok. -do_erl_689_1(<>, _) -> +do_erl_689_1(Arg1, Arg2) -> + Res = do_erl_689_1a(Arg1, Arg2), + Res = do_erl_689_1b(Arg1, Arg2). + +do_erl_689_2(Arg1, Arg2) -> + Res = do_erl_689_2a(Arg1, Arg2), + Res = do_erl_689_2b(Arg1, Arg2). + +do_erl_689_1a(<>, _) -> + case {Data, Length} of + {_, 0} -> + %% bs_context_to_binary would incorrectly set Data to the original + %% binary (before matching in the function head). + {{0, 0, 0}, Data}; + {<>, 4} -> + {{Y, M, D}, Rest} + end. + +do_erl_689_1b(<>, _) -> case {Data, Length} of {_, 0} -> %% bs_context_to_binary would incorrectly set Data to the original %% binary (before matching in the function head). + id(0), {{0, 0, 0}, Data}; {<>, 4} -> + id(1), + {{Y, M, D}, Rest} + end. + +do_erl_689_2a(_, <>) -> + case {Length, Data} of + {0, _} -> + %% bs_context_to_binary would incorrectly set Data to the original + %% binary (before matching in the function head). + {{0, 0, 0}, Data}; + {4, <>} -> {{Y, M, D}, Rest} end. -do_erl_689_2(_, <>) -> +do_erl_689_2b(_, <>) -> case {Length, Data} of {0, _} -> %% bs_context_to_binary would incorrectly set Data to the original %% binary (before matching in the function head). + id(0), {{0, 0, 0}, Data}; {4, <>} -> + id(1), {{Y, M, D}, Rest} end. -- cgit v1.2.3