From 1257cee4faf33cc69c9452ba54afe5d5dbd4ddf6 Mon Sep 17 00:00:00 2001 From: Kostis Sagonas Date: Thu, 3 Nov 2016 17:16:01 +0100 Subject: Fix the native code translation of bs_match_string This fixes a HiPE bug reported on erlang-questions on 2/11/2016. The BEAM to ICode tranaslation of the bs_match_string instruction, written long ago for binaries (i.e., with byte-sized strings), tried to do a `clever' translation of even bit-sized strings using a HiPE primop that took a `Size' argument expressed in *bytes*. ICode is not really the place to do such a thing, and moreover there is really no reason for the HiPE primop not to take a Size argument expressed in *bits* instead. This commit changes the `Size' argument to be in bits, postpones the translation of the bs_match_string primop Fixed in a pair-programming/debugging session with @margnus1. until RTL and does a proper translation using bit-sized quantities there. --- lib/hipe/icode/hipe_beam_to_icode.erl | 28 +++---------------------- lib/hipe/icode/hipe_icode_primops.erl | 8 ++++---- lib/hipe/rtl/hipe_rtl_binary.erl | 2 +- lib/hipe/rtl/hipe_rtl_binary_match.erl | 37 +++++++++++++++++----------------- 4 files changed, 26 insertions(+), 49 deletions(-) diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl index 224aacd8d7..3386523206 100644 --- a/lib/hipe/icode/hipe_beam_to_icode.erl +++ b/lib/hipe/icode/hipe_beam_to_icode.erl @@ -763,32 +763,10 @@ trans_fun([{test,bs_test_unit,{f,Lbl},[Ms,Unit]}| [MsVar], [], Env, Instructions); trans_fun([{test,bs_match_string,{f,Lbl},[Ms,BitSize,Bin]}| Instructions], Env) -> - True = mk_label(new), - FalseLabName = map_label(Lbl), - TrueLabName = hipe_icode:label_name(True), + %% the current match buffer MsVar = mk_var(Ms), - TmpVar = mk_var(new), - ByteSize = BitSize div 8, - ExtraBits = BitSize rem 8, - WordSize = hipe_rtl_arch:word_size(), - if ExtraBits =:= 0 -> - trans_op_call({hipe_bs_primop,{bs_match_string,Bin,ByteSize}}, Lbl, - [MsVar], [MsVar], Env, Instructions); - BitSize =< ((WordSize * 8) - 5) -> - <> = Bin, - {I1,Env1} = trans_one_op_call({hipe_bs_primop,{bs_get_integer,BitSize,0}}, Lbl, - [MsVar], [TmpVar, MsVar], Env), - I2 = hipe_icode:mk_type([TmpVar], {integer,Int}, TrueLabName, FalseLabName), - I1 ++ [I2,True] ++ trans_fun(Instructions, Env1); - true -> - <> = Bin, - {I1,Env1} = trans_one_op_call({hipe_bs_primop,{bs_match_string,RealBin,ByteSize}}, Lbl, - [MsVar], [MsVar], Env), - {I2,Env2} = trans_one_op_call({hipe_bs_primop,{bs_get_integer,ExtraBits,0}}, Lbl, - [MsVar], [TmpVar, MsVar], Env1), - I3 = hipe_icode:mk_type([TmpVar], {integer,Int}, TrueLabName, FalseLabName), - I1 ++ I2 ++ [I3,True] ++ trans_fun(Instructions, Env2) - end; + Primop = {hipe_bs_primop, {bs_match_string, Bin, BitSize}}, + trans_op_call(Primop, Lbl, [MsVar], [MsVar], Env, Instructions); trans_fun([{bs_context_to_binary,Var}|Instructions], Env) -> %% the current match buffer IVars = [trans_arg(Var)], diff --git a/lib/hipe/icode/hipe_icode_primops.erl b/lib/hipe/icode/hipe_icode_primops.erl index cee37b6a57..2a141c514e 100644 --- a/lib/hipe/icode/hipe_icode_primops.erl +++ b/lib/hipe/icode/hipe_icode_primops.erl @@ -287,8 +287,8 @@ pp(Dev, Op) -> io:format(Dev, "bs_start_match<~w>", [Max]); {{bs_start_match, Type}, Max} -> io:format(Dev, "bs_start_match<~w,~w>", [Type,Max]); - {bs_match_string, String, SizeInBytes} -> - io:format(Dev, "bs_match_string<~w, ~w>", [String, SizeInBytes]); + {bs_match_string, String, SizeInBits} -> + io:format(Dev, "bs_match_string<~w, ~w>", [String, SizeInBits]); {bs_get_integer, Size, Flags} -> io:format(Dev, "bs_get_integer<~w, ~w>", [Size, Flags]); {bs_get_float, Size, Flags} -> @@ -596,10 +596,10 @@ type(Primop, Args) -> erl_types:t_subtract(Type, erl_types:t_matchstate()), erl_types:t_matchstate_slot( erl_types:t_inf(Type, erl_types:t_matchstate()), 0)); - {hipe_bs_primop, {bs_match_string,_,Bytes}} -> + {hipe_bs_primop, {bs_match_string,_,Bits}} -> [MatchState] = Args, BinType = erl_types:t_matchstate_present(MatchState), - NewBinType = match_bin(erl_types:t_bitstr(0, Bytes*8), BinType), + NewBinType = match_bin(erl_types:t_bitstr(0, Bits), BinType), erl_types:t_matchstate_update_present(NewBinType, MatchState); {hipe_bs_primop, {bs_test_unit,Unit}} -> [MatchState] = Args, diff --git a/lib/hipe/rtl/hipe_rtl_binary.erl b/lib/hipe/rtl/hipe_rtl_binary.erl index fb9c0c196d..9b400f4c93 100644 --- a/lib/hipe/rtl/hipe_rtl_binary.erl +++ b/lib/hipe/rtl/hipe_rtl_binary.erl @@ -19,7 +19,7 @@ %%% %CopyrightEnd% %%% %%%------------------------------------------------------------------- -%%% File : hipe_rtl_binary_2.erl +%%% File : hipe_rtl_binary.erl %%% Author : Per Gustafsson %%% Description : %%% diff --git a/lib/hipe/rtl/hipe_rtl_binary_match.erl b/lib/hipe/rtl/hipe_rtl_binary_match.erl index 528672b893..d999cd2743 100644 --- a/lib/hipe/rtl/hipe_rtl_binary_match.erl +++ b/lib/hipe/rtl/hipe_rtl_binary_match.erl @@ -270,24 +270,23 @@ gen_rtl({bs_save, Slot}, [NewMs], [Ms], TrueLblName, _FalseLblName) -> set_field_from_term({matchstate, {saveoffset, Slot}}, Ms, Offset), hipe_rtl:mk_goto(TrueLblName)]; %% ----- bs_match_string ----- -gen_rtl({bs_match_string, String, ByteSize}, Dst, [Ms], +gen_rtl({bs_match_string, String, BitSize}, Dst, [Ms], TrueLblName, FalseLblName) -> {[Offset, BinSize, Base], Instrs} = extract_matchstate_vars([offset, binsize, base], Ms), [SuccessLbl, ALbl, ULbl] = create_lbls(3), [NewOffset, BitOffset] = create_gcsafe_regs(2), - Unit = hipe_rtl_arch:word_size() - 1, - Loops = ByteSize div Unit, - Init = + Unit = (hipe_rtl_arch:word_size() - 1) * ?BYTE_SIZE, + Init = [Instrs, opt_update_ms(Dst, Ms), - check_size(Offset, hipe_rtl:mk_imm(ByteSize*?BYTE_SIZE), BinSize, + check_size(Offset, hipe_rtl:mk_imm(BitSize), BinSize, NewOffset, hipe_rtl:label_name(SuccessLbl), FalseLblName), SuccessLbl], SplitCode = [hipe_rtl:mk_alub(BitOffset, Offset, 'and', hipe_rtl:mk_imm(?LOW_BITS), eq, hipe_rtl:label_name(ALbl), hipe_rtl:label_name(ULbl))], - Loops = ByteSize div Unit, + Loops = BitSize div Unit, SkipSize = Loops * Unit, {ACode1, UCode1} = case Loops of @@ -297,9 +296,9 @@ gen_rtl({bs_match_string, String, ByteSize}, Dst, [Ms], create_loops(Loops, Unit, String, Base, Offset, BitOffset, FalseLblName) end, - <<_:SkipSize/binary, RestString/binary>> = String, + <<_:SkipSize/bits, RestString/bits>> = String, {ACode2, UCode2} = - case ByteSize rem Unit of + case BitSize rem Unit of 0 -> {[], []}; Rem -> @@ -393,12 +392,12 @@ validate_unicode_retract_c_code(Src, Ms, TrueLblName, FalseLblName) -> create_loops(Loops, Unit, String, Base, Offset, BitOffset, FalseLblName) -> [Reg] = create_gcsafe_regs(1), AlignedFun = fun(Value) -> - [get_int_to_reg(Reg, Unit*?BYTE_SIZE, Base, Offset, 'srl', + [get_int_to_reg(Reg, Unit, Base, Offset, 'srl', {unsigned, big}), update_and_test(Reg, Unit, Offset, Value, FalseLblName)] end, UnAlignedFun = fun(Value) -> - [get_unaligned_int_to_reg(Reg, Unit*?BYTE_SIZE, + [get_unaligned_int_to_reg(Reg, Unit, Base, Offset, BitOffset, 'srl', {unsigned, big})| update_and_test(Reg, Unit, Offset, Value, FalseLblName)] @@ -406,31 +405,31 @@ create_loops(Loops, Unit, String, Base, Offset, BitOffset, FalseLblName) -> {create_loops(Loops, Unit, String, AlignedFun), create_loops(Loops, Unit, String, UnAlignedFun)}. -create_rests(Rem, String, Base, Offset, BitOffset, FalseLblName) -> +create_rests(RemBits, String, Base, Offset, BitOffset, FalseLblName) -> [Reg] = create_gcsafe_regs(1), AlignedFun = fun(Value) -> - [get_int_to_reg(Reg, Rem*?BYTE_SIZE, Base, Offset, 'srl', + [get_int_to_reg(Reg, RemBits, Base, Offset, 'srl', {unsigned, big})| just_test(Reg, Value, FalseLblName)] end, UnAlignedFun = fun(Value) -> - [get_unaligned_int_to_reg(Reg, Rem*?BYTE_SIZE, + [get_unaligned_int_to_reg(Reg, RemBits, Base, Offset, BitOffset, 'srl', {unsigned, big})| just_test(Reg, Value, FalseLblName)] end, - {create_loops(1, Rem, String, AlignedFun), - create_loops(1, Rem, String, UnAlignedFun)}. + {create_loops(1, RemBits, String, AlignedFun), + create_loops(1, RemBits, String, UnAlignedFun)}. create_loops(0, _Unit, _String, _IntFun) -> []; create_loops(N, Unit, String, IntFun) -> - {Value, RestString} = get_value(Unit,String), + {Value, RestString} = get_value(Unit, String), [IntFun(Value), create_loops(N-1, Unit, RestString, IntFun)]. update_and_test(Reg, Unit, Offset, Value, FalseLblName) -> - [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(Unit*?BYTE_SIZE), FalseLblName), + [add_to_offset(Offset, Offset, hipe_rtl:mk_imm(Unit), FalseLblName), just_test(Reg, Value, FalseLblName)]. just_test(Reg, Value, FalseLblName) -> @@ -439,8 +438,8 @@ just_test(Reg, Value, FalseLblName) -> hipe_rtl:label_name(ContLbl), FalseLblName), ContLbl]. -get_value(N,String) -> - <> = String, +get_value(N, String) -> + <> = String, {I, Rest}. make_int_gc_code(I) when is_integer(I) -> -- cgit v1.2.3 From 359caeeaf25854ec808b21e32cfc076283c07474 Mon Sep 17 00:00:00 2001 From: Kostis Sagonas Date: Thu, 3 Nov 2016 17:25:09 +0100 Subject: Add a test case that should now work --- lib/hipe/test/bs_SUITE_data/bs_pmatch_bugs.erl | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/lib/hipe/test/bs_SUITE_data/bs_pmatch_bugs.erl b/lib/hipe/test/bs_SUITE_data/bs_pmatch_bugs.erl index b280705a47..d9f3278b45 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_pmatch_bugs.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_pmatch_bugs.erl @@ -9,6 +9,7 @@ test() -> <<49,50,51>> = lex_digits1(Bin, 1, []), <<49,50,51>> = lex_digits2(Bin, 1, []), ok = var_bind_bug(<<1, 2, 3, 4, 5, 6, 7, 8>>), + ok = bs_match_string_bug(), ok. %%-------------------------------------------------------------------- @@ -65,3 +66,50 @@ var_bind_bug(<>) -> B -> wrong; _ -> ok end. + +%%-------------------------------------------------------------------- +%% From: Andreas Schultz +%% Date: 2/11/2016 +%% +%% Either HiPE is messing up binary matches in some cases or I'm not +%% seeing the problem. ... +%% With Erlang 19.1.3 the HiPE compiled version behaves differently +%% than the non-HiPE version: ... +%% So, do I do something wrong here or is this a legitimate HiPE bug? +%% +%% Yes, this was a legitimate HiPE bug: The BEAM to ICode tranaslation +%% of the bs_match_string instruction, written long ago for binaries +%% (i.e., with byte-sized strings), tried to do a `clever' translation +%% of even bit-sized strings using a HiPE primop that took a `Size' +%% argument expressed in *bytes*. ICode is not really the place to do +%% such a thing, and moreover there is really no reason for the HiPE +%% primop not to take a Size argument expressed in *bits* instead. +%% The bug was fixed by changing the `Size' argument to be in bits, +%% postponing the translation of the bs_match_string primop until RTL +%% and doing a proper translation using bit-sized quantities there. +%%-------------------------------------------------------------------- + +bs_match_string_bug() -> + ok = test0(<<50>>), + Bin = data(), + ok = test1(Bin), + ok = test2(Bin), + ok. + +%% Minimal test case showing the problem matching with strings +test0(<<6:5, 0:1, 0:2>>) -> weird; +test0(<<6:5, _:1, _:2>>) -> ok; +test0(_) -> default. + +data() -> <<50,16,0>>. + +%% This was the problematic test case in HiPE: 'default' was returned +test1(<<1:3, 1:1, _:1, 0:1, 0:1, 0:1, _/binary>>) -> weird; +test1(<<1:3, 1:1, _:1, _:1, _:1, _:1, _/binary>>) -> ok; +test1(_) -> default. + +%% This variation of test1/1 above worked OK, even in HiPE +test2(<<1:3, 1:1, _:1, A:1, B:1, C:1, _/binary>>) + when A =:= 1; B =:= 1; C =:= 1 -> ok; +test2(<<1:3, 1:1, _:1, 0:1, 0:1, 0:1, _/binary>>) -> weird; +test2(_) -> default. -- cgit v1.2.3