diff options
author | Zandra <[email protected]> | 2016-01-27 11:44:33 +0100 |
---|---|---|
committer | Zandra <[email protected]> | 2016-01-27 11:44:33 +0100 |
commit | 9722388b2fcec9b7f7e5680335e1bd6392ef11fc (patch) | |
tree | db013e6468725fd9fcfe81b847afc08f72c51d0f /lib/hipe/test/bs_SUITE_data | |
parent | b5e787a6483c1aff35d44c0130946df441497d9d (diff) | |
parent | 8970c8c25c45e6a4f92b0652c65e26d890939409 (diff) | |
download | otp-9722388b2fcec9b7f7e5680335e1bd6392ef11fc.tar.gz otp-9722388b2fcec9b7f7e5680335e1bd6392ef11fc.tar.bz2 otp-9722388b2fcec9b7f7e5680335e1bd6392ef11fc.zip |
Merge branch 'kostis/hipe-tests-basic' into maint
OTP-13269
* kostis/hipe-tests-basic:
Fix compilation of matching with UTF binaries
Cleanup and add one more test case
Two tests that depend on inlining being turned on
More tests for BIFs
Include some more old HiPE tests to the test suite
Add tests for the is_boolean/1 guard
Two more tests added
Test that apply/3 is tail recursive
Three more tests added
Minor cleanup
Comment out tests that are not ready for to_llvm
Cleanups & uncomment some code
More tests for handling of UTF in bitstrings
Minor code cleanup
Add more generated test suites in Makefile
Use function from hipe module instead of a local one
Add function to prevent running tests in the LLVM backend
More basic tests
First part of the basic test suite for the HiPE compiler
Diffstat (limited to 'lib/hipe/test/bs_SUITE_data')
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_split.erl | 10 | ||||
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_utf.erl | 344 |
2 files changed, 346 insertions, 8 deletions
diff --git a/lib/hipe/test/bs_SUITE_data/bs_split.erl b/lib/hipe/test/bs_SUITE_data/bs_split.erl index 2e52308a77..617543f789 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_split.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_split.erl @@ -26,13 +26,13 @@ bs1(L, B, Pos, Sz1, Sz2) -> <<B1:Sz1/binary, B2:Sz2/binary>> = B, bs2(L, B, Pos, B1, B2). -bs2(L, B, Pos, B1, B2)-> +bs2(L, B, Pos, B1, B2) -> B1 = list_to_binary(lists:sublist(L, 1, Pos)), bs3(L, B, Pos, B2). bs3(L, B, Pos, B2) -> B2 = list_to_binary(lists:nthtail(Pos, L)), - byte_split(L, B, Pos-1). + byte_split(L, B, Pos - 1). %%-------------------------------------------------------------------- @@ -56,14 +56,14 @@ bit_split_binary2(_Action, _Bin, [], _Bef) -> ok. bit_split_binary3(Action, Bin, List, Bef, Aft) when Bef =< Aft -> Action(Bin, List, Bef, (Aft-Bef) div 8 * 8), - bit_split_binary3(Action, Bin, List, Bef, Aft-8); + bit_split_binary3(Action, Bin, List, Bef, Aft - 8); bit_split_binary3(_, _, _, _, _) -> ok. make_bin_from_list(_List, 0) -> mkbin([]); make_bin_from_list(List, N) -> list_to_binary([make_int(List, 8, 0), - make_bin_from_list(lists:nthtail(8, List), N-8)]). + make_bin_from_list(lists:nthtail(8, List), N - 8)]). make_int(_List, 0, Acc) -> Acc; make_int([H|T], N, Acc) -> make_int(T, N-1, Acc bsl 1 bor H). @@ -101,5 +101,5 @@ z_split(B, N) -> <<_:N/binary>> -> [B]; _ -> - z_split(B, N+1) + z_split(B, N + 1) end. diff --git a/lib/hipe/test/bs_SUITE_data/bs_utf.erl b/lib/hipe/test/bs_SUITE_data/bs_utf.erl index f50ae08964..24526f574d 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_utf.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_utf.erl @@ -1,18 +1,356 @@ %% -*- erlang-indent-level: 2 -*- %%------------------------------------------------------------------- -%% Purpose: test support for UTF datatypes in binaries - INCOMPLETE +%% Purpose: test support for UTF datatypes in binaries +%% +%% Most of it taken from emulator/test/bs_utf_SUITE.erl %%------------------------------------------------------------------- -module(bs_utf). -export([test/0]). +-include_lib("test_server/include/test_server.hrl"). + test() -> + ok = utf8_cm65(), + ok = utf8_roundtrip(), + ok = utf16_roundtrip(), + ok = utf32_roundtrip(), + %% The following were problematic for the LLVM backend + ok = utf8_illegal_sequences(), + ok = utf16_illegal_sequences(), + ok = utf32_illegal_sequences(), + ok. + +%%------------------------------------------------------------------- +%% A test with construction and matching + +utf8_cm65() -> <<65>> = b65utf8(), ok = m(<<65>>). +b65utf8() -> + <<65/utf8>>. + m(<<65/utf8>>) -> ok. -b65utf8() -> - <<65/utf8>>. +%%------------------------------------------------------------------- + +utf8_roundtrip() -> + ok = utf8_roundtrip(0, 16#D7FF), + ok = utf8_roundtrip(16#E000, 16#10FFFF), + ok. + +utf8_roundtrip(First, Last) when First =< Last -> + Bin = int_to_utf8(First), + Bin = id(<<First/utf8>>), + Bin = id(<<(id(<<>>))/binary,First/utf8>>), + Unaligned = id(<<3:2,First/utf8>>), + <<_:2,Bin/binary>> = Unaligned, + <<First/utf8>> = Bin, + <<First/utf8>> = make_unaligned(Bin), + utf8_roundtrip(First+1, Last); +utf8_roundtrip(_, _) -> + ok. + +%%------------------------------------------------------------------- + +utf16_roundtrip() -> + Big = fun utf16_big_roundtrip/1, + Little = fun utf16_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> do_utf16_roundtrip(Fun) end) || + Fun <- [Big,Little]], + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +do_utf16_roundtrip(Fun) -> + do_utf16_roundtrip(0, 16#D7FF, Fun), + do_utf16_roundtrip(16#E000, 16#10FFFF, Fun). + +do_utf16_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf16_roundtrip(First+1, Last, Fun); +do_utf16_roundtrip(_, _, _) -> ok. + +utf16_big_roundtrip(Char) -> + Bin = id(<<Char/utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/utf16>>), + Unaligned = id(<<3:2,Char/utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf16>> = Bin, + <<Char/utf16>> = make_unaligned(Bin), + ok. + +utf16_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>), + Unaligned = id(<<3:2,Char/little-utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf16>> = Bin, + <<Char/little-utf16>> = make_unaligned(Bin), + ok. + +%%------------------------------------------------------------------- + +utf32_roundtrip() -> + Big = fun utf32_big_roundtrip/1, + Little = fun utf32_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> do_utf32_roundtrip(Fun) end) || + Fun <- [Big,Little]], + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +do_utf32_roundtrip(Fun) -> + do_utf32_roundtrip(0, 16#D7FF, Fun), + do_utf32_roundtrip(16#E000, 16#10FFFF, Fun). + +do_utf32_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf32_roundtrip(First+1, Last, Fun); +do_utf32_roundtrip(_, _, _) -> ok. + +utf32_big_roundtrip(Char) -> + Bin = id(<<Char/utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/utf32>>), + Unaligned = id(<<3:2,Char/utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf32>> = Bin, + <<Char/utf32>> = make_unaligned(Bin), + ok. + +utf32_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>), + Unaligned = id(<<3:2,Char/little-utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf32>> = Bin, + <<Char/little-utf32>> = make_unaligned(Bin), + ok. + +%%------------------------------------------------------------------- + +utf8_illegal_sequences() -> + fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + + %% Illegal first character. + [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)], + + %% Short sequences. + short_sequences(16#80, 16#10FFFF), + + %% Overlong sequences. (Using more bytes than necessary + %% is not allowed.) + overlong(0, 127, 2), + overlong(128, 16#7FF, 3), + overlong(16#800, 16#FFFF, 4), + ok. + +fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/utf8>>), + Bin = int_to_utf8(Char), + fail(Bin), + fail_range(Char+1, End); +fail_range(_, _) -> ok. + +short_sequences(Char, End) -> + Step = (End - Char) div erlang:system_info(schedulers) + 1, + PidRefs = short_sequences_1(Char, Step, End), + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +short_sequences_1(Char, Step, End) when Char =< End -> + CharEnd = lists:min([Char+Step-1,End]), + [spawn_monitor(fun() -> + %% io:format("~p - ~p\n", [Char, CharEnd]), + do_short_sequences(Char, CharEnd) + end)|short_sequences_1(Char+Step, Step, End)]; +short_sequences_1(_, _, _) -> []. + +do_short_sequences(Char, End) when Char =< End -> + short_sequence(Char), + do_short_sequences(Char+1, End); +do_short_sequences(_, _) -> ok. + +short_sequence(I) -> + case int_to_utf8(I) of + <<S0:3/binary,_:8>> -> + <<S1:2/binary,R1:8>> = S0, + <<S2:1/binary,_:8>> = S1, + fail(S0), + fail(S1), + fail(S2), + fail(<<S2/binary,16#7F,R1,R1>>), + fail(<<S1/binary,16#7F,R1>>), + fail(<<S0/binary,16#7F>>); + <<S0:2/binary,_:8>> -> + <<S1:1/binary,R1:8>> = S0, + fail(S0), + fail(S1), + fail(<<S0/binary,16#7F>>), + fail(<<S1/binary,16#7F>>), + fail(<<S1/binary,16#7F,R1>>); + <<S:1/binary,_:8>> -> + fail(S), + fail(<<S/binary,16#7F>>) + end. + +overlong(Char, Last, NumBytes) when Char =< Last -> + overlong(Char, NumBytes), + overlong(Char+1, Last, NumBytes); +overlong(_, _, _) -> ok. + +overlong(Char, NumBytes) when NumBytes < 5 -> + case int_to_utf8(Char, NumBytes) of + <<Char/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char}); + <<OtherChar/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar}); + _ -> ok + end, + overlong(Char, NumBytes+1); +overlong(_, _) -> ok. + +fail(Bin) -> + fail_1(Bin), + fail_1(make_unaligned(Bin)). + +fail_1(<<Char/utf8>> = Bin) -> + ?t:fail({illegal_encoding_accepted, Bin, Char}); +fail_1(_) -> ok. + +%%------------------------------------------------------------------- + +utf16_illegal_sequences() -> + utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + utf16_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + lonely_hi_surrogate(16#D800, 16#DFFF), + leading_lo_surrogate(16#DC00, 16#DFFF), + ok. + +utf16_fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/big-utf16>>), + {'EXIT', _} = (catch <<Char/little-utf16>>), + utf16_fail_range(Char+1, End); +utf16_fail_range(_, _) -> ok. + +lonely_hi_surrogate(Char, End) when Char =< End -> + BinBig = <<Char:16/big>>, + BinLittle = <<Char:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16>>,_} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16>>} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + lonely_hi_surrogate(Char+1, End); +lonely_hi_surrogate(_, _) -> ok. + +leading_lo_surrogate(Char, End) when Char =< End -> + leading_lo_surrogate(Char, 16#D800, 16#DFFF), + leading_lo_surrogate(Char+1, End); +leading_lo_surrogate(_, _) -> ok. + +leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End -> + BinBig = <<HiSurr:16/big,LoSurr:16/big>>, + BinLittle = <<HiSurr:16/little,LoSurr:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16,_/bits>>,_} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16,_/bits>>} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + leading_lo_surrogate(HiSurr, LoSurr+1, End); +leading_lo_surrogate(_, _, _) -> ok. + +%%------------------------------------------------------------------- + +utf32_illegal_sequences() -> + utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + utf32_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + utf32_fail_range(-100, -1), + ok. + +utf32_fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/big-utf32>>), + {'EXIT', _} = (catch <<Char/little-utf32>>), + case {<<Char:32>>,<<Char:32/little>>} of + {<<Unexpected/utf32>>,_} -> + ?t:fail(Unexpected); + {_,<<Unexpected/little-utf32>>} -> + ?t:fail(Unexpected); + {_,_} -> ok + end, + utf32_fail_range(Char+1, End); +utf32_fail_range(_, _) -> ok. + +%%------------------------------------------------------------------- +%% This function intentionally allows construction of UTF-8 sequence +%% in illegal ranges. + +int_to_utf8(I) when I =< 16#7F -> + <<I>>; +int_to_utf8(I) when I =< 16#7FF -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I) when I =< 16#FFFF -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I) when I =< 16#3FFFFF -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; +int_to_utf8(I) when I =< 16#3FFFFFF -> + B5 = I, + B4 = (I bsr 6), + B3 = (I bsr 12), + B2 = (I bsr 18), + B1 = (I bsr 24), + <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6, + 1:1,0:1,B5:6>>. + +%% int_to_utf8(I, NumberOfBytes) -> Binary. +%% This function can be used to construct overlong sequences. +int_to_utf8(I, 1) -> + <<I>>; +int_to_utf8(I, 2) -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I, 3) -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I, 4) -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>. + +%%------------------------------------------------------------------- + +make_unaligned(Bin0) when is_binary(Bin0) -> + Bin1 = <<0:3,Bin0/binary,31:5>>, + Sz = byte_size(Bin0), + <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), + Bin. + +%%------------------------------------------------------------------- +%% Just to prevent compiler optimizations + +id(X) -> X. |