diff options
Diffstat (limited to 'lib/hipe/test/bs_SUITE_data')
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_add.erl | 10 | ||||
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_construct.erl | 177 | ||||
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_match.erl | 108 | ||||
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_split.erl | 10 | ||||
-rw-r--r-- | lib/hipe/test/bs_SUITE_data/bs_utf.erl | 344 |
5 files changed, 637 insertions, 12 deletions
diff --git a/lib/hipe/test/bs_SUITE_data/bs_add.erl b/lib/hipe/test/bs_SUITE_data/bs_add.erl index af5a3b2f23..4b92e6b413 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_add.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_add.erl @@ -2,7 +2,7 @@ %%------------------------------------------------------------------------- %% The guard in f/3 revealed a problem in the translation of the 'bs_add' %% BEAM instruction to Icode. The fail label was not properly translated. -%% Fixed 3/2/2011. +%% Fixed 3/2/2011. Then in 2015 we found another issue: g/2. Also fixed. %%------------------------------------------------------------------------- -module(bs_add). @@ -10,9 +10,17 @@ test() -> 42 = f(<<12345:16>>, 4711, <<42>>), + true = g(<<1:13>>, 3), %% was handled OK, but + false = g(<<>>, gurka), %% this one leaked badarith ok. f(Bin, A, B) when <<A:9, B:7/binary>> == Bin -> gazonk; f(Bin, _, _) when is_binary(Bin) -> 42. + +%% Complex way of testing (bit_size(Bin) + Len) rem 8 =:= 0 +g(Bin, Len) when is_binary(<<Bin/binary-unit:1, 123:Len>>) -> + true; +g(_, _) -> + false. diff --git a/lib/hipe/test/bs_SUITE_data/bs_construct.erl b/lib/hipe/test/bs_SUITE_data/bs_construct.erl index 9cc9ac848c..b9e7d93570 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_construct.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_construct.erl @@ -13,6 +13,12 @@ test() -> ok = bs5(), 16#10000008 = bit_size(large_bin(1, 2, 3, 4)), ok = bad_ones(), + ok = zero_width(), + ok = not_used(), + ok = bad_append(), + ok = system_limit(), + ok = bad_floats(), + ok = huge_binaries(), ok. %%-------------------------------------------------------------------- @@ -126,3 +132,174 @@ bad_ones() -> Bin123 = <<1,2,3>>, ?FAIL(<<Bin123/float>>), ok. + +%%-------------------------------------------------------------------- +%% Taken from the emulator bs_construct_SUITE - seg faulted till 18.1 + +zero_width() -> + Z = id(0), + Small = id(42), + Big = id(1 bsl 128), % puts stuff on the heap + <<>> = <<Small:Z>>, + <<>> = <<Small:0>>, + <<>> = <<Big:Z>>, + <<>> = <<Big:0>>, + ok. + +id(X) -> X. + +%%-------------------------------------------------------------------- +%% Taken from bs_construct_SUITE. The test checks that constructed +%% binaries that are not used would still give a `badarg' exception. +%% Problem was that in native code one of them gave `badarith'. + +not_used() -> + ok = not_used1(3, <<"dum">>), + {'EXIT',{badarg,_}} = (catch not_used1(42, "dum_string")), + {'EXIT',{badarg,_}} = (catch not_used2(666, -2)), + {'EXIT',{badarg,_}} = (catch not_used2(666, "bad_size")), % this one + {'EXIT',{badarg,_}} = (catch not_used3(666)), + ok. + +not_used1(I, BinString) -> + <<I:32,BinString/binary>>, + ok. + +not_used2(I, Sz) -> + <<I:Sz>>, + ok. + +not_used3(I) -> + <<I:(-8)>>, + ok. + +%%-------------------------------------------------------------------- +%% Taken from bs_construct_SUITE. + +bad_append() -> + do_bad_append(<<127:1>>, fun append_unit_3/1), + do_bad_append(<<127:2>>, fun append_unit_3/1), + do_bad_append(<<127:17>>, fun append_unit_3/1), + + do_bad_append(<<127:3>>, fun append_unit_4/1), + do_bad_append(<<127:5>>, fun append_unit_4/1), + do_bad_append(<<127:7>>, fun append_unit_4/1), + do_bad_append(<<127:199>>, fun append_unit_4/1), + + do_bad_append(<<127:7>>, fun append_unit_8/1), + do_bad_append(<<127:9>>, fun append_unit_8/1), + + do_bad_append(<<0:8>>, fun append_unit_16/1), + do_bad_append(<<0:15>>, fun append_unit_16/1), + do_bad_append(<<0:17>>, fun append_unit_16/1), + ok. + +do_bad_append(Bin0, Appender) -> + {'EXIT',{badarg,_}} = (catch Appender(Bin0)), + + Bin1 = id(<<0:3,Bin0/bitstring>>), + <<_:3,Bin2/bitstring>> = Bin1, + {'EXIT',{badarg,_}} = (catch Appender(Bin2)), + + %% Create a writable binary. + Empty = id(<<>>), + Bin3 = <<Empty/bitstring,Bin0/bitstring>>, + {'EXIT',{badarg,_}} = (catch Appender(Bin3)), + ok. + +append_unit_3(Bin) -> + <<Bin/binary-unit:3,0:1>>. + +append_unit_4(Bin) -> + <<Bin/binary-unit:4,0:1>>. + +append_unit_8(Bin) -> + <<Bin/binary,0:1>>. + +append_unit_16(Bin) -> + <<Bin/binary-unit:16,0:1>>. + +%%-------------------------------------------------------------------- +%% Taken from bs_construct_SUITE. + +system_limit() -> + WordSize = erlang:system_info(wordsize), + BitsPerWord = WordSize * 8, + {'EXIT',{system_limit,_}} = + (catch <<0:(id(0)),42:(id(1 bsl BitsPerWord))>>), + {'EXIT',{system_limit,_}} = + (catch <<42:(id(1 bsl BitsPerWord)),0:(id(0))>>), + {'EXIT',{system_limit,_}} = + (catch <<(id(<<>>))/binary,0:(id(1 bsl 100))>>), + + %% Would fail to load. + {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 67)>>), + {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 64)+1)>>), + case WordSize of + 4 -> + system_limit_32(); + 8 -> + ok + end. + +system_limit_32() -> + {'EXIT',{badarg,_}} = (catch <<42:(-1)>>), + {'EXIT',{badarg,_}} = (catch <<42:(id(-1))>>), + {'EXIT',{badarg,_}} = (catch <<42:(id(-389739873536870912))/unit:8>>), + {'EXIT',{system_limit,_}} = (catch <<42:536870912/unit:8>>), + {'EXIT',{system_limit,_}} = (catch <<42:(id(536870912))/unit:8>>), + {'EXIT',{system_limit,_}} = (catch <<0:(id(8)),42:536870912/unit:8>>), + {'EXIT',{system_limit,_}} = (catch <<0:(id(8)),42:(id(536870912))/unit:8>>), + + %% The size would be silently truncated, resulting in a crash. + {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 35)>>), + {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 32)+1)>>), + + %% Would fail to load. + {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 43)>>), + {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 40)+1)>>), + ok. + +%%-------------------------------------------------------------------- + +bad_floats() -> + WordSize = erlang:system_info(wordsize), + BitsPerWord = WordSize * 8, + {'EXIT',{badarg,_}} = (catch <<3.14:(id(33))/float>>), + {'EXIT',{badarg,_}} = (catch <<3.14:(id(64 bor 32))/float>>), + {'EXIT',{badarg,_}} = (catch <<3.14:(id((1 bsl 28) bor 32))/float>>), + {'EXIT',{system_limit,_}} = (catch <<3.14:(id(1 bsl BitsPerWord))/float>>), + ok. + +%%-------------------------------------------------------------------- +%% A bug in the implementation of binaries compared sizes in bits with sizes in +%% bytes, causing <<0:(id((1 bsl 31)-1))>> to fail to construct with +%% 'system_limit'. +%% <<0:(id((1 bsl 32)-1))>> was succeeding because the comparison was +%% (incorrectly) signed. + +huge_binaries() -> + AlmostIllegal = id(<<0:(id((1 bsl 32)-8))>>), + case erlang:system_info(wordsize) of + 4 -> huge_binaries_32(AlmostIllegal); + 8 -> ok + end, + garbage_collect(), + id(<<0:(id((1 bsl 31)-1))>>), + id(<<0:(id((1 bsl 30)-1))>>), + garbage_collect(), + ok. + +huge_binaries_32(AlmostIllegal) -> + %% Attempt construction of too large binary using bs_init/1 (which takes the + %% number of bytes as an argument, which should be compared to the maximum + %% size in bytes). + {'EXIT',{system_limit,_}} = (catch <<0:32,AlmostIllegal/binary>>), + %% Attempt construction of too large binary using bs_init/1 with a size in + %% bytes that has the msb set (and would be negative if it was signed). + {'EXIT',{system_limit,_}} = + (catch <<0:8, AlmostIllegal/binary, AlmostIllegal/binary, + AlmostIllegal/binary, AlmostIllegal/binary, + AlmostIllegal/binary, AlmostIllegal/binary, + AlmostIllegal/binary, AlmostIllegal/binary>>), + ok. diff --git a/lib/hipe/test/bs_SUITE_data/bs_match.erl b/lib/hipe/test/bs_SUITE_data/bs_match.erl index 7bc93a316b..b241ea8d35 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_match.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_match.erl @@ -1,8 +1,8 @@ %%% -*- erlang-indent-level: 2 -*- %%%------------------------------------------------------------------- %%% File : bs_match.erl -%%% Author : Per Gustafsson <[email protected]> -%%% Purpose : Performs simple matching and construction of binaries +%%% Authors : Per Gustafsson <[email protected]>, Kostis Sagonas <[email protected]> +%%% Purpose : Tests matching and construction of binaries %%% TODO : Add binary and float tests %%% Created : 20 Feb 2004 %%%------------------------------------------------------------------- @@ -13,7 +13,7 @@ test() -> Funs = [fun test_aligned/0, fun test_unaligned/0, fun test_zero_tail/0, fun test_integer_matching/0, - fun test_writable_bin/0], + fun test_writable_bin/0, fun test_match_huge_bin/0], lists:foreach(fun (F) -> ok = F() end, Funs). %%------------------------------------------------------------------- @@ -175,6 +175,9 @@ test_dynamic_integer_matching(N) -> <<12:N/integer-little, 0:S>> = <<12:N/integer-little, 0:S>>, ok. +%%------------------------------------------------------------------- +%% Test writable bin -- added by Sverker Eriksson + test_writable_bin() -> test_writable_bin(<<>>, 0), ok. @@ -185,3 +188,102 @@ test_writable_bin(Bin0, N) when N < 128 -> Bin1 = <<Bin0/binary, N>>, <<_/utf8, _/binary>> = Bin1, test_writable_bin(Bin1, N+1). + +%%------------------------------------------------------------------- +%% Test matching with a huge bin -- taken from bs_match_bin_SUITE + +test_match_huge_bin() -> + Bin = <<0:(1 bsl 27),13:8>>, + skip_huge_bin_1(1 bsl 27, Bin), + 16777216 = match_huge_bin_1(1 bsl 27, Bin), + %% Test overflowing the size of a binary field. + nomatch = overflow_huge_bin_skip_32(Bin), + nomatch = overflow_huge_bin_32(Bin), + nomatch = overflow_huge_bin_skip_64(Bin), + nomatch = overflow_huge_bin_64(Bin), + %% Size in variable + ok = overflow_huge_bin(Bin, lists:seq(25, 32)++lists:seq(50, 64)), + ok = overflow_huge_bin_unit128(Bin, lists:seq(25, 32)++lists:seq(50, 64)), + ok. + +overflow_huge_bin(Bin, [Sz0|Sizes]) -> + Sz = id(1 bsl Sz0), + case Bin of + <<_:Sz/binary-unit:8,0,_/binary>> -> + {error,Sz}; + _ -> + case Bin of + <<NewBin:Sz/binary-unit:8,0,_/binary>> -> + {error,Sz,size(NewBin)}; + _ -> + overflow_huge_bin(Bin, Sizes) + end + end; +overflow_huge_bin(_, []) -> ok. + +overflow_huge_bin_unit128(Bin, [Sz0|Sizes]) -> + Sz = id(1 bsl Sz0), + case Bin of + <<_:Sz/binary-unit:128,0,_/binary>> -> + {error,Sz}; + _ -> + case Bin of + <<NewBin:Sz/binary-unit:128,0,_/binary>> -> + {error,Sz,size(NewBin)}; + _ -> + overflow_huge_bin_unit128(Bin, Sizes) + end + end; +overflow_huge_bin_unit128(_, []) -> ok. + +skip_huge_bin_1(I, Bin) -> + <<_:I/binary-unit:1,13>> = Bin, + ok. + +match_huge_bin_1(I, Bin) -> + case Bin of + <<Val:I/binary-unit:1,13>> -> size(Val); + _ -> nomatch + end. + +overflow_huge_bin_skip_32(<<_:4294967296/binary,0,_/binary>>) -> 1; % 1 bsl 32 +overflow_huge_bin_skip_32(<<_:33554432/binary-unit:128,0,_/binary>>) -> 2; % 1 bsl 25 +overflow_huge_bin_skip_32(<<_:67108864/binary-unit:64,0,_/binary>>) -> 3; % 1 bsl 26 +overflow_huge_bin_skip_32(<<_:134217728/binary-unit:32,0,_/binary>>) -> 4; % 1 bsl 27 +overflow_huge_bin_skip_32(<<_:268435456/binary-unit:16,0,_/binary>>) -> 5; % 1 bsl 28 +overflow_huge_bin_skip_32(<<_:536870912/binary-unit:8,0,_/binary>>) -> 6; % 1 bsl 29 +overflow_huge_bin_skip_32(<<_:1073741824/binary-unit:8,0,_/binary>>) -> 7; % 1 bsl 30 +overflow_huge_bin_skip_32(<<_:2147483648/binary-unit:8,0,_/binary>>) -> 8; % 1 bsl 31 +overflow_huge_bin_skip_32(_) -> nomatch. + +overflow_huge_bin_32(<<Bin:4294967296/binary,_/binary>>) -> {1,Bin}; % 1 bsl 32 +overflow_huge_bin_32(<<Bin:33554432/binary-unit:128,0,_/binary>>) -> {2,Bin}; % 1 bsl 25 +overflow_huge_bin_32(<<Bin:67108864/binary-unit:128,0,_/binary>>) -> {3,Bin}; % 1 bsl 26 +overflow_huge_bin_32(<<Bin:134217728/binary-unit:128,0,_/binary>>) -> {4,Bin}; % 1 bsl 27 +overflow_huge_bin_32(<<Bin:268435456/binary-unit:128,0,_/binary>>) -> {5,Bin}; % 1 bsl 28 +overflow_huge_bin_32(<<Bin:536870912/binary-unit:128,0,_/binary>>) -> {6,Bin}; % 1 bsl 29 +overflow_huge_bin_32(<<Bin:1073741824/binary-unit:128,0,_/binary>>) -> {7,Bin}; % 1 bsl 30 +overflow_huge_bin_32(<<Bin:2147483648/binary-unit:128,0,_/binary>>) -> {8,Bin}; % 1 bsl 31 +overflow_huge_bin_32(_) -> nomatch. + +overflow_huge_bin_skip_64(<<_:18446744073709551616/binary,0,_/binary>>) -> 1; % 1 bsl 64 +overflow_huge_bin_skip_64(<<_:144115188075855872/binary-unit:128,0,_/binary>>) -> 2; % 1 bsl 57 +overflow_huge_bin_skip_64(<<_:288230376151711744/binary-unit:64,0,_/binary>>) -> 3; % 1 bsl 58 +overflow_huge_bin_skip_64(<<_:576460752303423488/binary-unit:32,0,_/binary>>) -> 4; % 1 bsl 59 +overflow_huge_bin_skip_64(<<_:1152921504606846976/binary-unit:16,0,_/binary>>) -> 5; % 1 bsl 60 +overflow_huge_bin_skip_64(<<_:2305843009213693952/binary-unit:8,0,_/binary>>) -> 6; % 1 bsl 61 +overflow_huge_bin_skip_64(<<_:4611686018427387904/binary-unit:8,0,_/binary>>) -> 7; % 1 bsl 62 +overflow_huge_bin_skip_64(<<_:9223372036854775808/binary-unit:8,_/binary>>) -> 8; % 1 bsl 63 +overflow_huge_bin_skip_64(_) -> nomatch. + +overflow_huge_bin_64(<<Bin:18446744073709551616/binary,_/binary>>) -> {1,Bin}; % 1 bsl 64 +overflow_huge_bin_64(<<Bin:144115188075855872/binary-unit:128,0,_/binary>>) -> {2,Bin}; % 1 bsl 57 +overflow_huge_bin_64(<<Bin:288230376151711744/binary-unit:128,0,_/binary>>) -> {3,Bin}; % 1 bsl 58 +overflow_huge_bin_64(<<Bin:576460752303423488/binary-unit:128,0,_/binary>>) -> {4,Bin}; % 1 bsl 59 +overflow_huge_bin_64(<<Bin:1152921504606846976/binary-unit:128,0,_/binary>>) -> {5,Bin}; % 1 bsl 60 +overflow_huge_bin_64(<<Bin:2305843009213693952/binary-unit:128,0,_/binary>>) -> {6,Bin}; % 1 bsl 61 +overflow_huge_bin_64(<<Bin:4611686018427387904/binary-unit:128,0,_/binary>>) -> {7,Bin}; % 1 bsl 62 +overflow_huge_bin_64(<<Bin:9223372036854775808/binary-unit:128,0,_/binary>>) -> {8,Bin}; % 1 bsl 63 +overflow_huge_bin_64(_) -> nomatch. + +id(I) -> I. diff --git a/lib/hipe/test/bs_SUITE_data/bs_split.erl b/lib/hipe/test/bs_SUITE_data/bs_split.erl index 2e52308a77..617543f789 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_split.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_split.erl @@ -26,13 +26,13 @@ bs1(L, B, Pos, Sz1, Sz2) -> <<B1:Sz1/binary, B2:Sz2/binary>> = B, bs2(L, B, Pos, B1, B2). -bs2(L, B, Pos, B1, B2)-> +bs2(L, B, Pos, B1, B2) -> B1 = list_to_binary(lists:sublist(L, 1, Pos)), bs3(L, B, Pos, B2). bs3(L, B, Pos, B2) -> B2 = list_to_binary(lists:nthtail(Pos, L)), - byte_split(L, B, Pos-1). + byte_split(L, B, Pos - 1). %%-------------------------------------------------------------------- @@ -56,14 +56,14 @@ bit_split_binary2(_Action, _Bin, [], _Bef) -> ok. bit_split_binary3(Action, Bin, List, Bef, Aft) when Bef =< Aft -> Action(Bin, List, Bef, (Aft-Bef) div 8 * 8), - bit_split_binary3(Action, Bin, List, Bef, Aft-8); + bit_split_binary3(Action, Bin, List, Bef, Aft - 8); bit_split_binary3(_, _, _, _, _) -> ok. make_bin_from_list(_List, 0) -> mkbin([]); make_bin_from_list(List, N) -> list_to_binary([make_int(List, 8, 0), - make_bin_from_list(lists:nthtail(8, List), N-8)]). + make_bin_from_list(lists:nthtail(8, List), N - 8)]). make_int(_List, 0, Acc) -> Acc; make_int([H|T], N, Acc) -> make_int(T, N-1, Acc bsl 1 bor H). @@ -101,5 +101,5 @@ z_split(B, N) -> <<_:N/binary>> -> [B]; _ -> - z_split(B, N+1) + z_split(B, N + 1) end. diff --git a/lib/hipe/test/bs_SUITE_data/bs_utf.erl b/lib/hipe/test/bs_SUITE_data/bs_utf.erl index f50ae08964..24526f574d 100644 --- a/lib/hipe/test/bs_SUITE_data/bs_utf.erl +++ b/lib/hipe/test/bs_SUITE_data/bs_utf.erl @@ -1,18 +1,356 @@ %% -*- erlang-indent-level: 2 -*- %%------------------------------------------------------------------- -%% Purpose: test support for UTF datatypes in binaries - INCOMPLETE +%% Purpose: test support for UTF datatypes in binaries +%% +%% Most of it taken from emulator/test/bs_utf_SUITE.erl %%------------------------------------------------------------------- -module(bs_utf). -export([test/0]). +-include_lib("test_server/include/test_server.hrl"). + test() -> + ok = utf8_cm65(), + ok = utf8_roundtrip(), + ok = utf16_roundtrip(), + ok = utf32_roundtrip(), + %% The following were problematic for the LLVM backend + ok = utf8_illegal_sequences(), + ok = utf16_illegal_sequences(), + ok = utf32_illegal_sequences(), + ok. + +%%------------------------------------------------------------------- +%% A test with construction and matching + +utf8_cm65() -> <<65>> = b65utf8(), ok = m(<<65>>). +b65utf8() -> + <<65/utf8>>. + m(<<65/utf8>>) -> ok. -b65utf8() -> - <<65/utf8>>. +%%------------------------------------------------------------------- + +utf8_roundtrip() -> + ok = utf8_roundtrip(0, 16#D7FF), + ok = utf8_roundtrip(16#E000, 16#10FFFF), + ok. + +utf8_roundtrip(First, Last) when First =< Last -> + Bin = int_to_utf8(First), + Bin = id(<<First/utf8>>), + Bin = id(<<(id(<<>>))/binary,First/utf8>>), + Unaligned = id(<<3:2,First/utf8>>), + <<_:2,Bin/binary>> = Unaligned, + <<First/utf8>> = Bin, + <<First/utf8>> = make_unaligned(Bin), + utf8_roundtrip(First+1, Last); +utf8_roundtrip(_, _) -> + ok. + +%%------------------------------------------------------------------- + +utf16_roundtrip() -> + Big = fun utf16_big_roundtrip/1, + Little = fun utf16_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> do_utf16_roundtrip(Fun) end) || + Fun <- [Big,Little]], + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +do_utf16_roundtrip(Fun) -> + do_utf16_roundtrip(0, 16#D7FF, Fun), + do_utf16_roundtrip(16#E000, 16#10FFFF, Fun). + +do_utf16_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf16_roundtrip(First+1, Last, Fun); +do_utf16_roundtrip(_, _, _) -> ok. + +utf16_big_roundtrip(Char) -> + Bin = id(<<Char/utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/utf16>>), + Unaligned = id(<<3:2,Char/utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf16>> = Bin, + <<Char/utf16>> = make_unaligned(Bin), + ok. + +utf16_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>), + Unaligned = id(<<3:2,Char/little-utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf16>> = Bin, + <<Char/little-utf16>> = make_unaligned(Bin), + ok. + +%%------------------------------------------------------------------- + +utf32_roundtrip() -> + Big = fun utf32_big_roundtrip/1, + Little = fun utf32_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> do_utf32_roundtrip(Fun) end) || + Fun <- [Big,Little]], + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +do_utf32_roundtrip(Fun) -> + do_utf32_roundtrip(0, 16#D7FF, Fun), + do_utf32_roundtrip(16#E000, 16#10FFFF, Fun). + +do_utf32_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf32_roundtrip(First+1, Last, Fun); +do_utf32_roundtrip(_, _, _) -> ok. + +utf32_big_roundtrip(Char) -> + Bin = id(<<Char/utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/utf32>>), + Unaligned = id(<<3:2,Char/utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf32>> = Bin, + <<Char/utf32>> = make_unaligned(Bin), + ok. + +utf32_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>), + Unaligned = id(<<3:2,Char/little-utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf32>> = Bin, + <<Char/little-utf32>> = make_unaligned(Bin), + ok. + +%%------------------------------------------------------------------- + +utf8_illegal_sequences() -> + fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + + %% Illegal first character. + [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)], + + %% Short sequences. + short_sequences(16#80, 16#10FFFF), + + %% Overlong sequences. (Using more bytes than necessary + %% is not allowed.) + overlong(0, 127, 2), + overlong(128, 16#7FF, 3), + overlong(16#800, 16#FFFF, 4), + ok. + +fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/utf8>>), + Bin = int_to_utf8(Char), + fail(Bin), + fail_range(Char+1, End); +fail_range(_, _) -> ok. + +short_sequences(Char, End) -> + Step = (End - Char) div erlang:system_info(schedulers) + 1, + PidRefs = short_sequences_1(Char, Step, End), + [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end || + {Pid, Ref} <- PidRefs], + ok. + +short_sequences_1(Char, Step, End) when Char =< End -> + CharEnd = lists:min([Char+Step-1,End]), + [spawn_monitor(fun() -> + %% io:format("~p - ~p\n", [Char, CharEnd]), + do_short_sequences(Char, CharEnd) + end)|short_sequences_1(Char+Step, Step, End)]; +short_sequences_1(_, _, _) -> []. + +do_short_sequences(Char, End) when Char =< End -> + short_sequence(Char), + do_short_sequences(Char+1, End); +do_short_sequences(_, _) -> ok. + +short_sequence(I) -> + case int_to_utf8(I) of + <<S0:3/binary,_:8>> -> + <<S1:2/binary,R1:8>> = S0, + <<S2:1/binary,_:8>> = S1, + fail(S0), + fail(S1), + fail(S2), + fail(<<S2/binary,16#7F,R1,R1>>), + fail(<<S1/binary,16#7F,R1>>), + fail(<<S0/binary,16#7F>>); + <<S0:2/binary,_:8>> -> + <<S1:1/binary,R1:8>> = S0, + fail(S0), + fail(S1), + fail(<<S0/binary,16#7F>>), + fail(<<S1/binary,16#7F>>), + fail(<<S1/binary,16#7F,R1>>); + <<S:1/binary,_:8>> -> + fail(S), + fail(<<S/binary,16#7F>>) + end. + +overlong(Char, Last, NumBytes) when Char =< Last -> + overlong(Char, NumBytes), + overlong(Char+1, Last, NumBytes); +overlong(_, _, _) -> ok. + +overlong(Char, NumBytes) when NumBytes < 5 -> + case int_to_utf8(Char, NumBytes) of + <<Char/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char}); + <<OtherChar/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar}); + _ -> ok + end, + overlong(Char, NumBytes+1); +overlong(_, _) -> ok. + +fail(Bin) -> + fail_1(Bin), + fail_1(make_unaligned(Bin)). + +fail_1(<<Char/utf8>> = Bin) -> + ?t:fail({illegal_encoding_accepted, Bin, Char}); +fail_1(_) -> ok. + +%%------------------------------------------------------------------- + +utf16_illegal_sequences() -> + utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + utf16_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + lonely_hi_surrogate(16#D800, 16#DFFF), + leading_lo_surrogate(16#DC00, 16#DFFF), + ok. + +utf16_fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/big-utf16>>), + {'EXIT', _} = (catch <<Char/little-utf16>>), + utf16_fail_range(Char+1, End); +utf16_fail_range(_, _) -> ok. + +lonely_hi_surrogate(Char, End) when Char =< End -> + BinBig = <<Char:16/big>>, + BinLittle = <<Char:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16>>,_} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16>>} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + lonely_hi_surrogate(Char+1, End); +lonely_hi_surrogate(_, _) -> ok. + +leading_lo_surrogate(Char, End) when Char =< End -> + leading_lo_surrogate(Char, 16#D800, 16#DFFF), + leading_lo_surrogate(Char+1, End); +leading_lo_surrogate(_, _) -> ok. + +leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End -> + BinBig = <<HiSurr:16/big,LoSurr:16/big>>, + BinLittle = <<HiSurr:16/little,LoSurr:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16,_/bits>>,_} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16,_/bits>>} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + leading_lo_surrogate(HiSurr, LoSurr+1, End); +leading_lo_surrogate(_, _, _) -> ok. + +%%------------------------------------------------------------------- + +utf32_illegal_sequences() -> + utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large. + utf32_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16. + utf32_fail_range(-100, -1), + ok. + +utf32_fail_range(Char, End) when Char =< End -> + {'EXIT', _} = (catch <<Char/big-utf32>>), + {'EXIT', _} = (catch <<Char/little-utf32>>), + case {<<Char:32>>,<<Char:32/little>>} of + {<<Unexpected/utf32>>,_} -> + ?t:fail(Unexpected); + {_,<<Unexpected/little-utf32>>} -> + ?t:fail(Unexpected); + {_,_} -> ok + end, + utf32_fail_range(Char+1, End); +utf32_fail_range(_, _) -> ok. + +%%------------------------------------------------------------------- +%% This function intentionally allows construction of UTF-8 sequence +%% in illegal ranges. + +int_to_utf8(I) when I =< 16#7F -> + <<I>>; +int_to_utf8(I) when I =< 16#7FF -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I) when I =< 16#FFFF -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I) when I =< 16#3FFFFF -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; +int_to_utf8(I) when I =< 16#3FFFFFF -> + B5 = I, + B4 = (I bsr 6), + B3 = (I bsr 12), + B2 = (I bsr 18), + B1 = (I bsr 24), + <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6, + 1:1,0:1,B5:6>>. + +%% int_to_utf8(I, NumberOfBytes) -> Binary. +%% This function can be used to construct overlong sequences. +int_to_utf8(I, 1) -> + <<I>>; +int_to_utf8(I, 2) -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I, 3) -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I, 4) -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>. + +%%------------------------------------------------------------------- + +make_unaligned(Bin0) when is_binary(Bin0) -> + Bin1 = <<0:3,Bin0/binary,31:5>>, + Sz = byte_size(Bin0), + <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), + Bin. + +%%------------------------------------------------------------------- +%% Just to prevent compiler optimizations + +id(X) -> X. |