aboutsummaryrefslogtreecommitdiffstats
path: root/lib/hipe/test/bs_SUITE_data
diff options
context:
space:
mode:
Diffstat (limited to 'lib/hipe/test/bs_SUITE_data')
-rw-r--r--lib/hipe/test/bs_SUITE_data/bs_add.erl10
-rw-r--r--lib/hipe/test/bs_SUITE_data/bs_construct.erl177
-rw-r--r--lib/hipe/test/bs_SUITE_data/bs_match.erl120
-rw-r--r--lib/hipe/test/bs_SUITE_data/bs_split.erl10
-rw-r--r--lib/hipe/test/bs_SUITE_data/bs_utf.erl344
5 files changed, 649 insertions, 12 deletions
diff --git a/lib/hipe/test/bs_SUITE_data/bs_add.erl b/lib/hipe/test/bs_SUITE_data/bs_add.erl
index af5a3b2f23..4b92e6b413 100644
--- a/lib/hipe/test/bs_SUITE_data/bs_add.erl
+++ b/lib/hipe/test/bs_SUITE_data/bs_add.erl
@@ -2,7 +2,7 @@
%%-------------------------------------------------------------------------
%% The guard in f/3 revealed a problem in the translation of the 'bs_add'
%% BEAM instruction to Icode. The fail label was not properly translated.
-%% Fixed 3/2/2011.
+%% Fixed 3/2/2011. Then in 2015 we found another issue: g/2. Also fixed.
%%-------------------------------------------------------------------------
-module(bs_add).
@@ -10,9 +10,17 @@
test() ->
42 = f(<<12345:16>>, 4711, <<42>>),
+ true = g(<<1:13>>, 3), %% was handled OK, but
+ false = g(<<>>, gurka), %% this one leaked badarith
ok.
f(Bin, A, B) when <<A:9, B:7/binary>> == Bin ->
gazonk;
f(Bin, _, _) when is_binary(Bin) ->
42.
+
+%% Complex way of testing (bit_size(Bin) + Len) rem 8 =:= 0
+g(Bin, Len) when is_binary(<<Bin/binary-unit:1, 123:Len>>) ->
+ true;
+g(_, _) ->
+ false.
diff --git a/lib/hipe/test/bs_SUITE_data/bs_construct.erl b/lib/hipe/test/bs_SUITE_data/bs_construct.erl
index 9cc9ac848c..b9e7d93570 100644
--- a/lib/hipe/test/bs_SUITE_data/bs_construct.erl
+++ b/lib/hipe/test/bs_SUITE_data/bs_construct.erl
@@ -13,6 +13,12 @@ test() ->
ok = bs5(),
16#10000008 = bit_size(large_bin(1, 2, 3, 4)),
ok = bad_ones(),
+ ok = zero_width(),
+ ok = not_used(),
+ ok = bad_append(),
+ ok = system_limit(),
+ ok = bad_floats(),
+ ok = huge_binaries(),
ok.
%%--------------------------------------------------------------------
@@ -126,3 +132,174 @@ bad_ones() ->
Bin123 = <<1,2,3>>,
?FAIL(<<Bin123/float>>),
ok.
+
+%%--------------------------------------------------------------------
+%% Taken from the emulator bs_construct_SUITE - seg faulted till 18.1
+
+zero_width() ->
+ Z = id(0),
+ Small = id(42),
+ Big = id(1 bsl 128), % puts stuff on the heap
+ <<>> = <<Small:Z>>,
+ <<>> = <<Small:0>>,
+ <<>> = <<Big:Z>>,
+ <<>> = <<Big:0>>,
+ ok.
+
+id(X) -> X.
+
+%%--------------------------------------------------------------------
+%% Taken from bs_construct_SUITE. The test checks that constructed
+%% binaries that are not used would still give a `badarg' exception.
+%% Problem was that in native code one of them gave `badarith'.
+
+not_used() ->
+ ok = not_used1(3, <<"dum">>),
+ {'EXIT',{badarg,_}} = (catch not_used1(42, "dum_string")),
+ {'EXIT',{badarg,_}} = (catch not_used2(666, -2)),
+ {'EXIT',{badarg,_}} = (catch not_used2(666, "bad_size")), % this one
+ {'EXIT',{badarg,_}} = (catch not_used3(666)),
+ ok.
+
+not_used1(I, BinString) ->
+ <<I:32,BinString/binary>>,
+ ok.
+
+not_used2(I, Sz) ->
+ <<I:Sz>>,
+ ok.
+
+not_used3(I) ->
+ <<I:(-8)>>,
+ ok.
+
+%%--------------------------------------------------------------------
+%% Taken from bs_construct_SUITE.
+
+bad_append() ->
+ do_bad_append(<<127:1>>, fun append_unit_3/1),
+ do_bad_append(<<127:2>>, fun append_unit_3/1),
+ do_bad_append(<<127:17>>, fun append_unit_3/1),
+
+ do_bad_append(<<127:3>>, fun append_unit_4/1),
+ do_bad_append(<<127:5>>, fun append_unit_4/1),
+ do_bad_append(<<127:7>>, fun append_unit_4/1),
+ do_bad_append(<<127:199>>, fun append_unit_4/1),
+
+ do_bad_append(<<127:7>>, fun append_unit_8/1),
+ do_bad_append(<<127:9>>, fun append_unit_8/1),
+
+ do_bad_append(<<0:8>>, fun append_unit_16/1),
+ do_bad_append(<<0:15>>, fun append_unit_16/1),
+ do_bad_append(<<0:17>>, fun append_unit_16/1),
+ ok.
+
+do_bad_append(Bin0, Appender) ->
+ {'EXIT',{badarg,_}} = (catch Appender(Bin0)),
+
+ Bin1 = id(<<0:3,Bin0/bitstring>>),
+ <<_:3,Bin2/bitstring>> = Bin1,
+ {'EXIT',{badarg,_}} = (catch Appender(Bin2)),
+
+ %% Create a writable binary.
+ Empty = id(<<>>),
+ Bin3 = <<Empty/bitstring,Bin0/bitstring>>,
+ {'EXIT',{badarg,_}} = (catch Appender(Bin3)),
+ ok.
+
+append_unit_3(Bin) ->
+ <<Bin/binary-unit:3,0:1>>.
+
+append_unit_4(Bin) ->
+ <<Bin/binary-unit:4,0:1>>.
+
+append_unit_8(Bin) ->
+ <<Bin/binary,0:1>>.
+
+append_unit_16(Bin) ->
+ <<Bin/binary-unit:16,0:1>>.
+
+%%--------------------------------------------------------------------
+%% Taken from bs_construct_SUITE.
+
+system_limit() ->
+ WordSize = erlang:system_info(wordsize),
+ BitsPerWord = WordSize * 8,
+ {'EXIT',{system_limit,_}} =
+ (catch <<0:(id(0)),42:(id(1 bsl BitsPerWord))>>),
+ {'EXIT',{system_limit,_}} =
+ (catch <<42:(id(1 bsl BitsPerWord)),0:(id(0))>>),
+ {'EXIT',{system_limit,_}} =
+ (catch <<(id(<<>>))/binary,0:(id(1 bsl 100))>>),
+
+ %% Would fail to load.
+ {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 67)>>),
+ {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 64)+1)>>),
+ case WordSize of
+ 4 ->
+ system_limit_32();
+ 8 ->
+ ok
+ end.
+
+system_limit_32() ->
+ {'EXIT',{badarg,_}} = (catch <<42:(-1)>>),
+ {'EXIT',{badarg,_}} = (catch <<42:(id(-1))>>),
+ {'EXIT',{badarg,_}} = (catch <<42:(id(-389739873536870912))/unit:8>>),
+ {'EXIT',{system_limit,_}} = (catch <<42:536870912/unit:8>>),
+ {'EXIT',{system_limit,_}} = (catch <<42:(id(536870912))/unit:8>>),
+ {'EXIT',{system_limit,_}} = (catch <<0:(id(8)),42:536870912/unit:8>>),
+ {'EXIT',{system_limit,_}} = (catch <<0:(id(8)),42:(id(536870912))/unit:8>>),
+
+ %% The size would be silently truncated, resulting in a crash.
+ {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 35)>>),
+ {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 32)+1)>>),
+
+ %% Would fail to load.
+ {'EXIT',{system_limit,_}} = (catch <<0:(1 bsl 43)>>),
+ {'EXIT',{system_limit,_}} = (catch <<0:((1 bsl 40)+1)>>),
+ ok.
+
+%%--------------------------------------------------------------------
+
+bad_floats() ->
+ WordSize = erlang:system_info(wordsize),
+ BitsPerWord = WordSize * 8,
+ {'EXIT',{badarg,_}} = (catch <<3.14:(id(33))/float>>),
+ {'EXIT',{badarg,_}} = (catch <<3.14:(id(64 bor 32))/float>>),
+ {'EXIT',{badarg,_}} = (catch <<3.14:(id((1 bsl 28) bor 32))/float>>),
+ {'EXIT',{system_limit,_}} = (catch <<3.14:(id(1 bsl BitsPerWord))/float>>),
+ ok.
+
+%%--------------------------------------------------------------------
+%% A bug in the implementation of binaries compared sizes in bits with sizes in
+%% bytes, causing <<0:(id((1 bsl 31)-1))>> to fail to construct with
+%% 'system_limit'.
+%% <<0:(id((1 bsl 32)-1))>> was succeeding because the comparison was
+%% (incorrectly) signed.
+
+huge_binaries() ->
+ AlmostIllegal = id(<<0:(id((1 bsl 32)-8))>>),
+ case erlang:system_info(wordsize) of
+ 4 -> huge_binaries_32(AlmostIllegal);
+ 8 -> ok
+ end,
+ garbage_collect(),
+ id(<<0:(id((1 bsl 31)-1))>>),
+ id(<<0:(id((1 bsl 30)-1))>>),
+ garbage_collect(),
+ ok.
+
+huge_binaries_32(AlmostIllegal) ->
+ %% Attempt construction of too large binary using bs_init/1 (which takes the
+ %% number of bytes as an argument, which should be compared to the maximum
+ %% size in bytes).
+ {'EXIT',{system_limit,_}} = (catch <<0:32,AlmostIllegal/binary>>),
+ %% Attempt construction of too large binary using bs_init/1 with a size in
+ %% bytes that has the msb set (and would be negative if it was signed).
+ {'EXIT',{system_limit,_}} =
+ (catch <<0:8, AlmostIllegal/binary, AlmostIllegal/binary,
+ AlmostIllegal/binary, AlmostIllegal/binary,
+ AlmostIllegal/binary, AlmostIllegal/binary,
+ AlmostIllegal/binary, AlmostIllegal/binary>>),
+ ok.
diff --git a/lib/hipe/test/bs_SUITE_data/bs_match.erl b/lib/hipe/test/bs_SUITE_data/bs_match.erl
index 8194d878b8..b241ea8d35 100644
--- a/lib/hipe/test/bs_SUITE_data/bs_match.erl
+++ b/lib/hipe/test/bs_SUITE_data/bs_match.erl
@@ -1,8 +1,8 @@
%%% -*- erlang-indent-level: 2 -*-
%%%-------------------------------------------------------------------
%%% File : bs_match.erl
-%%% Author : Per Gustafsson <[email protected]>
-%%% Purpose : Performs simple matching and construction of binaries
+%%% Authors : Per Gustafsson <[email protected]>, Kostis Sagonas <[email protected]>
+%%% Purpose : Tests matching and construction of binaries
%%% TODO : Add binary and float tests
%%% Created : 20 Feb 2004
%%%-------------------------------------------------------------------
@@ -12,7 +12,8 @@
test() ->
Funs = [fun test_aligned/0, fun test_unaligned/0,
- fun test_zero_tail/0, fun test_integer_matching/0],
+ fun test_zero_tail/0, fun test_integer_matching/0,
+ fun test_writable_bin/0, fun test_match_huge_bin/0],
lists:foreach(fun (F) -> ok = F() end, Funs).
%%-------------------------------------------------------------------
@@ -173,3 +174,116 @@ test_dynamic_integer_matching(N) ->
<<12:N/integer, 0:S>> = <<12:N/integer, 0:S>>,
<<12:N/integer-little, 0:S>> = <<12:N/integer-little, 0:S>>,
ok.
+
+%%-------------------------------------------------------------------
+%% Test writable bin -- added by Sverker Eriksson
+
+test_writable_bin() ->
+ test_writable_bin(<<>>, 0),
+ ok.
+
+test_writable_bin(Bin, 128) ->
+ Bin;
+test_writable_bin(Bin0, N) when N < 128 ->
+ Bin1 = <<Bin0/binary, N>>,
+ <<_/utf8, _/binary>> = Bin1,
+ test_writable_bin(Bin1, N+1).
+
+%%-------------------------------------------------------------------
+%% Test matching with a huge bin -- taken from bs_match_bin_SUITE
+
+test_match_huge_bin() ->
+ Bin = <<0:(1 bsl 27),13:8>>,
+ skip_huge_bin_1(1 bsl 27, Bin),
+ 16777216 = match_huge_bin_1(1 bsl 27, Bin),
+ %% Test overflowing the size of a binary field.
+ nomatch = overflow_huge_bin_skip_32(Bin),
+ nomatch = overflow_huge_bin_32(Bin),
+ nomatch = overflow_huge_bin_skip_64(Bin),
+ nomatch = overflow_huge_bin_64(Bin),
+ %% Size in variable
+ ok = overflow_huge_bin(Bin, lists:seq(25, 32)++lists:seq(50, 64)),
+ ok = overflow_huge_bin_unit128(Bin, lists:seq(25, 32)++lists:seq(50, 64)),
+ ok.
+
+overflow_huge_bin(Bin, [Sz0|Sizes]) ->
+ Sz = id(1 bsl Sz0),
+ case Bin of
+ <<_:Sz/binary-unit:8,0,_/binary>> ->
+ {error,Sz};
+ _ ->
+ case Bin of
+ <<NewBin:Sz/binary-unit:8,0,_/binary>> ->
+ {error,Sz,size(NewBin)};
+ _ ->
+ overflow_huge_bin(Bin, Sizes)
+ end
+ end;
+overflow_huge_bin(_, []) -> ok.
+
+overflow_huge_bin_unit128(Bin, [Sz0|Sizes]) ->
+ Sz = id(1 bsl Sz0),
+ case Bin of
+ <<_:Sz/binary-unit:128,0,_/binary>> ->
+ {error,Sz};
+ _ ->
+ case Bin of
+ <<NewBin:Sz/binary-unit:128,0,_/binary>> ->
+ {error,Sz,size(NewBin)};
+ _ ->
+ overflow_huge_bin_unit128(Bin, Sizes)
+ end
+ end;
+overflow_huge_bin_unit128(_, []) -> ok.
+
+skip_huge_bin_1(I, Bin) ->
+ <<_:I/binary-unit:1,13>> = Bin,
+ ok.
+
+match_huge_bin_1(I, Bin) ->
+ case Bin of
+ <<Val:I/binary-unit:1,13>> -> size(Val);
+ _ -> nomatch
+ end.
+
+overflow_huge_bin_skip_32(<<_:4294967296/binary,0,_/binary>>) -> 1; % 1 bsl 32
+overflow_huge_bin_skip_32(<<_:33554432/binary-unit:128,0,_/binary>>) -> 2; % 1 bsl 25
+overflow_huge_bin_skip_32(<<_:67108864/binary-unit:64,0,_/binary>>) -> 3; % 1 bsl 26
+overflow_huge_bin_skip_32(<<_:134217728/binary-unit:32,0,_/binary>>) -> 4; % 1 bsl 27
+overflow_huge_bin_skip_32(<<_:268435456/binary-unit:16,0,_/binary>>) -> 5; % 1 bsl 28
+overflow_huge_bin_skip_32(<<_:536870912/binary-unit:8,0,_/binary>>) -> 6; % 1 bsl 29
+overflow_huge_bin_skip_32(<<_:1073741824/binary-unit:8,0,_/binary>>) -> 7; % 1 bsl 30
+overflow_huge_bin_skip_32(<<_:2147483648/binary-unit:8,0,_/binary>>) -> 8; % 1 bsl 31
+overflow_huge_bin_skip_32(_) -> nomatch.
+
+overflow_huge_bin_32(<<Bin:4294967296/binary,_/binary>>) -> {1,Bin}; % 1 bsl 32
+overflow_huge_bin_32(<<Bin:33554432/binary-unit:128,0,_/binary>>) -> {2,Bin}; % 1 bsl 25
+overflow_huge_bin_32(<<Bin:67108864/binary-unit:128,0,_/binary>>) -> {3,Bin}; % 1 bsl 26
+overflow_huge_bin_32(<<Bin:134217728/binary-unit:128,0,_/binary>>) -> {4,Bin}; % 1 bsl 27
+overflow_huge_bin_32(<<Bin:268435456/binary-unit:128,0,_/binary>>) -> {5,Bin}; % 1 bsl 28
+overflow_huge_bin_32(<<Bin:536870912/binary-unit:128,0,_/binary>>) -> {6,Bin}; % 1 bsl 29
+overflow_huge_bin_32(<<Bin:1073741824/binary-unit:128,0,_/binary>>) -> {7,Bin}; % 1 bsl 30
+overflow_huge_bin_32(<<Bin:2147483648/binary-unit:128,0,_/binary>>) -> {8,Bin}; % 1 bsl 31
+overflow_huge_bin_32(_) -> nomatch.
+
+overflow_huge_bin_skip_64(<<_:18446744073709551616/binary,0,_/binary>>) -> 1; % 1 bsl 64
+overflow_huge_bin_skip_64(<<_:144115188075855872/binary-unit:128,0,_/binary>>) -> 2; % 1 bsl 57
+overflow_huge_bin_skip_64(<<_:288230376151711744/binary-unit:64,0,_/binary>>) -> 3; % 1 bsl 58
+overflow_huge_bin_skip_64(<<_:576460752303423488/binary-unit:32,0,_/binary>>) -> 4; % 1 bsl 59
+overflow_huge_bin_skip_64(<<_:1152921504606846976/binary-unit:16,0,_/binary>>) -> 5; % 1 bsl 60
+overflow_huge_bin_skip_64(<<_:2305843009213693952/binary-unit:8,0,_/binary>>) -> 6; % 1 bsl 61
+overflow_huge_bin_skip_64(<<_:4611686018427387904/binary-unit:8,0,_/binary>>) -> 7; % 1 bsl 62
+overflow_huge_bin_skip_64(<<_:9223372036854775808/binary-unit:8,_/binary>>) -> 8; % 1 bsl 63
+overflow_huge_bin_skip_64(_) -> nomatch.
+
+overflow_huge_bin_64(<<Bin:18446744073709551616/binary,_/binary>>) -> {1,Bin}; % 1 bsl 64
+overflow_huge_bin_64(<<Bin:144115188075855872/binary-unit:128,0,_/binary>>) -> {2,Bin}; % 1 bsl 57
+overflow_huge_bin_64(<<Bin:288230376151711744/binary-unit:128,0,_/binary>>) -> {3,Bin}; % 1 bsl 58
+overflow_huge_bin_64(<<Bin:576460752303423488/binary-unit:128,0,_/binary>>) -> {4,Bin}; % 1 bsl 59
+overflow_huge_bin_64(<<Bin:1152921504606846976/binary-unit:128,0,_/binary>>) -> {5,Bin}; % 1 bsl 60
+overflow_huge_bin_64(<<Bin:2305843009213693952/binary-unit:128,0,_/binary>>) -> {6,Bin}; % 1 bsl 61
+overflow_huge_bin_64(<<Bin:4611686018427387904/binary-unit:128,0,_/binary>>) -> {7,Bin}; % 1 bsl 62
+overflow_huge_bin_64(<<Bin:9223372036854775808/binary-unit:128,0,_/binary>>) -> {8,Bin}; % 1 bsl 63
+overflow_huge_bin_64(_) -> nomatch.
+
+id(I) -> I.
diff --git a/lib/hipe/test/bs_SUITE_data/bs_split.erl b/lib/hipe/test/bs_SUITE_data/bs_split.erl
index 2e52308a77..617543f789 100644
--- a/lib/hipe/test/bs_SUITE_data/bs_split.erl
+++ b/lib/hipe/test/bs_SUITE_data/bs_split.erl
@@ -26,13 +26,13 @@ bs1(L, B, Pos, Sz1, Sz2) ->
<<B1:Sz1/binary, B2:Sz2/binary>> = B,
bs2(L, B, Pos, B1, B2).
-bs2(L, B, Pos, B1, B2)->
+bs2(L, B, Pos, B1, B2) ->
B1 = list_to_binary(lists:sublist(L, 1, Pos)),
bs3(L, B, Pos, B2).
bs3(L, B, Pos, B2) ->
B2 = list_to_binary(lists:nthtail(Pos, L)),
- byte_split(L, B, Pos-1).
+ byte_split(L, B, Pos - 1).
%%--------------------------------------------------------------------
@@ -56,14 +56,14 @@ bit_split_binary2(_Action, _Bin, [], _Bef) -> ok.
bit_split_binary3(Action, Bin, List, Bef, Aft) when Bef =< Aft ->
Action(Bin, List, Bef, (Aft-Bef) div 8 * 8),
- bit_split_binary3(Action, Bin, List, Bef, Aft-8);
+ bit_split_binary3(Action, Bin, List, Bef, Aft - 8);
bit_split_binary3(_, _, _, _, _) -> ok.
make_bin_from_list(_List, 0) ->
mkbin([]);
make_bin_from_list(List, N) ->
list_to_binary([make_int(List, 8, 0),
- make_bin_from_list(lists:nthtail(8, List), N-8)]).
+ make_bin_from_list(lists:nthtail(8, List), N - 8)]).
make_int(_List, 0, Acc) -> Acc;
make_int([H|T], N, Acc) -> make_int(T, N-1, Acc bsl 1 bor H).
@@ -101,5 +101,5 @@ z_split(B, N) ->
<<_:N/binary>> ->
[B];
_ ->
- z_split(B, N+1)
+ z_split(B, N + 1)
end.
diff --git a/lib/hipe/test/bs_SUITE_data/bs_utf.erl b/lib/hipe/test/bs_SUITE_data/bs_utf.erl
index f50ae08964..24526f574d 100644
--- a/lib/hipe/test/bs_SUITE_data/bs_utf.erl
+++ b/lib/hipe/test/bs_SUITE_data/bs_utf.erl
@@ -1,18 +1,356 @@
%% -*- erlang-indent-level: 2 -*-
%%-------------------------------------------------------------------
-%% Purpose: test support for UTF datatypes in binaries - INCOMPLETE
+%% Purpose: test support for UTF datatypes in binaries
+%%
+%% Most of it taken from emulator/test/bs_utf_SUITE.erl
%%-------------------------------------------------------------------
-module(bs_utf).
-export([test/0]).
+-include_lib("test_server/include/test_server.hrl").
+
test() ->
+ ok = utf8_cm65(),
+ ok = utf8_roundtrip(),
+ ok = utf16_roundtrip(),
+ ok = utf32_roundtrip(),
+ %% The following were problematic for the LLVM backend
+ ok = utf8_illegal_sequences(),
+ ok = utf16_illegal_sequences(),
+ ok = utf32_illegal_sequences(),
+ ok.
+
+%%-------------------------------------------------------------------
+%% A test with construction and matching
+
+utf8_cm65() ->
<<65>> = b65utf8(),
ok = m(<<65>>).
+b65utf8() ->
+ <<65/utf8>>.
+
m(<<65/utf8>>) ->
ok.
-b65utf8() ->
- <<65/utf8>>.
+%%-------------------------------------------------------------------
+
+utf8_roundtrip() ->
+ ok = utf8_roundtrip(0, 16#D7FF),
+ ok = utf8_roundtrip(16#E000, 16#10FFFF),
+ ok.
+
+utf8_roundtrip(First, Last) when First =< Last ->
+ Bin = int_to_utf8(First),
+ Bin = id(<<First/utf8>>),
+ Bin = id(<<(id(<<>>))/binary,First/utf8>>),
+ Unaligned = id(<<3:2,First/utf8>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<First/utf8>> = Bin,
+ <<First/utf8>> = make_unaligned(Bin),
+ utf8_roundtrip(First+1, Last);
+utf8_roundtrip(_, _) ->
+ ok.
+
+%%-------------------------------------------------------------------
+
+utf16_roundtrip() ->
+ Big = fun utf16_big_roundtrip/1,
+ Little = fun utf16_little_roundtrip/1,
+ PidRefs = [spawn_monitor(fun() -> do_utf16_roundtrip(Fun) end) ||
+ Fun <- [Big,Little]],
+ [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end ||
+ {Pid, Ref} <- PidRefs],
+ ok.
+
+do_utf16_roundtrip(Fun) ->
+ do_utf16_roundtrip(0, 16#D7FF, Fun),
+ do_utf16_roundtrip(16#E000, 16#10FFFF, Fun).
+
+do_utf16_roundtrip(First, Last, Fun) when First =< Last ->
+ Fun(First),
+ do_utf16_roundtrip(First+1, Last, Fun);
+do_utf16_roundtrip(_, _, _) -> ok.
+
+utf16_big_roundtrip(Char) ->
+ Bin = id(<<Char/utf16>>),
+ Bin = id(<<(id(<<>>))/binary,Char/utf16>>),
+ Unaligned = id(<<3:2,Char/utf16>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/utf16>> = Bin,
+ <<Char/utf16>> = make_unaligned(Bin),
+ ok.
+
+utf16_little_roundtrip(Char) ->
+ Bin = id(<<Char/little-utf16>>),
+ Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>),
+ Unaligned = id(<<3:2,Char/little-utf16>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/little-utf16>> = Bin,
+ <<Char/little-utf16>> = make_unaligned(Bin),
+ ok.
+
+%%-------------------------------------------------------------------
+
+utf32_roundtrip() ->
+ Big = fun utf32_big_roundtrip/1,
+ Little = fun utf32_little_roundtrip/1,
+ PidRefs = [spawn_monitor(fun() -> do_utf32_roundtrip(Fun) end) ||
+ Fun <- [Big,Little]],
+ [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end ||
+ {Pid, Ref} <- PidRefs],
+ ok.
+
+do_utf32_roundtrip(Fun) ->
+ do_utf32_roundtrip(0, 16#D7FF, Fun),
+ do_utf32_roundtrip(16#E000, 16#10FFFF, Fun).
+
+do_utf32_roundtrip(First, Last, Fun) when First =< Last ->
+ Fun(First),
+ do_utf32_roundtrip(First+1, Last, Fun);
+do_utf32_roundtrip(_, _, _) -> ok.
+
+utf32_big_roundtrip(Char) ->
+ Bin = id(<<Char/utf32>>),
+ Bin = id(<<(id(<<>>))/binary,Char/utf32>>),
+ Unaligned = id(<<3:2,Char/utf32>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/utf32>> = Bin,
+ <<Char/utf32>> = make_unaligned(Bin),
+ ok.
+
+utf32_little_roundtrip(Char) ->
+ Bin = id(<<Char/little-utf32>>),
+ Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>),
+ Unaligned = id(<<3:2,Char/little-utf32>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/little-utf32>> = Bin,
+ <<Char/little-utf32>> = make_unaligned(Bin),
+ ok.
+
+%%-------------------------------------------------------------------
+
+utf8_illegal_sequences() ->
+ fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large.
+ fail_range(16#D800, 16#DFFF), % Reserved for UTF-16.
+
+ %% Illegal first character.
+ [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)],
+
+ %% Short sequences.
+ short_sequences(16#80, 16#10FFFF),
+
+ %% Overlong sequences. (Using more bytes than necessary
+ %% is not allowed.)
+ overlong(0, 127, 2),
+ overlong(128, 16#7FF, 3),
+ overlong(16#800, 16#FFFF, 4),
+ ok.
+
+fail_range(Char, End) when Char =< End ->
+ {'EXIT', _} = (catch <<Char/utf8>>),
+ Bin = int_to_utf8(Char),
+ fail(Bin),
+ fail_range(Char+1, End);
+fail_range(_, _) -> ok.
+
+short_sequences(Char, End) ->
+ Step = (End - Char) div erlang:system_info(schedulers) + 1,
+ PidRefs = short_sequences_1(Char, Step, End),
+ [receive {'DOWN', Ref, process, Pid, Reason} -> normal=Reason end ||
+ {Pid, Ref} <- PidRefs],
+ ok.
+
+short_sequences_1(Char, Step, End) when Char =< End ->
+ CharEnd = lists:min([Char+Step-1,End]),
+ [spawn_monitor(fun() ->
+ %% io:format("~p - ~p\n", [Char, CharEnd]),
+ do_short_sequences(Char, CharEnd)
+ end)|short_sequences_1(Char+Step, Step, End)];
+short_sequences_1(_, _, _) -> [].
+
+do_short_sequences(Char, End) when Char =< End ->
+ short_sequence(Char),
+ do_short_sequences(Char+1, End);
+do_short_sequences(_, _) -> ok.
+
+short_sequence(I) ->
+ case int_to_utf8(I) of
+ <<S0:3/binary,_:8>> ->
+ <<S1:2/binary,R1:8>> = S0,
+ <<S2:1/binary,_:8>> = S1,
+ fail(S0),
+ fail(S1),
+ fail(S2),
+ fail(<<S2/binary,16#7F,R1,R1>>),
+ fail(<<S1/binary,16#7F,R1>>),
+ fail(<<S0/binary,16#7F>>);
+ <<S0:2/binary,_:8>> ->
+ <<S1:1/binary,R1:8>> = S0,
+ fail(S0),
+ fail(S1),
+ fail(<<S0/binary,16#7F>>),
+ fail(<<S1/binary,16#7F>>),
+ fail(<<S1/binary,16#7F,R1>>);
+ <<S:1/binary,_:8>> ->
+ fail(S),
+ fail(<<S/binary,16#7F>>)
+ end.
+
+overlong(Char, Last, NumBytes) when Char =< Last ->
+ overlong(Char, NumBytes),
+ overlong(Char+1, Last, NumBytes);
+overlong(_, _, _) -> ok.
+
+overlong(Char, NumBytes) when NumBytes < 5 ->
+ case int_to_utf8(Char, NumBytes) of
+ <<Char/utf8>>=Bin ->
+ ?t:fail({illegal_encoding_accepted,Bin,Char});
+ <<OtherChar/utf8>>=Bin ->
+ ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar});
+ _ -> ok
+ end,
+ overlong(Char, NumBytes+1);
+overlong(_, _) -> ok.
+
+fail(Bin) ->
+ fail_1(Bin),
+ fail_1(make_unaligned(Bin)).
+
+fail_1(<<Char/utf8>> = Bin) ->
+ ?t:fail({illegal_encoding_accepted, Bin, Char});
+fail_1(_) -> ok.
+
+%%-------------------------------------------------------------------
+
+utf16_illegal_sequences() ->
+ utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large.
+ utf16_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16.
+ lonely_hi_surrogate(16#D800, 16#DFFF),
+ leading_lo_surrogate(16#DC00, 16#DFFF),
+ ok.
+
+utf16_fail_range(Char, End) when Char =< End ->
+ {'EXIT', _} = (catch <<Char/big-utf16>>),
+ {'EXIT', _} = (catch <<Char/little-utf16>>),
+ utf16_fail_range(Char+1, End);
+utf16_fail_range(_, _) -> ok.
+
+lonely_hi_surrogate(Char, End) when Char =< End ->
+ BinBig = <<Char:16/big>>,
+ BinLittle = <<Char:16/little>>,
+ case {BinBig,BinLittle} of
+ {<<Bad/big-utf16>>,_} ->
+ ?t:fail({lonely_hi_surrogate_accepted,Bad});
+ {_,<<Bad/little-utf16>>} ->
+ ?t:fail({lonely_hi_surrogate_accepted,Bad});
+ {_,_} ->
+ ok
+ end,
+ lonely_hi_surrogate(Char+1, End);
+lonely_hi_surrogate(_, _) -> ok.
+
+leading_lo_surrogate(Char, End) when Char =< End ->
+ leading_lo_surrogate(Char, 16#D800, 16#DFFF),
+ leading_lo_surrogate(Char+1, End);
+leading_lo_surrogate(_, _) -> ok.
+
+leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End ->
+ BinBig = <<HiSurr:16/big,LoSurr:16/big>>,
+ BinLittle = <<HiSurr:16/little,LoSurr:16/little>>,
+ case {BinBig,BinLittle} of
+ {<<Bad/big-utf16,_/bits>>,_} ->
+ ?t:fail({leading_lo_surrogate_accepted,Bad});
+ {_,<<Bad/little-utf16,_/bits>>} ->
+ ?t:fail({leading_lo_surrogate_accepted,Bad});
+ {_,_} ->
+ ok
+ end,
+ leading_lo_surrogate(HiSurr, LoSurr+1, End);
+leading_lo_surrogate(_, _, _) -> ok.
+
+%%-------------------------------------------------------------------
+
+utf32_illegal_sequences() ->
+ utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), % Too large.
+ utf32_fail_range(16#D800, 16#DFFF), % Reserved for UTF-16.
+ utf32_fail_range(-100, -1),
+ ok.
+
+utf32_fail_range(Char, End) when Char =< End ->
+ {'EXIT', _} = (catch <<Char/big-utf32>>),
+ {'EXIT', _} = (catch <<Char/little-utf32>>),
+ case {<<Char:32>>,<<Char:32/little>>} of
+ {<<Unexpected/utf32>>,_} ->
+ ?t:fail(Unexpected);
+ {_,<<Unexpected/little-utf32>>} ->
+ ?t:fail(Unexpected);
+ {_,_} -> ok
+ end,
+ utf32_fail_range(Char+1, End);
+utf32_fail_range(_, _) -> ok.
+
+%%-------------------------------------------------------------------
+%% This function intentionally allows construction of UTF-8 sequence
+%% in illegal ranges.
+
+int_to_utf8(I) when I =< 16#7F ->
+ <<I>>;
+int_to_utf8(I) when I =< 16#7FF ->
+ B2 = I,
+ B1 = (I bsr 6),
+ <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
+int_to_utf8(I) when I =< 16#FFFF ->
+ B3 = I,
+ B2 = (I bsr 6),
+ B1 = (I bsr 12),
+ <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
+int_to_utf8(I) when I =< 16#3FFFFF ->
+ B4 = I,
+ B3 = (I bsr 6),
+ B2 = (I bsr 12),
+ B1 = (I bsr 18),
+ <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>;
+int_to_utf8(I) when I =< 16#3FFFFFF ->
+ B5 = I,
+ B4 = (I bsr 6),
+ B3 = (I bsr 12),
+ B2 = (I bsr 18),
+ B1 = (I bsr 24),
+ <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6,
+ 1:1,0:1,B5:6>>.
+
+%% int_to_utf8(I, NumberOfBytes) -> Binary.
+%% This function can be used to construct overlong sequences.
+int_to_utf8(I, 1) ->
+ <<I>>;
+int_to_utf8(I, 2) ->
+ B2 = I,
+ B1 = (I bsr 6),
+ <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
+int_to_utf8(I, 3) ->
+ B3 = I,
+ B2 = (I bsr 6),
+ B1 = (I bsr 12),
+ <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
+int_to_utf8(I, 4) ->
+ B4 = I,
+ B3 = (I bsr 6),
+ B2 = (I bsr 12),
+ B1 = (I bsr 18),
+ <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>.
+
+%%-------------------------------------------------------------------
+
+make_unaligned(Bin0) when is_binary(Bin0) ->
+ Bin1 = <<0:3,Bin0/binary,31:5>>,
+ Sz = byte_size(Bin0),
+ <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
+ Bin.
+
+%%-------------------------------------------------------------------
+%% Just to prevent compiler optimizations
+
+id(X) -> X.