aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/test/bs_utf_SUITE.erl
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/test/bs_utf_SUITE.erl
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'erts/emulator/test/bs_utf_SUITE.erl')
-rw-r--r--erts/emulator/test/bs_utf_SUITE.erl394
1 files changed, 394 insertions, 0 deletions
diff --git a/erts/emulator/test/bs_utf_SUITE.erl b/erts/emulator/test/bs_utf_SUITE.erl
new file mode 100644
index 0000000000..87adc5197b
--- /dev/null
+++ b/erts/emulator/test/bs_utf_SUITE.erl
@@ -0,0 +1,394 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-module(bs_utf_SUITE).
+
+-export([all/1,init_per_testcase/2,fin_per_testcase/2,
+ utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1,
+ utf8_illegal_sequences/1,utf16_illegal_sequences/1,
+ utf32_illegal_sequences/1,
+ bad_construction/1]).
+
+-include("test_server.hrl").
+
+-define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])).
+
+init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) ->
+ Dog = ?t:timetrap(?t:minutes(6)),
+ [{watchdog,Dog}|Config].
+
+fin_per_testcase(_Func, Config) ->
+ Dog = ?config(watchdog, Config),
+ ?t:timetrap_cancel(Dog).
+
+all(suite) ->
+ [utf8_roundtrip,utf16_roundtrip,utf32_roundtrip,
+ utf8_illegal_sequences,utf16_illegal_sequences,
+ utf32_illegal_sequences,bad_construction].
+
+utf8_roundtrip(Config) when is_list(Config) ->
+ ?line utf8_roundtrip(0, 16#D7FF),
+ ?line utf8_roundtrip(16#E000, 16#FFFD),
+ ?line utf8_roundtrip(16#10000, 16#10FFFF),
+ ok.
+
+utf8_roundtrip(First, Last) when First =< Last ->
+ Bin = int_to_utf8(First),
+ Bin = id(<<First/utf8>>),
+ Bin = id(<<(id(<<>>))/binary,First/utf8>>),
+ Unaligned = id(<<3:2,First/utf8>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<First/utf8>> = Bin,
+ <<First/utf8>> = make_unaligned(Bin),
+ utf8_roundtrip(First+1, Last);
+utf8_roundtrip(_, _) -> ok.
+
+utf16_roundtrip(Config) when is_list(Config) ->
+ Big = fun utf16_big_roundtrip/1,
+ Little = fun utf16_little_roundtrip/1,
+ PidRefs = [spawn_monitor(fun() ->
+ do_utf16_roundtrip(Fun)
+ end) || Fun <- [Big,Little]],
+ [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
+ {Pid,Ref} <- PidRefs],
+ ok.
+
+do_utf16_roundtrip(Fun) ->
+ do_utf16_roundtrip(0, 16#D7FF, Fun),
+ do_utf16_roundtrip(16#E000, 16#FFFD, Fun),
+ do_utf16_roundtrip(16#10000, 16#10FFFF, Fun).
+
+do_utf16_roundtrip(First, Last, Fun) when First =< Last ->
+ Fun(First),
+ do_utf16_roundtrip(First+1, Last, Fun);
+do_utf16_roundtrip(_, _, _) -> ok.
+
+utf16_big_roundtrip(Char) ->
+ Bin = id(<<Char/utf16>>),
+ Bin = id(<<(id(<<>>))/binary,Char/utf16>>),
+ Unaligned = id(<<3:2,Char/utf16>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/utf16>> = Bin,
+ <<Char/utf16>> = make_unaligned(Bin),
+ ok.
+
+utf16_little_roundtrip(Char) ->
+ Bin = id(<<Char/little-utf16>>),
+ Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>),
+ Unaligned = id(<<3:2,Char/little-utf16>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/little-utf16>> = Bin,
+ <<Char/little-utf16>> = make_unaligned(Bin),
+ ok.
+
+utf32_roundtrip(Config) when is_list(Config) ->
+ Big = fun utf32_big_roundtrip/1,
+ Little = fun utf32_little_roundtrip/1,
+ PidRefs = [spawn_monitor(fun() ->
+ do_utf32_roundtrip(Fun)
+ end) || Fun <- [Big,Little]],
+ [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
+ {Pid,Ref} <- PidRefs],
+ ok.
+
+do_utf32_roundtrip(Fun) ->
+ do_utf32_roundtrip(0, 16#D7FF, Fun),
+ do_utf32_roundtrip(16#E000, 16#FFFD, Fun),
+ do_utf32_roundtrip(16#10000, 16#10FFFF, Fun).
+
+do_utf32_roundtrip(First, Last, Fun) when First =< Last ->
+ Fun(First),
+ do_utf32_roundtrip(First+1, Last, Fun);
+do_utf32_roundtrip(_, _, _) -> ok.
+
+utf32_big_roundtrip(Char) ->
+ Bin = id(<<Char/utf32>>),
+ Bin = id(<<(id(<<>>))/binary,Char/utf32>>),
+ Unaligned = id(<<3:2,Char/utf32>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/utf32>> = Bin,
+ <<Char/utf32>> = make_unaligned(Bin),
+ ok.
+
+utf32_little_roundtrip(Char) ->
+ Bin = id(<<Char/little-utf32>>),
+ Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>),
+ Unaligned = id(<<3:2,Char/little-utf32>>),
+ <<_:2,Bin/binary>> = Unaligned,
+ <<Char/little-utf32>> = Bin,
+ <<Char/little-utf32>> = make_unaligned(Bin),
+ ok.
+
+utf8_illegal_sequences(Config) when is_list(Config) ->
+ ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
+ ?line fail_range(16#D800, 16#DFFF), %Reserved for UTF-16.
+ ?line fail_range(16#FFFE, 16#FFFF), %Non-characters.
+
+ %% Illegal first character.
+ ?line [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)],
+
+ %% Short sequences.
+ ?line short_sequences(16#80, 16#10FFFF),
+
+ %% Overlong sequences. (Using more bytes than necessary
+ %% is not allowed.)
+ ?line overlong(0, 127, 2),
+ ?line overlong(128, 16#7FF, 3),
+ ?line overlong(16#800, 16#FFFF, 4),
+ ok.
+
+fail_range(Char, End) when Char =< End ->
+ {'EXIT',_} = (catch <<Char/utf8>>),
+ Bin = int_to_utf8(Char),
+ fail(Bin),
+ fail_range(Char+1, End);
+fail_range(_, _) -> ok.
+
+short_sequences(Char, End) ->
+ Step = (End - Char) div erlang:system_info(schedulers) + 1,
+ PidRefs = short_sequences_1(Char, Step, End),
+ [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
+ {Pid,Ref} <- PidRefs],
+ ok.
+
+short_sequences_1(Char, Step, End) when Char =< End ->
+ CharEnd = lists:min([Char+Step-1,End]),
+ [spawn_monitor(fun() ->
+ io:format("~p - ~p\n", [Char,CharEnd]),
+ do_short_sequences(Char, CharEnd)
+ end)|short_sequences_1(Char+Step, Step, End)];
+short_sequences_1(_, _, _) -> [].
+
+do_short_sequences(Char, End) when Char =< End ->
+ short_sequence(Char),
+ do_short_sequences(Char+1, End);
+do_short_sequences(_, _) -> ok.
+
+short_sequence(I) ->
+ case int_to_utf8(I) of
+ <<S0:3/binary,_:8>> ->
+ <<S1:2/binary,R1:8>> = S0,
+ <<S2:1/binary,_:8>> = S1,
+ fail(S0),
+ fail(S1),
+ fail(S2),
+ fail(<<S2/binary,16#7F,R1,R1>>),
+ fail(<<S1/binary,16#7F,R1>>),
+ fail(<<S0/binary,16#7F>>);
+ <<S0:2/binary,_:8>> ->
+ <<S1:1/binary,R1:8>> = S0,
+ fail(S0),
+ fail(S1),
+ fail(<<S0/binary,16#7F>>),
+ fail(<<S1/binary,16#7F>>),
+ fail(<<S1/binary,16#7F,R1>>);
+ <<S:1/binary,_:8>> ->
+ fail(S),
+ fail(<<S/binary,16#7F>>)
+ end.
+
+overlong(Char, Last, NumBytes) when Char =< Last ->
+ overlong(Char, NumBytes),
+ overlong(Char+1, Last, NumBytes);
+overlong(_, _, _) -> ok.
+
+overlong(Char, NumBytes) when NumBytes < 5 ->
+ case int_to_utf8(Char, NumBytes) of
+ <<Char/utf8>>=Bin ->
+ ?t:fail({illegal_encoding_accepted,Bin,Char});
+ <<OtherChar/utf8>>=Bin ->
+ ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar});
+ _ -> ok
+ end,
+ overlong(Char, NumBytes+1);
+overlong(_, _) -> ok.
+
+fail(Bin) ->
+ fail_1(Bin),
+ fail_1(make_unaligned(Bin)).
+
+fail_1(<<Char/utf8>>=Bin) ->
+ ?t:fail({illegal_encoding_accepted,Bin,Char});
+fail_1(_) -> ok.
+
+
+utf16_illegal_sequences(Config) when is_list(Config) ->
+ ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
+ ?line utf16_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16.
+ ?line utf16_fail_range(16#FFFE, 16#FFFF), %Non-characters.
+
+ ?line lonely_hi_surrogate(16#D800, 16#DFFF),
+ ?line leading_lo_surrogate(16#DC00, 16#DFFF),
+
+ ok.
+
+utf16_fail_range(Char, End) when Char =< End ->
+ {'EXIT',_} = (catch <<Char/big-utf16>>),
+ {'EXIT',_} = (catch <<Char/little-utf16>>),
+ utf16_fail_range(Char+1, End);
+utf16_fail_range(_, _) -> ok.
+
+lonely_hi_surrogate(Char, End) when Char =< End ->
+ BinBig = <<Char:16/big>>,
+ BinLittle = <<Char:16/little>>,
+ case {BinBig,BinLittle} of
+ {<<Bad/big-utf16>>,_} ->
+ ?t:fail({lonely_hi_surrogate_accepted,Bad});
+ {_,<<Bad/little-utf16>>} ->
+ ?t:fail({lonely_hi_surrogate_accepted,Bad});
+ {_,_} ->
+ ok
+ end,
+ lonely_hi_surrogate(Char+1, End);
+lonely_hi_surrogate(_, _) -> ok.
+
+leading_lo_surrogate(Char, End) when Char =< End ->
+ leading_lo_surrogate(Char, 16#D800, 16#DFFF),
+ leading_lo_surrogate(Char+1, End);
+leading_lo_surrogate(_, _) -> ok.
+
+leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End ->
+ BinBig = <<HiSurr:16/big,LoSurr:16/big>>,
+ BinLittle = <<HiSurr:16/little,LoSurr:16/little>>,
+ case {BinBig,BinLittle} of
+ {<<Bad/big-utf16,_/bits>>,_} ->
+ ?t:fail({leading_lo_surrogate_accepted,Bad});
+ {_,<<Bad/little-utf16,_/bits>>} ->
+ ?t:fail({leading_lo_surrogate_accepted,Bad});
+ {_,_} ->
+ ok
+ end,
+ leading_lo_surrogate(HiSurr, LoSurr+1, End);
+leading_lo_surrogate(_, _, _) -> ok.
+
+utf32_illegal_sequences(Config) when is_list(Config) ->
+ ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
+ ?line utf32_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16.
+ ?line utf32_fail_range(16#FFFE, 16#FFFF), %Non-characters.
+ ?line utf32_fail_range(-100, -1),
+ ok.
+
+utf32_fail_range(Char, End) when Char =< End ->
+ {'EXIT',_} = (catch <<Char/big-utf32>>),
+ {'EXIT',_} = (catch <<Char/little-utf32>>),
+ case {<<Char:32>>,<<Char:32/little>>} of
+ {<<Unexpected/utf32>>,_} ->
+ ?line ?t:fail(Unexpected);
+ {_,<<Unexpected/little-utf32>>} ->
+ ?line ?t:fail(Unexpected);
+ {_,_} -> ok
+ end,
+ utf32_fail_range(Char+1, End);
+utf32_fail_range(_, _) -> ok.
+
+bad_construction(Config) when is_list(Config) ->
+ ?FAIL(<<3.14/utf8>>),
+ ?FAIL(<<3.1415/utf16>>),
+ ?FAIL(<<3.1415/utf32>>),
+
+ ?FAIL(<<(-1)/utf8>>),
+ ?FAIL(<<(-1)/utf16>>),
+ {'EXIT',_} = (catch <<(id(-1))/utf8>>),
+ {'EXIT',_} = (catch <<(id(-1))/utf16>>),
+ {'EXIT',_} = (catch <<(id(-1))/utf32>>),
+
+ ?FAIL(<<16#D800/utf8>>),
+ ?FAIL(<<16#D800/utf16>>),
+ ?FAIL(<<16#D800/utf32>>),
+
+ ok.
+
+%% This function intentionally allows construction of
+%% UTF-8 sequence in illegal ranges.
+int_to_utf8(I) when I =< 16#7F ->
+ <<I>>;
+int_to_utf8(I) when I =< 16#7FF ->
+ B2 = I,
+ B1 = (I bsr 6),
+ <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
+int_to_utf8(I) when I =< 16#FFFF ->
+ B3 = I,
+ B2 = (I bsr 6),
+ B1 = (I bsr 12),
+ <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
+int_to_utf8(I) when I =< 16#3FFFFF ->
+ B4 = I,
+ B3 = (I bsr 6),
+ B2 = (I bsr 12),
+ B1 = (I bsr 18),
+ <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>;
+int_to_utf8(I) when I =< 16#3FFFFFF ->
+ B5 = I,
+ B4 = (I bsr 6),
+ B3 = (I bsr 12),
+ B2 = (I bsr 18),
+ B1 = (I bsr 24),
+ <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6,
+ 1:1,0:1,B5:6>>.
+
+%% int_to_utf8(I, NumberOfBytes) -> Binary.
+%% This function can be used to construct overlong sequences.
+int_to_utf8(I, 1) ->
+ <<I>>;
+int_to_utf8(I, 2) ->
+ B2 = I,
+ B1 = (I bsr 6),
+ <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
+int_to_utf8(I, 3) ->
+ B3 = I,
+ B2 = (I bsr 6),
+ B1 = (I bsr 12),
+ <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
+int_to_utf8(I, 4) ->
+ B4 = I,
+ B3 = (I bsr 6),
+ B2 = (I bsr 12),
+ B1 = (I bsr 18),
+ <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>.
+
+make_unaligned(Bin0) when is_binary(Bin0) ->
+ Bin1 = <<0:3,Bin0/binary,31:5>>,
+ Sz = byte_size(Bin0),
+ <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
+ Bin.
+
+fail_check({'EXIT',{badarg,_}}, Str, Vars) ->
+ try evaluate(Str, Vars) of
+ Res ->
+ io:format("Interpreted result: ~p", [Res]),
+ ?t:fail(did_not_fail_in_intepreted_code)
+ catch
+ error:badarg ->
+ ok
+ end;
+fail_check(Res, _, _) ->
+ io:format("Compiled result: ~p", [Res]),
+ ?t:fail(did_not_fail_in_compiled_code).
+
+evaluate(Str, Vars) ->
+ {ok,Tokens,_} =
+ erl_scan:string(Str ++ " . "),
+ {ok, [Expr]} = erl_parse:parse_exprs(Tokens),
+ case erl_eval:expr(Expr, Vars) of
+ {value, Result, _} ->
+ Result
+ end.
+
+id(I) -> I.
+