diff options
Diffstat (limited to 'erts/emulator/test/bs_utf_SUITE.erl')
-rw-r--r-- | erts/emulator/test/bs_utf_SUITE.erl | 394 |
1 files changed, 394 insertions, 0 deletions
diff --git a/erts/emulator/test/bs_utf_SUITE.erl b/erts/emulator/test/bs_utf_SUITE.erl new file mode 100644 index 0000000000..87adc5197b --- /dev/null +++ b/erts/emulator/test/bs_utf_SUITE.erl @@ -0,0 +1,394 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2008-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% + +-module(bs_utf_SUITE). + +-export([all/1,init_per_testcase/2,fin_per_testcase/2, + utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1, + utf8_illegal_sequences/1,utf16_illegal_sequences/1, + utf32_illegal_sequences/1, + bad_construction/1]). + +-include("test_server.hrl"). + +-define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])). + +init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) -> + Dog = ?t:timetrap(?t:minutes(6)), + [{watchdog,Dog}|Config]. + +fin_per_testcase(_Func, Config) -> + Dog = ?config(watchdog, Config), + ?t:timetrap_cancel(Dog). + +all(suite) -> + [utf8_roundtrip,utf16_roundtrip,utf32_roundtrip, + utf8_illegal_sequences,utf16_illegal_sequences, + utf32_illegal_sequences,bad_construction]. + +utf8_roundtrip(Config) when is_list(Config) -> + ?line utf8_roundtrip(0, 16#D7FF), + ?line utf8_roundtrip(16#E000, 16#FFFD), + ?line utf8_roundtrip(16#10000, 16#10FFFF), + ok. + +utf8_roundtrip(First, Last) when First =< Last -> + Bin = int_to_utf8(First), + Bin = id(<<First/utf8>>), + Bin = id(<<(id(<<>>))/binary,First/utf8>>), + Unaligned = id(<<3:2,First/utf8>>), + <<_:2,Bin/binary>> = Unaligned, + <<First/utf8>> = Bin, + <<First/utf8>> = make_unaligned(Bin), + utf8_roundtrip(First+1, Last); +utf8_roundtrip(_, _) -> ok. + +utf16_roundtrip(Config) when is_list(Config) -> + Big = fun utf16_big_roundtrip/1, + Little = fun utf16_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> + do_utf16_roundtrip(Fun) + end) || Fun <- [Big,Little]], + [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || + {Pid,Ref} <- PidRefs], + ok. + +do_utf16_roundtrip(Fun) -> + do_utf16_roundtrip(0, 16#D7FF, Fun), + do_utf16_roundtrip(16#E000, 16#FFFD, Fun), + do_utf16_roundtrip(16#10000, 16#10FFFF, Fun). + +do_utf16_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf16_roundtrip(First+1, Last, Fun); +do_utf16_roundtrip(_, _, _) -> ok. + +utf16_big_roundtrip(Char) -> + Bin = id(<<Char/utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/utf16>>), + Unaligned = id(<<3:2,Char/utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf16>> = Bin, + <<Char/utf16>> = make_unaligned(Bin), + ok. + +utf16_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf16>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>), + Unaligned = id(<<3:2,Char/little-utf16>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf16>> = Bin, + <<Char/little-utf16>> = make_unaligned(Bin), + ok. + +utf32_roundtrip(Config) when is_list(Config) -> + Big = fun utf32_big_roundtrip/1, + Little = fun utf32_little_roundtrip/1, + PidRefs = [spawn_monitor(fun() -> + do_utf32_roundtrip(Fun) + end) || Fun <- [Big,Little]], + [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || + {Pid,Ref} <- PidRefs], + ok. + +do_utf32_roundtrip(Fun) -> + do_utf32_roundtrip(0, 16#D7FF, Fun), + do_utf32_roundtrip(16#E000, 16#FFFD, Fun), + do_utf32_roundtrip(16#10000, 16#10FFFF, Fun). + +do_utf32_roundtrip(First, Last, Fun) when First =< Last -> + Fun(First), + do_utf32_roundtrip(First+1, Last, Fun); +do_utf32_roundtrip(_, _, _) -> ok. + +utf32_big_roundtrip(Char) -> + Bin = id(<<Char/utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/utf32>>), + Unaligned = id(<<3:2,Char/utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/utf32>> = Bin, + <<Char/utf32>> = make_unaligned(Bin), + ok. + +utf32_little_roundtrip(Char) -> + Bin = id(<<Char/little-utf32>>), + Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>), + Unaligned = id(<<3:2,Char/little-utf32>>), + <<_:2,Bin/binary>> = Unaligned, + <<Char/little-utf32>> = Bin, + <<Char/little-utf32>> = make_unaligned(Bin), + ok. + +utf8_illegal_sequences(Config) when is_list(Config) -> + ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. + ?line fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. + ?line fail_range(16#FFFE, 16#FFFF), %Non-characters. + + %% Illegal first character. + ?line [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)], + + %% Short sequences. + ?line short_sequences(16#80, 16#10FFFF), + + %% Overlong sequences. (Using more bytes than necessary + %% is not allowed.) + ?line overlong(0, 127, 2), + ?line overlong(128, 16#7FF, 3), + ?line overlong(16#800, 16#FFFF, 4), + ok. + +fail_range(Char, End) when Char =< End -> + {'EXIT',_} = (catch <<Char/utf8>>), + Bin = int_to_utf8(Char), + fail(Bin), + fail_range(Char+1, End); +fail_range(_, _) -> ok. + +short_sequences(Char, End) -> + Step = (End - Char) div erlang:system_info(schedulers) + 1, + PidRefs = short_sequences_1(Char, Step, End), + [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || + {Pid,Ref} <- PidRefs], + ok. + +short_sequences_1(Char, Step, End) when Char =< End -> + CharEnd = lists:min([Char+Step-1,End]), + [spawn_monitor(fun() -> + io:format("~p - ~p\n", [Char,CharEnd]), + do_short_sequences(Char, CharEnd) + end)|short_sequences_1(Char+Step, Step, End)]; +short_sequences_1(_, _, _) -> []. + +do_short_sequences(Char, End) when Char =< End -> + short_sequence(Char), + do_short_sequences(Char+1, End); +do_short_sequences(_, _) -> ok. + +short_sequence(I) -> + case int_to_utf8(I) of + <<S0:3/binary,_:8>> -> + <<S1:2/binary,R1:8>> = S0, + <<S2:1/binary,_:8>> = S1, + fail(S0), + fail(S1), + fail(S2), + fail(<<S2/binary,16#7F,R1,R1>>), + fail(<<S1/binary,16#7F,R1>>), + fail(<<S0/binary,16#7F>>); + <<S0:2/binary,_:8>> -> + <<S1:1/binary,R1:8>> = S0, + fail(S0), + fail(S1), + fail(<<S0/binary,16#7F>>), + fail(<<S1/binary,16#7F>>), + fail(<<S1/binary,16#7F,R1>>); + <<S:1/binary,_:8>> -> + fail(S), + fail(<<S/binary,16#7F>>) + end. + +overlong(Char, Last, NumBytes) when Char =< Last -> + overlong(Char, NumBytes), + overlong(Char+1, Last, NumBytes); +overlong(_, _, _) -> ok. + +overlong(Char, NumBytes) when NumBytes < 5 -> + case int_to_utf8(Char, NumBytes) of + <<Char/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char}); + <<OtherChar/utf8>>=Bin -> + ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar}); + _ -> ok + end, + overlong(Char, NumBytes+1); +overlong(_, _) -> ok. + +fail(Bin) -> + fail_1(Bin), + fail_1(make_unaligned(Bin)). + +fail_1(<<Char/utf8>>=Bin) -> + ?t:fail({illegal_encoding_accepted,Bin,Char}); +fail_1(_) -> ok. + + +utf16_illegal_sequences(Config) when is_list(Config) -> + ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. + ?line utf16_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. + ?line utf16_fail_range(16#FFFE, 16#FFFF), %Non-characters. + + ?line lonely_hi_surrogate(16#D800, 16#DFFF), + ?line leading_lo_surrogate(16#DC00, 16#DFFF), + + ok. + +utf16_fail_range(Char, End) when Char =< End -> + {'EXIT',_} = (catch <<Char/big-utf16>>), + {'EXIT',_} = (catch <<Char/little-utf16>>), + utf16_fail_range(Char+1, End); +utf16_fail_range(_, _) -> ok. + +lonely_hi_surrogate(Char, End) when Char =< End -> + BinBig = <<Char:16/big>>, + BinLittle = <<Char:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16>>,_} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16>>} -> + ?t:fail({lonely_hi_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + lonely_hi_surrogate(Char+1, End); +lonely_hi_surrogate(_, _) -> ok. + +leading_lo_surrogate(Char, End) when Char =< End -> + leading_lo_surrogate(Char, 16#D800, 16#DFFF), + leading_lo_surrogate(Char+1, End); +leading_lo_surrogate(_, _) -> ok. + +leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End -> + BinBig = <<HiSurr:16/big,LoSurr:16/big>>, + BinLittle = <<HiSurr:16/little,LoSurr:16/little>>, + case {BinBig,BinLittle} of + {<<Bad/big-utf16,_/bits>>,_} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,<<Bad/little-utf16,_/bits>>} -> + ?t:fail({leading_lo_surrogate_accepted,Bad}); + {_,_} -> + ok + end, + leading_lo_surrogate(HiSurr, LoSurr+1, End); +leading_lo_surrogate(_, _, _) -> ok. + +utf32_illegal_sequences(Config) when is_list(Config) -> + ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. + ?line utf32_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. + ?line utf32_fail_range(16#FFFE, 16#FFFF), %Non-characters. + ?line utf32_fail_range(-100, -1), + ok. + +utf32_fail_range(Char, End) when Char =< End -> + {'EXIT',_} = (catch <<Char/big-utf32>>), + {'EXIT',_} = (catch <<Char/little-utf32>>), + case {<<Char:32>>,<<Char:32/little>>} of + {<<Unexpected/utf32>>,_} -> + ?line ?t:fail(Unexpected); + {_,<<Unexpected/little-utf32>>} -> + ?line ?t:fail(Unexpected); + {_,_} -> ok + end, + utf32_fail_range(Char+1, End); +utf32_fail_range(_, _) -> ok. + +bad_construction(Config) when is_list(Config) -> + ?FAIL(<<3.14/utf8>>), + ?FAIL(<<3.1415/utf16>>), + ?FAIL(<<3.1415/utf32>>), + + ?FAIL(<<(-1)/utf8>>), + ?FAIL(<<(-1)/utf16>>), + {'EXIT',_} = (catch <<(id(-1))/utf8>>), + {'EXIT',_} = (catch <<(id(-1))/utf16>>), + {'EXIT',_} = (catch <<(id(-1))/utf32>>), + + ?FAIL(<<16#D800/utf8>>), + ?FAIL(<<16#D800/utf16>>), + ?FAIL(<<16#D800/utf32>>), + + ok. + +%% This function intentionally allows construction of +%% UTF-8 sequence in illegal ranges. +int_to_utf8(I) when I =< 16#7F -> + <<I>>; +int_to_utf8(I) when I =< 16#7FF -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I) when I =< 16#FFFF -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I) when I =< 16#3FFFFF -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; +int_to_utf8(I) when I =< 16#3FFFFFF -> + B5 = I, + B4 = (I bsr 6), + B3 = (I bsr 12), + B2 = (I bsr 18), + B1 = (I bsr 24), + <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6, + 1:1,0:1,B5:6>>. + +%% int_to_utf8(I, NumberOfBytes) -> Binary. +%% This function can be used to construct overlong sequences. +int_to_utf8(I, 1) -> + <<I>>; +int_to_utf8(I, 2) -> + B2 = I, + B1 = (I bsr 6), + <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; +int_to_utf8(I, 3) -> + B3 = I, + B2 = (I bsr 6), + B1 = (I bsr 12), + <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; +int_to_utf8(I, 4) -> + B4 = I, + B3 = (I bsr 6), + B2 = (I bsr 12), + B1 = (I bsr 18), + <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>. + +make_unaligned(Bin0) when is_binary(Bin0) -> + Bin1 = <<0:3,Bin0/binary,31:5>>, + Sz = byte_size(Bin0), + <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), + Bin. + +fail_check({'EXIT',{badarg,_}}, Str, Vars) -> + try evaluate(Str, Vars) of + Res -> + io:format("Interpreted result: ~p", [Res]), + ?t:fail(did_not_fail_in_intepreted_code) + catch + error:badarg -> + ok + end; +fail_check(Res, _, _) -> + io:format("Compiled result: ~p", [Res]), + ?t:fail(did_not_fail_in_compiled_code). + +evaluate(Str, Vars) -> + {ok,Tokens,_} = + erl_scan:string(Str ++ " . "), + {ok, [Expr]} = erl_parse:parse_exprs(Tokens), + case erl_eval:expr(Expr, Vars) of + {value, Result, _} -> + Result + end. + +id(I) -> I. + |