%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2008-2011. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in %% compliance with the License. You should have received a copy of the %% Erlang Public License along with this software. If not, it can be %% retrieved online at http://www.erlang.org/. %% %% Software distributed under the License is distributed on an "AS IS" %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See %% the License for the specific language governing rights and limitations %% under the License. %% %% %CopyrightEnd% %% -module(bs_utf_SUITE). -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, init_per_group/2,end_per_group/2, init_per_testcase/2,end_per_testcase/2, utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1, utf8_illegal_sequences/1,utf16_illegal_sequences/1, utf32_illegal_sequences/1, bad_construction/1]). -include_lib("test_server/include/test_server.hrl"). -define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])). init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) -> Dog = ?t:timetrap(?t:minutes(6)), [{watchdog,Dog}|Config]. end_per_testcase(_Func, Config) -> Dog = ?config(watchdog, Config), ?t:timetrap_cancel(Dog). suite() -> [{ct_hooks,[ts_install_cth]}]. all() -> [utf8_roundtrip, utf16_roundtrip, utf32_roundtrip, utf8_illegal_sequences, utf16_illegal_sequences, utf32_illegal_sequences, bad_construction]. groups() -> []. init_per_suite(Config) -> Config. end_per_suite(_Config) -> ok. init_per_group(_GroupName, Config) -> Config. end_per_group(_GroupName, Config) -> Config. utf8_roundtrip(Config) when is_list(Config) -> ?line utf8_roundtrip(0, 16#D7FF), ?line utf8_roundtrip(16#E000, 16#FFFD), ?line utf8_roundtrip(16#10000, 16#10FFFF), ok. utf8_roundtrip(First, Last) when First =< Last -> Bin = int_to_utf8(First), Bin = id(<>), Bin = id(<<(id(<<>>))/binary,First/utf8>>), Unaligned = id(<<3:2,First/utf8>>), <<_:2,Bin/binary>> = Unaligned, <> = Bin, <> = make_unaligned(Bin), utf8_roundtrip(First+1, Last); utf8_roundtrip(_, _) -> ok. utf16_roundtrip(Config) when is_list(Config) -> Big = fun utf16_big_roundtrip/1, Little = fun utf16_little_roundtrip/1, PidRefs = [spawn_monitor(fun() -> do_utf16_roundtrip(Fun) end) || Fun <- [Big,Little]], [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || {Pid,Ref} <- PidRefs], ok. do_utf16_roundtrip(Fun) -> do_utf16_roundtrip(0, 16#D7FF, Fun), do_utf16_roundtrip(16#E000, 16#FFFD, Fun), do_utf16_roundtrip(16#10000, 16#10FFFF, Fun). do_utf16_roundtrip(First, Last, Fun) when First =< Last -> Fun(First), do_utf16_roundtrip(First+1, Last, Fun); do_utf16_roundtrip(_, _, _) -> ok. utf16_big_roundtrip(Char) -> Bin = id(<>), Bin = id(<<(id(<<>>))/binary,Char/utf16>>), Unaligned = id(<<3:2,Char/utf16>>), <<_:2,Bin/binary>> = Unaligned, <> = Bin, <> = make_unaligned(Bin), ok. utf16_little_roundtrip(Char) -> Bin = id(<>), Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>), Unaligned = id(<<3:2,Char/little-utf16>>), <<_:2,Bin/binary>> = Unaligned, <> = Bin, <> = make_unaligned(Bin), ok. utf32_roundtrip(Config) when is_list(Config) -> Big = fun utf32_big_roundtrip/1, Little = fun utf32_little_roundtrip/1, PidRefs = [spawn_monitor(fun() -> do_utf32_roundtrip(Fun) end) || Fun <- [Big,Little]], [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || {Pid,Ref} <- PidRefs], ok. do_utf32_roundtrip(Fun) -> do_utf32_roundtrip(0, 16#D7FF, Fun), do_utf32_roundtrip(16#E000, 16#FFFD, Fun), do_utf32_roundtrip(16#10000, 16#10FFFF, Fun). do_utf32_roundtrip(First, Last, Fun) when First =< Last -> Fun(First), do_utf32_roundtrip(First+1, Last, Fun); do_utf32_roundtrip(_, _, _) -> ok. utf32_big_roundtrip(Char) -> Bin = id(<>), Bin = id(<<(id(<<>>))/binary,Char/utf32>>), Unaligned = id(<<3:2,Char/utf32>>), <<_:2,Bin/binary>> = Unaligned, <> = Bin, <> = make_unaligned(Bin), ok. utf32_little_roundtrip(Char) -> Bin = id(<>), Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>), Unaligned = id(<<3:2,Char/little-utf32>>), <<_:2,Bin/binary>> = Unaligned, <> = Bin, <> = make_unaligned(Bin), ok. utf8_illegal_sequences(Config) when is_list(Config) -> ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. ?line fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. ?line fail_range(16#FFFE, 16#FFFF), %Non-characters. %% Illegal first character. ?line [fail(<>) || I <- lists:seq(16#80, 16#BF)], %% Short sequences. ?line short_sequences(16#80, 16#10FFFF), %% Overlong sequences. (Using more bytes than necessary %% is not allowed.) ?line overlong(0, 127, 2), ?line overlong(128, 16#7FF, 3), ?line overlong(16#800, 16#FFFF, 4), ok. fail_range(Char, End) when Char =< End -> {'EXIT',_} = (catch <>), Bin = int_to_utf8(Char), fail(Bin), fail_range(Char+1, End); fail_range(_, _) -> ok. short_sequences(Char, End) -> Step = (End - Char) div erlang:system_info(schedulers) + 1, PidRefs = short_sequences_1(Char, Step, End), [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end || {Pid,Ref} <- PidRefs], ok. short_sequences_1(Char, Step, End) when Char =< End -> CharEnd = lists:min([Char+Step-1,End]), [spawn_monitor(fun() -> io:format("~p - ~p\n", [Char,CharEnd]), do_short_sequences(Char, CharEnd) end)|short_sequences_1(Char+Step, Step, End)]; short_sequences_1(_, _, _) -> []. do_short_sequences(Char, End) when Char =< End -> short_sequence(Char), do_short_sequences(Char+1, End); do_short_sequences(_, _) -> ok. short_sequence(I) -> case int_to_utf8(I) of <> -> <> = S0, <> = S1, fail(S0), fail(S1), fail(S2), fail(<>), fail(<>), fail(<>); <> -> <> = S0, fail(S0), fail(S1), fail(<>), fail(<>), fail(<>); <> -> fail(S), fail(<>) end. overlong(Char, Last, NumBytes) when Char =< Last -> overlong(Char, NumBytes), overlong(Char+1, Last, NumBytes); overlong(_, _, _) -> ok. overlong(Char, NumBytes) when NumBytes < 5 -> case int_to_utf8(Char, NumBytes) of <>=Bin -> ?t:fail({illegal_encoding_accepted,Bin,Char}); <>=Bin -> ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar}); _ -> ok end, overlong(Char, NumBytes+1); overlong(_, _) -> ok. fail(Bin) -> fail_1(Bin), fail_1(make_unaligned(Bin)). fail_1(<>=Bin) -> ?t:fail({illegal_encoding_accepted,Bin,Char}); fail_1(_) -> ok. utf16_illegal_sequences(Config) when is_list(Config) -> ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. ?line utf16_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. ?line utf16_fail_range(16#FFFE, 16#FFFF), %Non-characters. ?line lonely_hi_surrogate(16#D800, 16#DFFF), ?line leading_lo_surrogate(16#DC00, 16#DFFF), ok. utf16_fail_range(Char, End) when Char =< End -> {'EXIT',_} = (catch <>), {'EXIT',_} = (catch <>), utf16_fail_range(Char+1, End); utf16_fail_range(_, _) -> ok. lonely_hi_surrogate(Char, End) when Char =< End -> BinBig = <>, BinLittle = <>, case {BinBig,BinLittle} of {<>,_} -> ?t:fail({lonely_hi_surrogate_accepted,Bad}); {_,<>} -> ?t:fail({lonely_hi_surrogate_accepted,Bad}); {_,_} -> ok end, lonely_hi_surrogate(Char+1, End); lonely_hi_surrogate(_, _) -> ok. leading_lo_surrogate(Char, End) when Char =< End -> leading_lo_surrogate(Char, 16#D800, 16#DFFF), leading_lo_surrogate(Char+1, End); leading_lo_surrogate(_, _) -> ok. leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End -> BinBig = <>, BinLittle = <>, case {BinBig,BinLittle} of {<>,_} -> ?t:fail({leading_lo_surrogate_accepted,Bad}); {_,<>} -> ?t:fail({leading_lo_surrogate_accepted,Bad}); {_,_} -> ok end, leading_lo_surrogate(HiSurr, LoSurr+1, End); leading_lo_surrogate(_, _, _) -> ok. utf32_illegal_sequences(Config) when is_list(Config) -> ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large. ?line utf32_fail_range(16#D800, 16#DFFF), %Reserved for UTF-16. ?line utf32_fail_range(16#FFFE, 16#FFFF), %Non-characters. ?line utf32_fail_range(-100, -1), ok. utf32_fail_range(Char, End) when Char =< End -> {'EXIT',_} = (catch <>), {'EXIT',_} = (catch <>), case {<>,<>} of {<>,_} -> ?line ?t:fail(Unexpected); {_,<>} -> ?line ?t:fail(Unexpected); {_,_} -> ok end, utf32_fail_range(Char+1, End); utf32_fail_range(_, _) -> ok. bad_construction(Config) when is_list(Config) -> ?FAIL(<<3.14/utf8>>), ?FAIL(<<3.1415/utf16>>), ?FAIL(<<3.1415/utf32>>), ?FAIL(<<(-1)/utf8>>), ?FAIL(<<(-1)/utf16>>), {'EXIT',_} = (catch <<(id(-1))/utf8>>), {'EXIT',_} = (catch <<(id(-1))/utf16>>), {'EXIT',_} = (catch <<(id(-1))/utf32>>), ?FAIL(<<16#D800/utf8>>), ?FAIL(<<16#D800/utf16>>), ?FAIL(<<16#D800/utf32>>), ok. %% This function intentionally allows construction of %% UTF-8 sequence in illegal ranges. int_to_utf8(I) when I =< 16#7F -> <>; int_to_utf8(I) when I =< 16#7FF -> B2 = I, B1 = (I bsr 6), <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; int_to_utf8(I) when I =< 16#FFFF -> B3 = I, B2 = (I bsr 6), B1 = (I bsr 12), <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; int_to_utf8(I) when I =< 16#3FFFFF -> B4 = I, B3 = (I bsr 6), B2 = (I bsr 12), B1 = (I bsr 18), <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>; int_to_utf8(I) when I =< 16#3FFFFFF -> B5 = I, B4 = (I bsr 6), B3 = (I bsr 12), B2 = (I bsr 18), B1 = (I bsr 24), <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6, 1:1,0:1,B5:6>>. %% int_to_utf8(I, NumberOfBytes) -> Binary. %% This function can be used to construct overlong sequences. int_to_utf8(I, 1) -> <>; int_to_utf8(I, 2) -> B2 = I, B1 = (I bsr 6), <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>; int_to_utf8(I, 3) -> B3 = I, B2 = (I bsr 6), B1 = (I bsr 12), <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>; int_to_utf8(I, 4) -> B4 = I, B3 = (I bsr 6), B2 = (I bsr 12), B1 = (I bsr 18), <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>. make_unaligned(Bin0) when is_binary(Bin0) -> Bin1 = <<0:3,Bin0/binary,31:5>>, Sz = byte_size(Bin0), <<0:3,Bin:Sz/binary,31:5>> = id(Bin1), Bin. fail_check({'EXIT',{badarg,_}}, Str, Vars) -> try evaluate(Str, Vars) of Res -> io:format("Interpreted result: ~p", [Res]), ?t:fail(did_not_fail_in_intepreted_code) catch error:badarg -> ok end; fail_check(Res, _, _) -> io:format("Compiled result: ~p", [Res]), ?t:fail(did_not_fail_in_compiled_code). evaluate(Str, Vars) -> {ok,Tokens,_} = erl_scan:string(Str ++ " . "), {ok, [Expr]} = erl_parse:parse_exprs(Tokens), case erl_eval:expr(Expr, Vars) of {value, Result, _} -> Result end. id(I) -> I.