aboutsummaryrefslogblamecommitdiffstats
path: root/erts/emulator/test/bs_utf_SUITE.erl
blob: 6e660b12b01bfcf91a2394dc76f8531ff2bc1a52 (plain) (tree)
1
2
3
4


                   
                                                        
















                                                                         


                                                                    




                                                              
                                                    






                                                                      
                                  


                                    

                                                
         


                                                      



            





                         
                                     
           

                                    
           
 






























































































































































































































































































































































                                                                             
%%
%% %CopyrightBegin%
%% 
%% Copyright Ericsson AB 2008-2010. All Rights Reserved.
%% 
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%% 
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% 
%% %CopyrightEnd%
%%

-module(bs_utf_SUITE).

-export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 
	 init_per_group/2,end_per_group/2,
	 init_per_testcase/2,end_per_testcase/2,
	 utf8_roundtrip/1,utf16_roundtrip/1,utf32_roundtrip/1,
	 utf8_illegal_sequences/1,utf16_illegal_sequences/1,
	 utf32_illegal_sequences/1,
	 bad_construction/1]).

-include_lib("test_server/include/test_server.hrl").

-define(FAIL(Expr), ?line fail_check(catch Expr, ??Expr, [])).

init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) ->
    Dog = ?t:timetrap(?t:minutes(6)),
    [{watchdog,Dog}|Config].

end_per_testcase(_Func, Config) ->
    Dog = ?config(watchdog, Config),
    ?t:timetrap_cancel(Dog).

suite() -> [{suite_callbacks,[ts_install_scb]}].

all() -> 
    [utf8_roundtrip, utf16_roundtrip, utf32_roundtrip,
     utf8_illegal_sequences, utf16_illegal_sequences,
     utf32_illegal_sequences, bad_construction].

groups() -> 
    [].

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

init_per_group(_GroupName, Config) ->
    Config.

end_per_group(_GroupName, Config) ->
    Config.


utf8_roundtrip(Config) when is_list(Config) ->
    ?line utf8_roundtrip(0, 16#D7FF),
    ?line utf8_roundtrip(16#E000, 16#FFFD),
    ?line utf8_roundtrip(16#10000, 16#10FFFF),
    ok.

utf8_roundtrip(First, Last) when First =< Last ->
    Bin = int_to_utf8(First),
    Bin = id(<<First/utf8>>),
    Bin = id(<<(id(<<>>))/binary,First/utf8>>),
    Unaligned = id(<<3:2,First/utf8>>),
    <<_:2,Bin/binary>> = Unaligned,
    <<First/utf8>> = Bin,
    <<First/utf8>> = make_unaligned(Bin),
    utf8_roundtrip(First+1, Last);
utf8_roundtrip(_, _) -> ok.

utf16_roundtrip(Config) when is_list(Config) ->
    Big = fun utf16_big_roundtrip/1,
    Little = fun utf16_little_roundtrip/1,
    PidRefs = [spawn_monitor(fun() ->
				     do_utf16_roundtrip(Fun)
			     end) || Fun <- [Big,Little]],
    [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
	{Pid,Ref} <- PidRefs],
    ok.

do_utf16_roundtrip(Fun) ->
    do_utf16_roundtrip(0, 16#D7FF, Fun),
    do_utf16_roundtrip(16#E000, 16#FFFD, Fun),
    do_utf16_roundtrip(16#10000, 16#10FFFF, Fun).

do_utf16_roundtrip(First, Last, Fun) when First =< Last ->
    Fun(First),
    do_utf16_roundtrip(First+1, Last, Fun);
do_utf16_roundtrip(_, _, _) -> ok.

utf16_big_roundtrip(Char) ->
    Bin = id(<<Char/utf16>>),
    Bin = id(<<(id(<<>>))/binary,Char/utf16>>),
    Unaligned = id(<<3:2,Char/utf16>>),
    <<_:2,Bin/binary>> = Unaligned,
    <<Char/utf16>> = Bin,
    <<Char/utf16>> = make_unaligned(Bin),
    ok.

utf16_little_roundtrip(Char) ->
    Bin = id(<<Char/little-utf16>>),
    Bin = id(<<(id(<<>>))/binary,Char/little-utf16>>),
    Unaligned = id(<<3:2,Char/little-utf16>>),
    <<_:2,Bin/binary>> = Unaligned,
    <<Char/little-utf16>> = Bin,
    <<Char/little-utf16>> = make_unaligned(Bin),
    ok.

utf32_roundtrip(Config) when is_list(Config) ->
    Big = fun utf32_big_roundtrip/1,
    Little = fun utf32_little_roundtrip/1,
    PidRefs = [spawn_monitor(fun() ->
				     do_utf32_roundtrip(Fun)
			     end) || Fun <- [Big,Little]],
    [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
	{Pid,Ref} <- PidRefs],
    ok.

do_utf32_roundtrip(Fun) ->
    do_utf32_roundtrip(0, 16#D7FF, Fun),
    do_utf32_roundtrip(16#E000, 16#FFFD, Fun),
    do_utf32_roundtrip(16#10000, 16#10FFFF, Fun).

do_utf32_roundtrip(First, Last, Fun) when First =< Last ->
    Fun(First),
    do_utf32_roundtrip(First+1, Last, Fun);
do_utf32_roundtrip(_, _, _) -> ok.

utf32_big_roundtrip(Char) ->
    Bin = id(<<Char/utf32>>),
    Bin = id(<<(id(<<>>))/binary,Char/utf32>>),
    Unaligned = id(<<3:2,Char/utf32>>),
    <<_:2,Bin/binary>> = Unaligned,
    <<Char/utf32>> = Bin,
    <<Char/utf32>> = make_unaligned(Bin),
    ok.

utf32_little_roundtrip(Char) ->
    Bin = id(<<Char/little-utf32>>),
    Bin = id(<<(id(<<>>))/binary,Char/little-utf32>>),
    Unaligned = id(<<3:2,Char/little-utf32>>),
    <<_:2,Bin/binary>> = Unaligned,
    <<Char/little-utf32>> = Bin,
    <<Char/little-utf32>> = make_unaligned(Bin),
    ok.

utf8_illegal_sequences(Config) when is_list(Config) ->
    ?line fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
    ?line fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
    ?line fail_range(16#FFFE, 16#FFFF),		%Non-characters.

    %% Illegal first character.
    ?line [fail(<<I,16#8F,16#8F,16#8F>>) || I <- lists:seq(16#80, 16#BF)],

    %% Short sequences.
    ?line short_sequences(16#80, 16#10FFFF),

    %% Overlong sequences. (Using more bytes than necessary
    %% is not allowed.)
    ?line overlong(0, 127, 2),
    ?line overlong(128, 16#7FF, 3),
    ?line overlong(16#800, 16#FFFF, 4),
    ok.

fail_range(Char, End) when Char =< End ->
    {'EXIT',_} = (catch <<Char/utf8>>),
    Bin = int_to_utf8(Char),
    fail(Bin),
    fail_range(Char+1, End);
fail_range(_, _) -> ok.

short_sequences(Char, End) ->
    Step = (End - Char) div erlang:system_info(schedulers) + 1,
    PidRefs = short_sequences_1(Char, Step, End),
    [receive {'DOWN',Ref,process,Pid,Reason} -> normal=Reason end ||
	{Pid,Ref} <- PidRefs],
    ok.

short_sequences_1(Char, Step, End) when Char =< End ->
    CharEnd = lists:min([Char+Step-1,End]),
    [spawn_monitor(fun() ->
			   io:format("~p - ~p\n", [Char,CharEnd]),
			   do_short_sequences(Char, CharEnd)
		   end)|short_sequences_1(Char+Step, Step, End)];
short_sequences_1(_, _, _) -> [].

do_short_sequences(Char, End) when Char =< End ->
    short_sequence(Char),
    do_short_sequences(Char+1, End);
do_short_sequences(_, _) -> ok.

short_sequence(I) ->
    case int_to_utf8(I) of
	<<S0:3/binary,_:8>> ->
	    <<S1:2/binary,R1:8>> = S0,
	    <<S2:1/binary,_:8>> = S1,
	    fail(S0),
	    fail(S1),
	    fail(S2),
	    fail(<<S2/binary,16#7F,R1,R1>>),
	    fail(<<S1/binary,16#7F,R1>>),
	    fail(<<S0/binary,16#7F>>);
	<<S0:2/binary,_:8>> ->
	    <<S1:1/binary,R1:8>> = S0,
	    fail(S0),
	    fail(S1),
	    fail(<<S0/binary,16#7F>>),
	    fail(<<S1/binary,16#7F>>),
	    fail(<<S1/binary,16#7F,R1>>);
	<<S:1/binary,_:8>> ->
	    fail(S),
	    fail(<<S/binary,16#7F>>)
    end.

overlong(Char, Last, NumBytes) when Char =< Last ->
    overlong(Char, NumBytes),
    overlong(Char+1, Last, NumBytes);
overlong(_, _, _) -> ok.

overlong(Char, NumBytes) when NumBytes < 5 ->
    case int_to_utf8(Char, NumBytes) of
	<<Char/utf8>>=Bin ->
	    ?t:fail({illegal_encoding_accepted,Bin,Char});
	<<OtherChar/utf8>>=Bin ->
	    ?t:fail({illegal_encoding_accepted,Bin,Char,OtherChar});
	_ -> ok
    end,
    overlong(Char, NumBytes+1);
overlong(_, _) -> ok.

fail(Bin) ->
    fail_1(Bin),
    fail_1(make_unaligned(Bin)).

fail_1(<<Char/utf8>>=Bin) ->
    ?t:fail({illegal_encoding_accepted,Bin,Char});
fail_1(_) -> ok.


utf16_illegal_sequences(Config) when is_list(Config) ->
    ?line utf16_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
    ?line utf16_fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
    ?line utf16_fail_range(16#FFFE, 16#FFFF),		%Non-characters.

    ?line lonely_hi_surrogate(16#D800, 16#DFFF),
    ?line leading_lo_surrogate(16#DC00, 16#DFFF),
    
    ok.

utf16_fail_range(Char, End) when Char =< End ->
    {'EXIT',_} = (catch <<Char/big-utf16>>),
    {'EXIT',_} = (catch <<Char/little-utf16>>),
    utf16_fail_range(Char+1, End);
utf16_fail_range(_, _) -> ok.

lonely_hi_surrogate(Char, End) when Char =< End ->
    BinBig = <<Char:16/big>>,
    BinLittle = <<Char:16/little>>,
    case {BinBig,BinLittle} of
	{<<Bad/big-utf16>>,_} ->
	    ?t:fail({lonely_hi_surrogate_accepted,Bad});
	{_,<<Bad/little-utf16>>} ->
	    ?t:fail({lonely_hi_surrogate_accepted,Bad});
	{_,_} ->
	    ok
    end,
    lonely_hi_surrogate(Char+1, End);
lonely_hi_surrogate(_, _) -> ok.

leading_lo_surrogate(Char, End) when Char =< End ->
    leading_lo_surrogate(Char, 16#D800, 16#DFFF),
    leading_lo_surrogate(Char+1, End);
leading_lo_surrogate(_, _) -> ok.

leading_lo_surrogate(HiSurr, LoSurr, End) when LoSurr =< End ->
    BinBig = <<HiSurr:16/big,LoSurr:16/big>>,
    BinLittle = <<HiSurr:16/little,LoSurr:16/little>>,
    case {BinBig,BinLittle} of
	{<<Bad/big-utf16,_/bits>>,_} ->
	    ?t:fail({leading_lo_surrogate_accepted,Bad});
	{_,<<Bad/little-utf16,_/bits>>} ->
	    ?t:fail({leading_lo_surrogate_accepted,Bad});
	{_,_} ->
	    ok
    end,
    leading_lo_surrogate(HiSurr, LoSurr+1, End);
leading_lo_surrogate(_, _, _) -> ok.

utf32_illegal_sequences(Config) when is_list(Config) ->
    ?line utf32_fail_range(16#10FFFF+1, 16#10FFFF+512), %Too large.
    ?line utf32_fail_range(16#D800, 16#DFFF),		%Reserved for UTF-16.
    ?line utf32_fail_range(16#FFFE, 16#FFFF),		%Non-characters.
    ?line utf32_fail_range(-100, -1),
    ok.

utf32_fail_range(Char, End) when Char =< End ->
    {'EXIT',_} = (catch <<Char/big-utf32>>),
    {'EXIT',_} = (catch <<Char/little-utf32>>),
    case {<<Char:32>>,<<Char:32/little>>} of
	{<<Unexpected/utf32>>,_} ->
	    ?line ?t:fail(Unexpected);
	{_,<<Unexpected/little-utf32>>} ->
	    ?line ?t:fail(Unexpected);
	{_,_} -> ok
    end,
    utf32_fail_range(Char+1, End);
utf32_fail_range(_, _) -> ok.

bad_construction(Config) when is_list(Config) ->
    ?FAIL(<<3.14/utf8>>),
    ?FAIL(<<3.1415/utf16>>),
    ?FAIL(<<3.1415/utf32>>),

    ?FAIL(<<(-1)/utf8>>),
    ?FAIL(<<(-1)/utf16>>),
    {'EXIT',_} = (catch <<(id(-1))/utf8>>),
    {'EXIT',_} = (catch <<(id(-1))/utf16>>),
    {'EXIT',_} = (catch <<(id(-1))/utf32>>),

    ?FAIL(<<16#D800/utf8>>),
    ?FAIL(<<16#D800/utf16>>),
    ?FAIL(<<16#D800/utf32>>),

    ok.

%% This function intentionally allows construction of
%% UTF-8 sequence in illegal ranges.
int_to_utf8(I) when I =< 16#7F ->
    <<I>>;
int_to_utf8(I) when I =< 16#7FF ->
    B2 = I,
    B1 = (I bsr 6),
    <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
int_to_utf8(I) when I =< 16#FFFF ->
    B3 = I,
    B2 = (I bsr 6),
    B1 = (I bsr 12),
    <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
int_to_utf8(I) when I =< 16#3FFFFF ->
    B4 = I,
    B3 = (I bsr 6),
    B2 = (I bsr 12),
    B1 = (I bsr 18),
    <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>;
int_to_utf8(I) when I =< 16#3FFFFFF ->
    B5 = I,
    B4 = (I bsr 6),
    B3 = (I bsr 12),
    B2 = (I bsr 18),
    B1 = (I bsr 24),
    <<1:1,1:1,1:1,1:1,1:1,0:1,B1:2,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6,
     1:1,0:1,B5:6>>.

%% int_to_utf8(I, NumberOfBytes) -> Binary.
%%  This function can be used to construct overlong sequences.
int_to_utf8(I, 1) ->
    <<I>>;
int_to_utf8(I, 2) ->
    B2 = I,
    B1 = (I bsr 6),
    <<1:1,1:1,0:1,B1:5,1:1,0:1,B2:6>>;
int_to_utf8(I, 3) ->
    B3 = I,
    B2 = (I bsr 6),
    B1 = (I bsr 12),
    <<1:1,1:1,1:1,0:1,B1:4,1:1,0:1,B2:6,1:1,0:1,B3:6>>;
int_to_utf8(I, 4) ->
    B4 = I,
    B3 = (I bsr 6),
    B2 = (I bsr 12),
    B1 = (I bsr 18),
    <<1:1,1:1,1:1,1:1,0:1,B1:3,1:1,0:1,B2:6,1:1,0:1,B3:6,1:1,0:1,B4:6>>.

make_unaligned(Bin0) when is_binary(Bin0) ->
    Bin1 = <<0:3,Bin0/binary,31:5>>,
    Sz = byte_size(Bin0),
    <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
    Bin.

fail_check({'EXIT',{badarg,_}}, Str, Vars) ->
    try	evaluate(Str, Vars) of
	Res ->
	    io:format("Interpreted result: ~p", [Res]),
	    ?t:fail(did_not_fail_in_intepreted_code)
    catch
	error:badarg ->
	    ok
    end;
fail_check(Res, _, _) ->
    io:format("Compiled result: ~p", [Res]),
    ?t:fail(did_not_fail_in_compiled_code).

evaluate(Str, Vars) ->
    {ok,Tokens,_} =
	erl_scan:string(Str ++ " . "),
    {ok, [Expr]} = erl_parse:parse_exprs(Tokens),
    case erl_eval:expr(Expr, Vars) of
	{value, Result, _} ->
	    Result
    end.

id(I) -> I.