From 26b59dfe67ef551cd94765557cdd8c79794bcc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Tue, 31 May 2016 14:28:54 +0200 Subject: Add new AtU8 beam chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson). --- lib/stdlib/test/beam_lib_SUITE.erl | 45 +++++++++++++++++++++++++------------- lib/stdlib/test/erl_scan_SUITE.erl | 15 ++++++------- 2 files changed, 37 insertions(+), 23 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/beam_lib_SUITE.erl b/lib/stdlib/test/beam_lib_SUITE.erl index 4521ecc0ef..279e15f703 100644 --- a/lib/stdlib/test/beam_lib_SUITE.erl +++ b/lib/stdlib/test/beam_lib_SUITE.erl @@ -81,12 +81,8 @@ normal(Conf) when is_list(Conf) -> NoOfTables = length(ets:all()), P0 = pps(), - CompileFlags = [{outdir,PrivDir}, debug_info], - {ok,_} = compile:file(Source, CompileFlags), - {ok, Binary} = file:read_file(BeamFile), - - do_normal(BeamFile), - do_normal(Binary), + do_normal(Source, PrivDir, BeamFile, []), + do_normal(Source, PrivDir, BeamFile, [no_utf8_atoms]), {ok,_} = compile:file(Source, [{outdir,PrivDir}, no_debug_info]), {ok, {simple, [{abstract_code, no_abstract_code}]}} = @@ -101,7 +97,15 @@ normal(Conf) when is_list(Conf) -> true = (P0 == pps()), ok. -do_normal(BeamFile) -> +do_normal(Source, PrivDir, BeamFile, Opts) -> + CompileFlags = [{outdir,PrivDir}, debug_info | Opts], + {ok,_} = compile:file(Source, CompileFlags), + {ok, Binary} = file:read_file(BeamFile), + + do_normal(BeamFile, Opts), + do_normal(Binary, Opts). + +do_normal(BeamFile, Opts) -> Imports = {imports, [{erlang, get_module_info, 1}, {erlang, get_module_info, 2}, {lists, member, 2}]}, @@ -130,20 +134,31 @@ do_normal(BeamFile) -> beam_lib:chunks(BeamFile, [abstract_code]), %% Test reading optional chunks. - All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"], + All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT", "AtU8"], {ok,{simple,Chunks}} = beam_lib:chunks(BeamFile, All, [allow_missing_chunks]), - verify_simple(Chunks). + case {verify_simple(Chunks),Opts} of + {{missing_chunk, AtomBin}, []} when is_binary(AtomBin) -> ok; + {{AtomBin, missing_chunk}, [no_utf8_atoms]} when is_binary(AtomBin) -> ok + end, -verify_simple([{"Atom", AtomBin}, + %% Make sure that reading the atom chunk works when the 'allow_missing_chunks' + %% option is used. + Some = ["Code",atoms,"ExpT","LitT"], + {ok,{simple,SomeChunks}} = beam_lib:chunks(BeamFile, Some, [allow_missing_chunks]), + [{"Code",<<_/binary>>},{atoms,[_|_]},{"ExpT",<<_/binary>>},{"LitT",missing_chunk}] = + SomeChunks. + +verify_simple([{"Atom", PlainAtomChunk}, {"Code", CodeBin}, {"StrT", StrBin}, {"ImpT", ImpBin}, {"ExpT", ExpBin}, {"FunT", missing_chunk}, - {"LitT", missing_chunk}]) - when is_binary(AtomBin), is_binary(CodeBin), is_binary(StrBin), + {"LitT", missing_chunk}, + {"AtU8", AtU8Chunk}]) + when is_binary(CodeBin), is_binary(StrBin), is_binary(ImpBin), is_binary(ExpBin) -> - ok. + {PlainAtomChunk, AtU8Chunk}. %% Read invalid beam files. error(Conf) when is_list(Conf) -> @@ -211,7 +226,7 @@ last_chunk(Bin) -> do_error(BeamFile, ACopy) -> %% evil tests Chunks = chunk_info(BeamFile), - {value, {_, AtomStart, _}} = lists:keysearch("Atom", 1, Chunks), + {value, {_, AtomStart, _}} = lists:keysearch("AtU8", 1, Chunks), {value, {_, ImportStart, _}} = lists:keysearch("ImpT", 1, Chunks), {value, {_, AbstractStart, _}} = lists:keysearch("Abst", 1, Chunks), {value, {_, AttributesStart, _}} = @@ -234,7 +249,7 @@ do_error(BeamFile, ACopy) -> verify(not_a_beam_file, beam_lib:info(BF7)), BF8 = set_byte(ACopy, BeamFile, 13, 17), - verify(missing_chunk, beam_lib:chunks(BF8, ["Atom"])), + verify(missing_chunk, beam_lib:chunks(BF8, ["AtU8"])), BF9 = set_byte(ACopy, BeamFile, CompileInfoStart+10, 17), verify(invalid_chunk, beam_lib:chunks(BF9, [compile_info])). diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl index 4ae734eb65..7d0ba967f9 100644 --- a/lib/stdlib/test/erl_scan_SUITE.erl +++ b/lib/stdlib/test/erl_scan_SUITE.erl @@ -772,10 +772,9 @@ unicode() -> erl_scan:string([1089]), {error,{{1,1},erl_scan,{illegal,character}},{1,2}} = erl_scan:string([1089], {1,1}), - {error,{1,erl_scan,{illegal,atom}},1} = - erl_scan:string("'a"++[1089]++"b'", 1), - {error,{{1,1},erl_scan,{illegal,atom}},{1,6}} = - erl_scan:string("'a"++[1089]++"b'", {1,1}), + {error,{{1,3},erl_scan,{illegal,character}},{1,4}} = + erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}), + test("\"a"++[1089]++"b\""), {ok,[{char,1,1}],1} = erl_scan_string([$$,$\\,$^,1089], 1), @@ -786,8 +785,8 @@ unicode() -> erl_scan:format_error(Error), {error,{{1,1},erl_scan,_},{1,11}} = erl_scan:string("\"qa\\x{aaa}",{1,1}), - {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} = - erl_scan:string("'qa\\x{aaa}'",{1,1}), + {error,{{1,1},erl_scan,_},{1,11}} = + erl_scan:string("'qa\\x{aaa}",{1,1}), {ok,[{char,1,1089}],1} = erl_scan_string([$$,1089], 1), @@ -904,9 +903,9 @@ more_chars() -> %% OTP-10302. Unicode characters scanner/parser. otp_10302(Config) when is_list(Config) -> %% From unicode(): - {error,{1,erl_scan,{illegal,atom}},1} = + {ok,[{atom,1,'aсb'}],1} = erl_scan:string("'a"++[1089]++"b'", 1), - {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} = + {ok,[{atom,{1,1},'qaપ'}],{1,12}} = erl_scan:string("'qa\\x{aaa}'",{1,1}), {ok,[{char,1,1089}],1} = erl_scan_string([$$,1089], 1), -- cgit v1.2.3