aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorJosé Valim <[email protected]>2016-05-31 14:28:54 +0200
committerJosé Valim <[email protected]>2017-01-30 15:24:05 +0100
commit26b59dfe67ef551cd94765557cdd8c79794bcc38 (patch)
tree696adc07b3e7a4a3f1ed6c52311ff6e163b218b4 /lib
parent6c7539b0e39996f870385e5276e08c0dd98b6eb8 (diff)
downloadotp-26b59dfe67ef551cd94765557cdd8c79794bcc38.tar.gz
otp-26b59dfe67ef551cd94765557cdd8c79794bcc38.tar.bz2
otp-26b59dfe67ef551cd94765557cdd8c79794bcc38.zip
Add new AtU8 beam chunk
The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson).
Diffstat (limited to 'lib')
-rw-r--r--lib/compiler/src/beam_asm.erl29
-rw-r--r--lib/compiler/src/beam_dict.erl12
-rw-r--r--lib/compiler/src/compile.erl21
-rw-r--r--lib/compiler/test/compile_SUITE.erl31
-rw-r--r--lib/compiler/test/compile_SUITE_data/simple.erl5
-rw-r--r--lib/compiler/test/lc_SUITE.erl2
-rw-r--r--lib/kernel/test/code_SUITE.erl2
-rw-r--r--lib/stdlib/src/beam_lib.erl54
-rw-r--r--lib/stdlib/test/beam_lib_SUITE.erl45
-rw-r--r--lib/stdlib/test/erl_scan_SUITE.erl15
10 files changed, 145 insertions, 71 deletions
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index a2f5dc674c..2fc2850591 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -21,7 +21,7 @@
-module(beam_asm).
--export([module/4]).
+-export([module/5]).
-export([encode/2]).
-export_type([fail/0,label/0,reg/0,src/0,module_code/0,function_name/0]).
@@ -57,20 +57,20 @@
-type module_code() ::
{module(),[_],[_],[asm_function()],pos_integer()}.
--spec module(module_code(), exports(), [_], [compile:option()]) ->
+-spec module(module_code(), exports(), [_], [compile:option()], [compile:option()]) ->
{'ok',binary()}.
-module(Code, Abst, SourceFile, Opts) ->
- {ok,assemble(Code, Abst, SourceFile, Opts)}.
+module(Code, Abst, SourceFile, Opts, CompilerOpts) ->
+ {ok,assemble(Code, Abst, SourceFile, Opts, CompilerOpts)}.
-assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) ->
+assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts, CompilerOpts) ->
{1,Dict0} = beam_dict:atom(Mod, beam_dict:new()),
{0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0),
NumFuncs = length(Asm0),
{Asm,Attr} = on_load(Asm0, Attr0),
Exp = cerl_sets:from_list(Exp0),
{Code,Dict2} = assemble_1(Asm, Exp, Dict1, []),
- build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts).
+ build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts).
on_load(Fs0, Attr0) ->
case proplists:get_value(on_load, Attr0) of
@@ -113,7 +113,7 @@ assemble_function([H|T], Acc, Dict0) ->
assemble_function([], Code, Dict) ->
{Code, Dict}.
-build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
+build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts) ->
%% Create the code chunk.
CodeChunk = chunk(<<"Code">>,
@@ -125,9 +125,9 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
Code),
%% Create the atom table chunk.
-
- {NumAtoms, AtomTab} = beam_dict:atom_table(Dict),
- AtomChunk = chunk(<<"Atom">>, <<NumAtoms:32>>, AtomTab),
+ AtomEncoding = atom_encoding(CompilerOpts),
+ {NumAtoms, AtomTab} = beam_dict:atom_table(Dict, AtomEncoding),
+ AtomChunk = chunk(atom_chunk_name(AtomEncoding), <<NumAtoms:32>>, AtomTab),
%% Create the import table chunk.
@@ -203,6 +203,15 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
end,
build_form(<<"BEAM">>, Chunks).
+atom_encoding(Opts) ->
+ case proplists:get_bool(no_utf8_atoms, Opts) of
+ false -> utf8;
+ true -> latin1
+ end.
+
+atom_chunk_name(utf8) -> <<"AtU8">>;
+atom_chunk_name(latin1) -> <<"Atom">>.
+
%% finalize_fun_table(Essentials, MD5) -> FinalizedEssentials
%% Update the 'old_uniq' field in the entry for each fun in the
%% 'FunT' chunk. We'll use part of the MD5 for the module as a
diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl
index 719d799fd7..990e86062a 100644
--- a/lib/compiler/src/beam_dict.erl
+++ b/lib/compiler/src/beam_dict.erl
@@ -24,7 +24,7 @@
-export([new/0,opcode/2,highest_opcode/1,
atom/2,local/4,export/4,import/4,
string/2,lambda/3,literal/2,line/2,fname/2,
- atom_table/1,local_table/1,export_table/1,import_table/1,
+ atom_table/2,local_table/1,export_table/1,import_table/1,
string_table/1,lambda_table/1,literal_table/1,
line_table/1]).
@@ -197,15 +197,15 @@ fname(Name, #asm{fnames=Fnames}=Dict) ->
end.
%% Returns the atom table.
-%% atom_table(Dict) -> {LastIndex,[Length,AtomString...]}
--spec atom_table(bdict()) -> {non_neg_integer(), [[non_neg_integer(),...]]}.
+%% atom_table(Dict, Encoding) -> {LastIndex,[Length,AtomString...]}
+-spec atom_table(bdict(), latin1 | utf8) -> {non_neg_integer(), [[non_neg_integer(),...]]}.
-atom_table(#asm{atoms=Atoms}) ->
+atom_table(#asm{atoms=Atoms}, Encoding) ->
NumAtoms = maps:size(Atoms),
Sorted = lists:keysort(2, maps:to_list(Atoms)),
{NumAtoms,[begin
- L = atom_to_list(A),
- [length(L)|L]
+ L = atom_to_binary(A, Encoding),
+ [byte_size(L),L]
end || {A,_} <- Sorted]}.
%% Returns the table of local functions.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 069add7890..dcd962df66 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -214,11 +214,21 @@ expand_opt(report, Os) ->
expand_opt(return, Os) ->
[return_errors,return_warnings|Os];
expand_opt(r12, Os) ->
- [no_recv_opt,no_line_info|Os];
+ [no_recv_opt,no_line_info,no_utf8_atoms|Os];
expand_opt(r13, Os) ->
- [no_recv_opt,no_line_info|Os];
+ [no_recv_opt,no_line_info,no_utf8_atoms|Os];
expand_opt(r14, Os) ->
- [no_line_info|Os];
+ [no_line_info,no_utf8_atoms|Os];
+expand_opt(r15, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r16, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r17, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r18, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r19, Os) ->
+ [no_utf8_atoms|Os];
expand_opt({debug_info_key,_}=O, Os) ->
[encrypt_debug_info,O|Os];
expand_opt(no_float_opt, Os) ->
@@ -1376,13 +1386,14 @@ encrypt({des3_cbc=Type,Key,IVec,BlockSize}, Bin0) ->
save_core_code(Code, St) ->
{ok,Code,St#compile{core_code=cerl:from_records(Code)}}.
-beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,mod_options=Opts0}=St) ->
+beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,
+ options=CompilerOpts,mod_options=Opts0}=St) ->
Source = paranoid_absname(File),
Opts1 = lists:map(fun({debug_info_key,_}) -> {debug_info_key,'********'};
(Other) -> Other
end, Opts0),
Opts2 = [O || O <- Opts1, effects_code_generation(O)],
- case beam_asm:module(Code0, Abst, Source, Opts2) of
+ case beam_asm:module(Code0, Abst, Source, Opts2, CompilerOpts) of
{ok,Code} -> {ok,Code,St#compile{abstract_code=[]}}
end.
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 8c09414a52..8d7facd727 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -30,7 +30,7 @@
file_1/1, forms_2/1, module_mismatch/1, big_file/1, outdir/1,
binary/1, makedep/1, cond_and_ifdef/1, listings/1, listings_big/1,
other_output/1, kernel_listing/1, encrypted_abstr/1,
- strict_record/1,
+ strict_record/1, utf8_atoms/1,
cover/1, env/1, core/1,
core_roundtrip/1, asm/1, optimized_guards/1,
sys_pre_attributes/1, dialyzer/1,
@@ -48,7 +48,7 @@ all() ->
[app_test, appup_test, file_1, forms_2, module_mismatch, big_file, outdir,
binary, makedep, cond_and_ifdef, listings, listings_big,
other_output, kernel_listing, encrypted_abstr,
- strict_record,
+ strict_record, utf8_atoms,
cover, env, core, core_roundtrip, asm, optimized_guards,
sys_pre_attributes, dialyzer, warnings, pre_load_check,
env_compiler_options].
@@ -450,8 +450,10 @@ do_kernel_listing({M,A}) ->
try
{ok,M,Kern} = compile:forms(A, [to_kernel]),
IoList = v3_kernel_pp:format(Kern),
- _ = iolist_size(IoList),
- ok
+ case unicode:characters_to_binary(IoList) of
+ Bin when is_binary(Bin) ->
+ ok
+ end
catch
throw:{error,Error} ->
io:format("*** compilation failure '~p' for module ~s\n",
@@ -680,6 +682,23 @@ test_sloppy() ->
{1,2} = record_access:test(Turtle),
Turtle.
+utf8_atoms(Config) when is_list(Config) ->
+ Anno = erl_anno:new(1),
+ Atom = binary_to_atom(<<"こんにちは"/utf8>>, utf8),
+ Forms = [{attribute,Anno,compile,[export_all]},
+ {function,Anno,atom,0,[{clause,Anno,[],[],[{atom,Anno,Atom}]}]}],
+
+ Utf8AtomForms = [{attribute,Anno,module,utf8_atom}|Forms],
+ {ok,utf8_atom,Utf8AtomBin} =
+ compile:forms(Utf8AtomForms, [binary]),
+ {ok,{utf8_atom,[{atoms,_}]}} =
+ beam_lib:chunks(Utf8AtomBin, [atoms]),
+ code:load_binary(utf8_atom, "compile_SUITE", Utf8AtomBin),
+ Atom = utf8_atom:atom(),
+
+ NoUtf8AtomForms = [{attribute,Anno,module,no_utf8_atom}|Forms],
+ error = compile:forms(NoUtf8AtomForms, [binary, r19]).
+
env(Config) when is_list(Config) ->
{Simple,Target} = get_files(Config, simple, env),
{ok,Cwd} = file:get_cwd(),
@@ -751,7 +770,7 @@ do_core_1(M, A, Outdir) ->
{ok,M,Core0} = compile:forms(A, [to_core]),
CoreFile = filename:join(Outdir, atom_to_list(M)++".core"),
CorePP = core_pp:format(Core0),
- ok = file:write_file(CoreFile, CorePP),
+ ok = file:write_file(CoreFile, unicode:characters_to_binary(CorePP)),
%% Parse the .core file and return the result as Core Erlang Terms.
Core = case compile:file(CoreFile, [report_errors,from_core,no_copt,to_core,binary]) of
@@ -823,7 +842,7 @@ do_core_roundtrip_1(Mod, Abstr, Outdir) ->
do_core_roundtrip_2(M, Core0, Outdir) ->
CoreFile = filename:join(Outdir, atom_to_list(M)++".core"),
CorePP = core_pp:format_all(Core0),
- ok = file:write_file(CoreFile, CorePP),
+ ok = file:write_file(CoreFile, unicode:characters_to_binary(CorePP)),
%% Parse the .core file and return the result as Core Erlang Terms.
Core2 = case compile:file(CoreFile, [report_errors,from_core,
diff --git a/lib/compiler/test/compile_SUITE_data/simple.erl b/lib/compiler/test/compile_SUITE_data/simple.erl
index d8324dafaf..9385d101e0 100644
--- a/lib/compiler/test/compile_SUITE_data/simple.erl
+++ b/lib/compiler/test/compile_SUITE_data/simple.erl
@@ -19,7 +19,7 @@
%%
-module(simple).
--export([test/0]).
+-export([test/0,unicode/0]).
-ifdef(need_foo).
-export([foo/0]).
@@ -28,6 +28,9 @@
test() ->
passed.
+unicode() ->
+ {"это",'спутник'}.
+
%% Conditional inclusion.
%% Compile with [{d, need_foo}, {d, foo_value, 42}].
diff --git a/lib/compiler/test/lc_SUITE.erl b/lib/compiler/test/lc_SUITE.erl
index 3cb49433ce..6904ee7f52 100644
--- a/lib/compiler/test/lc_SUITE.erl
+++ b/lib/compiler/test/lc_SUITE.erl
@@ -226,7 +226,7 @@ effect(Config) when is_list(Config) ->
lc_SUITE ->
_ = [{'EXIT',{badarg,_}} =
(catch binary_to_atom(<<C/utf8>>, utf8)) ||
- C <- lists:seq(16#10000, 16#FFFFF)];
+ C <- lists:seq(16#FF10000, 16#FFFFFFF)];
_ ->
ok
end,
diff --git a/lib/kernel/test/code_SUITE.erl b/lib/kernel/test/code_SUITE.erl
index 4914ce9e4c..f368232bfc 100644
--- a/lib/kernel/test/code_SUITE.erl
+++ b/lib/kernel/test/code_SUITE.erl
@@ -323,7 +323,7 @@ load_abs(Config) when is_list(Config) ->
{error, nofile} = code:load_abs(TestDir ++ "/duuuumy_mod"),
{error, badfile} = code:load_abs(TestDir ++ "/code_a_test"),
{'EXIT', _} = (catch code:load_abs({})),
- {'EXIT', _} = (catch code:load_abs("Non-latin-имя-файла")),
+ {error, nofile} = code:load_abs("Non-latin-имя-файла"),
{module, code_b_test} = code:load_abs(TestDir ++ "/code_b_test"),
code:stick_dir(TestDir),
{error, sticky_directory} = code:load_abs(TestDir ++ "/code_b_test"),
diff --git a/lib/stdlib/src/beam_lib.erl b/lib/stdlib/src/beam_lib.erl
index d7ee5c1f5d..461acf03be 100644
--- a/lib/stdlib/src/beam_lib.erl
+++ b/lib/stdlib/src/beam_lib.erl
@@ -63,7 +63,7 @@
-type label() :: integer().
-type chunkid() :: nonempty_string(). % approximation of the strings below
-%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom".
+%% "Abst" | "Attr" | "CInf" | "ExpT" | "ImpT" | "LocT" | "Atom" | "AtU8".
-type chunkname() :: 'abstract_code' | 'attributes' | 'compile_info'
| 'exports' | 'labeled_exports'
| 'imports' | 'indexed_imports'
@@ -520,6 +520,8 @@ read_chunk_data(File0, ChunkNames0, Options)
end.
%% -> {ok, list()} | throw(Error)
+check_chunks([atoms | Ids], File, IL, L) ->
+ check_chunks(Ids, File, ["Atom", "AtU8" | IL], [{atom_chunk, atoms} | L]);
check_chunks([ChunkName | Ids], File, IL, L) when is_atom(ChunkName) ->
ChunkId = chunk_name_to_id(ChunkName, File),
check_chunks(Ids, File, [ChunkId | IL], [{ChunkId, ChunkName} | L]);
@@ -537,6 +539,10 @@ scan_beam(File, What0, AllowMissingChunks) ->
case scan_beam1(File, What0) of
{missing, _FD, Mod, Data, What} when AllowMissingChunks ->
{ok, Mod, [{Id, missing_chunk} || Id <- What] ++ Data};
+ {missing, _FD, Mod, Data, ["Atom"]} ->
+ {ok, Mod, Data};
+ {missing, _FD, Mod, Data, ["AtU8"]} ->
+ {ok, Mod, Data};
{missing, FD, _Mod, _Data, What} ->
error({missing_chunk, filename(FD), hd(What)});
R ->
@@ -581,18 +587,23 @@ scan_beam(FD, Pos, What, Mod, Data) ->
error({invalid_beam_file, filename(FD), Pos})
end.
-get_data(Cs, "Atom"=Id, FD, Size, Pos, Pos2, _Mod, Data) ->
+get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, Encoding) ->
NewCs = del_chunk(Id, Cs),
{NFD, Chunk} = get_chunk(Id, Pos, Size, FD),
<<_Num:32, Chunk2/binary>> = Chunk,
- {Module, _} = extract_atom(Chunk2),
+ {Module, _} = extract_atom(Chunk2, Encoding),
C = case Cs of
info ->
{Id, Pos, Size};
_ ->
{Id, Chunk}
end,
- scan_beam(NFD, Pos2, NewCs, Module, [C | Data]);
+ scan_beam(NFD, Pos2, NewCs, Module, [C | Data]).
+
+get_data(Cs, "Atom" = Id, FD, Size, Pos, Pos2, _Mod, Data) ->
+ get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, latin1);
+get_data(Cs, "AtU8" = Id, FD, Size, Pos, Pos2, _Mod, Data) ->
+ get_atom_data(Cs, Id, FD, Size, Pos, Pos2, Data, utf8);
get_data(info, Id, FD, Size, Pos, Pos2, Mod, Data) ->
scan_beam(FD, Pos2, info, Mod, [{Id, Pos, Size} | Data]);
get_data(Chunks, Id, FD, Size, Pos, Pos2, Mod, Data) ->
@@ -624,6 +635,9 @@ get_chunk(Id, Pos, Size, FD) ->
{NFD, Chunk}
end.
+chunks_to_data([{atom_chunk, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) ->
+ {NewAtoms, Ret} = chunk_to_data(Name, <<"">>, File, Cs, Atoms, Module),
+ chunks_to_data(CNs, Chunks, File, Cs, Module, NewAtoms, [Ret | L]);
chunks_to_data([{Id, Name} | CNs], Chunks, File, Cs, Module, Atoms, L) ->
{_Id, Chunk} = lists:keyfind(Id, 1, Chunks),
{NewAtoms, Ret} = chunk_to_data(Name, Chunk, File, Cs, Atoms, Module),
@@ -651,7 +665,7 @@ chunk_to_data(abstract_code=Id, Chunk, File, _Cs, AtomTable, Mod) ->
<<>> ->
{AtomTable, {Id, no_abstract_code}};
<<0:8,N:8,Mode0:N/binary,Rest/binary>> ->
- Mode = list_to_atom(binary_to_list(Mode0)),
+ Mode = binary_to_atom(Mode0, utf8),
decrypt_abst(Mode, Mod, File, Id, AtomTable, Rest);
_ ->
case catch binary_to_term(Chunk) of
@@ -683,7 +697,6 @@ chunk_to_data(ChunkId, Chunk, _File,
_Cs, AtomTable, _Module) when is_list(ChunkId) ->
{AtomTable, {ChunkId, Chunk}}. % Chunk is a binary
-chunk_name_to_id(atoms, _) -> "Atom";
chunk_name_to_id(indexed_imports, _) -> "ImpT";
chunk_name_to_id(imports, _) -> "ImpT";
chunk_name_to_id(exports, _) -> "ExpT";
@@ -738,25 +751,30 @@ atm(AT, N) ->
%% AT is updated.
ensure_atoms({empty, AT}, Cs) ->
- {_Id, AtomChunk} = lists:keyfind("Atom", 1, Cs),
- extract_atoms(AtomChunk, AT),
+ case lists:keyfind("AtU8", 1, Cs) of
+ {_Id, AtomChunk} when is_binary(AtomChunk) ->
+ extract_atoms(AtomChunk, AT, utf8);
+ _ ->
+ {_Id, AtomChunk} = lists:keyfind("Atom", 1, Cs),
+ extract_atoms(AtomChunk, AT, latin1)
+ end,
AT;
ensure_atoms(AT, _Cs) ->
AT.
-extract_atoms(<<_Num:32, B/binary>>, AT) ->
- extract_atoms(B, 1, AT).
+extract_atoms(<<_Num:32, B/binary>>, AT, Encoding) ->
+ extract_atoms(B, 1, AT, Encoding).
-extract_atoms(<<>>, _I, _AT) ->
+extract_atoms(<<>>, _I, _AT, _Encoding) ->
true;
-extract_atoms(B, I, AT) ->
- {Atom, B1} = extract_atom(B),
+extract_atoms(B, I, AT, Encoding) ->
+ {Atom, B1} = extract_atom(B, Encoding),
true = ets:insert(AT, {I, Atom}),
- extract_atoms(B1, I+1, AT).
+ extract_atoms(B1, I+1, AT, Encoding).
-extract_atom(<<Len, B/binary>>) ->
+extract_atom(<<Len, B/binary>>, Encoding) ->
<<SB:Len/binary, Tail/binary>> = B,
- {list_to_atom(binary_to_list(SB)), Tail}.
+ {binary_to_atom(SB, Encoding), Tail}.
%%% Utils.
@@ -856,12 +874,12 @@ significant_chunks() ->
%% for a module. They are listed in the order that they should be MD5:ed.
md5_chunks() ->
- ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"].
+ ["Atom", "AtU8", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"].
%% The following chunks are mandatory in every Beam file.
mandatory_chunks() ->
- ["Code", "ExpT", "ImpT", "StrT", "Atom"].
+ ["Code", "ExpT", "ImpT", "StrT"].
%%% ====================================================================
%%% The rest of the file handles encrypted debug info.
diff --git a/lib/stdlib/test/beam_lib_SUITE.erl b/lib/stdlib/test/beam_lib_SUITE.erl
index 4521ecc0ef..279e15f703 100644
--- a/lib/stdlib/test/beam_lib_SUITE.erl
+++ b/lib/stdlib/test/beam_lib_SUITE.erl
@@ -81,12 +81,8 @@ normal(Conf) when is_list(Conf) ->
NoOfTables = length(ets:all()),
P0 = pps(),
- CompileFlags = [{outdir,PrivDir}, debug_info],
- {ok,_} = compile:file(Source, CompileFlags),
- {ok, Binary} = file:read_file(BeamFile),
-
- do_normal(BeamFile),
- do_normal(Binary),
+ do_normal(Source, PrivDir, BeamFile, []),
+ do_normal(Source, PrivDir, BeamFile, [no_utf8_atoms]),
{ok,_} = compile:file(Source, [{outdir,PrivDir}, no_debug_info]),
{ok, {simple, [{abstract_code, no_abstract_code}]}} =
@@ -101,7 +97,15 @@ normal(Conf) when is_list(Conf) ->
true = (P0 == pps()),
ok.
-do_normal(BeamFile) ->
+do_normal(Source, PrivDir, BeamFile, Opts) ->
+ CompileFlags = [{outdir,PrivDir}, debug_info | Opts],
+ {ok,_} = compile:file(Source, CompileFlags),
+ {ok, Binary} = file:read_file(BeamFile),
+
+ do_normal(BeamFile, Opts),
+ do_normal(Binary, Opts).
+
+do_normal(BeamFile, Opts) ->
Imports = {imports, [{erlang, get_module_info, 1},
{erlang, get_module_info, 2},
{lists, member, 2}]},
@@ -130,20 +134,31 @@ do_normal(BeamFile) ->
beam_lib:chunks(BeamFile, [abstract_code]),
%% Test reading optional chunks.
- All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT"],
+ All = ["Atom", "Code", "StrT", "ImpT", "ExpT", "FunT", "LitT", "AtU8"],
{ok,{simple,Chunks}} = beam_lib:chunks(BeamFile, All, [allow_missing_chunks]),
- verify_simple(Chunks).
+ case {verify_simple(Chunks),Opts} of
+ {{missing_chunk, AtomBin}, []} when is_binary(AtomBin) -> ok;
+ {{AtomBin, missing_chunk}, [no_utf8_atoms]} when is_binary(AtomBin) -> ok
+ end,
-verify_simple([{"Atom", AtomBin},
+ %% Make sure that reading the atom chunk works when the 'allow_missing_chunks'
+ %% option is used.
+ Some = ["Code",atoms,"ExpT","LitT"],
+ {ok,{simple,SomeChunks}} = beam_lib:chunks(BeamFile, Some, [allow_missing_chunks]),
+ [{"Code",<<_/binary>>},{atoms,[_|_]},{"ExpT",<<_/binary>>},{"LitT",missing_chunk}] =
+ SomeChunks.
+
+verify_simple([{"Atom", PlainAtomChunk},
{"Code", CodeBin},
{"StrT", StrBin},
{"ImpT", ImpBin},
{"ExpT", ExpBin},
{"FunT", missing_chunk},
- {"LitT", missing_chunk}])
- when is_binary(AtomBin), is_binary(CodeBin), is_binary(StrBin),
+ {"LitT", missing_chunk},
+ {"AtU8", AtU8Chunk}])
+ when is_binary(CodeBin), is_binary(StrBin),
is_binary(ImpBin), is_binary(ExpBin) ->
- ok.
+ {PlainAtomChunk, AtU8Chunk}.
%% Read invalid beam files.
error(Conf) when is_list(Conf) ->
@@ -211,7 +226,7 @@ last_chunk(Bin) ->
do_error(BeamFile, ACopy) ->
%% evil tests
Chunks = chunk_info(BeamFile),
- {value, {_, AtomStart, _}} = lists:keysearch("Atom", 1, Chunks),
+ {value, {_, AtomStart, _}} = lists:keysearch("AtU8", 1, Chunks),
{value, {_, ImportStart, _}} = lists:keysearch("ImpT", 1, Chunks),
{value, {_, AbstractStart, _}} = lists:keysearch("Abst", 1, Chunks),
{value, {_, AttributesStart, _}} =
@@ -234,7 +249,7 @@ do_error(BeamFile, ACopy) ->
verify(not_a_beam_file, beam_lib:info(BF7)),
BF8 = set_byte(ACopy, BeamFile, 13, 17),
- verify(missing_chunk, beam_lib:chunks(BF8, ["Atom"])),
+ verify(missing_chunk, beam_lib:chunks(BF8, ["AtU8"])),
BF9 = set_byte(ACopy, BeamFile, CompileInfoStart+10, 17),
verify(invalid_chunk, beam_lib:chunks(BF9, [compile_info])).
diff --git a/lib/stdlib/test/erl_scan_SUITE.erl b/lib/stdlib/test/erl_scan_SUITE.erl
index 4ae734eb65..7d0ba967f9 100644
--- a/lib/stdlib/test/erl_scan_SUITE.erl
+++ b/lib/stdlib/test/erl_scan_SUITE.erl
@@ -772,10 +772,9 @@ unicode() ->
erl_scan:string([1089]),
{error,{{1,1},erl_scan,{illegal,character}},{1,2}} =
erl_scan:string([1089], {1,1}),
- {error,{1,erl_scan,{illegal,atom}},1} =
- erl_scan:string("'a"++[1089]++"b'", 1),
- {error,{{1,1},erl_scan,{illegal,atom}},{1,6}} =
- erl_scan:string("'a"++[1089]++"b'", {1,1}),
+ {error,{{1,3},erl_scan,{illegal,character}},{1,4}} =
+ erl_scan:string("'a" ++ [999999999] ++ "c'", {1,1}),
+
test("\"a"++[1089]++"b\""),
{ok,[{char,1,1}],1} =
erl_scan_string([$$,$\\,$^,1089], 1),
@@ -786,8 +785,8 @@ unicode() ->
erl_scan:format_error(Error),
{error,{{1,1},erl_scan,_},{1,11}} =
erl_scan:string("\"qa\\x{aaa}",{1,1}),
- {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} =
- erl_scan:string("'qa\\x{aaa}'",{1,1}),
+ {error,{{1,1},erl_scan,_},{1,11}} =
+ erl_scan:string("'qa\\x{aaa}",{1,1}),
{ok,[{char,1,1089}],1} =
erl_scan_string([$$,1089], 1),
@@ -904,9 +903,9 @@ more_chars() ->
%% OTP-10302. Unicode characters scanner/parser.
otp_10302(Config) when is_list(Config) ->
%% From unicode():
- {error,{1,erl_scan,{illegal,atom}},1} =
+ {ok,[{atom,1,'aсb'}],1} =
erl_scan:string("'a"++[1089]++"b'", 1),
- {error,{{1,1},erl_scan,{illegal,atom}},{1,12}} =
+ {ok,[{atom,{1,1},'qaપ'}],{1,12}} =
erl_scan:string("'qa\\x{aaa}'",{1,1}),
{ok,[{char,1,1089}],1} = erl_scan_string([$$,1089], 1),