From 26b59dfe67ef551cd94765557cdd8c79794bcc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Tue, 31 May 2016 14:28:54 +0200 Subject: Add new AtU8 beam chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson). --- lib/compiler/src/beam_asm.erl | 29 +++++++++++++++++++---------- lib/compiler/src/beam_dict.erl | 12 ++++++------ lib/compiler/src/compile.erl | 21 ++++++++++++++++----- 3 files changed, 41 insertions(+), 21 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl index a2f5dc674c..2fc2850591 100644 --- a/lib/compiler/src/beam_asm.erl +++ b/lib/compiler/src/beam_asm.erl @@ -21,7 +21,7 @@ -module(beam_asm). --export([module/4]). +-export([module/5]). -export([encode/2]). -export_type([fail/0,label/0,reg/0,src/0,module_code/0,function_name/0]). @@ -57,20 +57,20 @@ -type module_code() :: {module(),[_],[_],[asm_function()],pos_integer()}. --spec module(module_code(), exports(), [_], [compile:option()]) -> +-spec module(module_code(), exports(), [_], [compile:option()], [compile:option()]) -> {'ok',binary()}. -module(Code, Abst, SourceFile, Opts) -> - {ok,assemble(Code, Abst, SourceFile, Opts)}. +module(Code, Abst, SourceFile, Opts, CompilerOpts) -> + {ok,assemble(Code, Abst, SourceFile, Opts, CompilerOpts)}. -assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) -> +assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts, CompilerOpts) -> {1,Dict0} = beam_dict:atom(Mod, beam_dict:new()), {0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0), NumFuncs = length(Asm0), {Asm,Attr} = on_load(Asm0, Attr0), Exp = cerl_sets:from_list(Exp0), {Code,Dict2} = assemble_1(Asm, Exp, Dict1, []), - build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts). + build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts). on_load(Fs0, Attr0) -> case proplists:get_value(on_load, Attr0) of @@ -113,7 +113,7 @@ assemble_function([H|T], Acc, Dict0) -> assemble_function([], Code, Dict) -> {Code, Dict}. -build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) -> +build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts) -> %% Create the code chunk. CodeChunk = chunk(<<"Code">>, @@ -125,9 +125,9 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) -> Code), %% Create the atom table chunk. - - {NumAtoms, AtomTab} = beam_dict:atom_table(Dict), - AtomChunk = chunk(<<"Atom">>, <>, AtomTab), + AtomEncoding = atom_encoding(CompilerOpts), + {NumAtoms, AtomTab} = beam_dict:atom_table(Dict, AtomEncoding), + AtomChunk = chunk(atom_chunk_name(AtomEncoding), <>, AtomTab), %% Create the import table chunk. @@ -203,6 +203,15 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) -> end, build_form(<<"BEAM">>, Chunks). +atom_encoding(Opts) -> + case proplists:get_bool(no_utf8_atoms, Opts) of + false -> utf8; + true -> latin1 + end. + +atom_chunk_name(utf8) -> <<"AtU8">>; +atom_chunk_name(latin1) -> <<"Atom">>. + %% finalize_fun_table(Essentials, MD5) -> FinalizedEssentials %% Update the 'old_uniq' field in the entry for each fun in the %% 'FunT' chunk. We'll use part of the MD5 for the module as a diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl index 719d799fd7..990e86062a 100644 --- a/lib/compiler/src/beam_dict.erl +++ b/lib/compiler/src/beam_dict.erl @@ -24,7 +24,7 @@ -export([new/0,opcode/2,highest_opcode/1, atom/2,local/4,export/4,import/4, string/2,lambda/3,literal/2,line/2,fname/2, - atom_table/1,local_table/1,export_table/1,import_table/1, + atom_table/2,local_table/1,export_table/1,import_table/1, string_table/1,lambda_table/1,literal_table/1, line_table/1]). @@ -197,15 +197,15 @@ fname(Name, #asm{fnames=Fnames}=Dict) -> end. %% Returns the atom table. -%% atom_table(Dict) -> {LastIndex,[Length,AtomString...]} --spec atom_table(bdict()) -> {non_neg_integer(), [[non_neg_integer(),...]]}. +%% atom_table(Dict, Encoding) -> {LastIndex,[Length,AtomString...]} +-spec atom_table(bdict(), latin1 | utf8) -> {non_neg_integer(), [[non_neg_integer(),...]]}. -atom_table(#asm{atoms=Atoms}) -> +atom_table(#asm{atoms=Atoms}, Encoding) -> NumAtoms = maps:size(Atoms), Sorted = lists:keysort(2, maps:to_list(Atoms)), {NumAtoms,[begin - L = atom_to_list(A), - [length(L)|L] + L = atom_to_binary(A, Encoding), + [byte_size(L),L] end || {A,_} <- Sorted]}. %% Returns the table of local functions. diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index 069add7890..dcd962df66 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -214,11 +214,21 @@ expand_opt(report, Os) -> expand_opt(return, Os) -> [return_errors,return_warnings|Os]; expand_opt(r12, Os) -> - [no_recv_opt,no_line_info|Os]; + [no_recv_opt,no_line_info,no_utf8_atoms|Os]; expand_opt(r13, Os) -> - [no_recv_opt,no_line_info|Os]; + [no_recv_opt,no_line_info,no_utf8_atoms|Os]; expand_opt(r14, Os) -> - [no_line_info|Os]; + [no_line_info,no_utf8_atoms|Os]; +expand_opt(r15, Os) -> + [no_utf8_atoms|Os]; +expand_opt(r16, Os) -> + [no_utf8_atoms|Os]; +expand_opt(r17, Os) -> + [no_utf8_atoms|Os]; +expand_opt(r18, Os) -> + [no_utf8_atoms|Os]; +expand_opt(r19, Os) -> + [no_utf8_atoms|Os]; expand_opt({debug_info_key,_}=O, Os) -> [encrypt_debug_info,O|Os]; expand_opt(no_float_opt, Os) -> @@ -1376,13 +1386,14 @@ encrypt({des3_cbc=Type,Key,IVec,BlockSize}, Bin0) -> save_core_code(Code, St) -> {ok,Code,St#compile{core_code=cerl:from_records(Code)}}. -beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,mod_options=Opts0}=St) -> +beam_asm(Code0, #compile{ifile=File,abstract_code=Abst, + options=CompilerOpts,mod_options=Opts0}=St) -> Source = paranoid_absname(File), Opts1 = lists:map(fun({debug_info_key,_}) -> {debug_info_key,'********'}; (Other) -> Other end, Opts0), Opts2 = [O || O <- Opts1, effects_code_generation(O)], - case beam_asm:module(Code0, Abst, Source, Opts2) of + case beam_asm:module(Code0, Abst, Source, Opts2, CompilerOpts) of {ok,Code} -> {ok,Code,St#compile{abstract_code=[]}} end. -- cgit v1.2.3 From 36f7087ae0fe9749b776b7b013ccc0a26a494a84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Sun, 12 Feb 2017 22:04:36 +0100 Subject: Add extra_chunks option to compile This allow languages such as Elixir and LFE to attach extra chunks to the .beam file without having to parse the beam file after compilation. This commit also cleans up the interface to beam_asm, allowing chunks to be passed from the compiler without a need to change beam_asm API on every new chunk. --- lib/compiler/src/beam_asm.erl | 22 ++++++++++------------ lib/compiler/src/compile.erl | 17 ++++++++++++----- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl index 2fc2850591..1bda185acd 100644 --- a/lib/compiler/src/beam_asm.erl +++ b/lib/compiler/src/beam_asm.erl @@ -49,28 +49,26 @@ -type function_name() :: atom(). --type exports() :: [{function_name(),arity()}]. - -type asm_function() :: {'function',function_name(),arity(),label(),[asm_instruction()]}. -type module_code() :: {module(),[_],[_],[asm_function()],pos_integer()}. --spec module(module_code(), exports(), [_], [compile:option()], [compile:option()]) -> +-spec module(module_code(), [{binary(), binary()}], [_], [compile:option()], [compile:option()]) -> {'ok',binary()}. -module(Code, Abst, SourceFile, Opts, CompilerOpts) -> - {ok,assemble(Code, Abst, SourceFile, Opts, CompilerOpts)}. +module(Code, ExtraChunks, SourceFile, Opts, CompilerOpts) -> + {ok,assemble(Code, ExtraChunks, SourceFile, Opts, CompilerOpts)}. -assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts, CompilerOpts) -> +assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, ExtraChunks, SourceFile, Opts, CompilerOpts) -> {1,Dict0} = beam_dict:atom(Mod, beam_dict:new()), {0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0), NumFuncs = length(Asm0), {Asm,Attr} = on_load(Asm0, Attr0), Exp = cerl_sets:from_list(Exp0), {Code,Dict2} = assemble_1(Asm, Exp, Dict1, []), - build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts). + build_file(Code, Attr, Dict2, NumLabels, NumFuncs, ExtraChunks, SourceFile, Opts, CompilerOpts). on_load(Fs0, Attr0) -> case proplists:get_value(on_load, Attr0) of @@ -113,7 +111,7 @@ assemble_function([H|T], Acc, Dict0) -> assemble_function([], Code, Dict) -> {Code, Dict}. -build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts) -> +build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks, SourceFile, Opts, CompilerOpts) -> %% Create the code chunk. CodeChunk = chunk(<<"Code">>, @@ -188,18 +186,18 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts, Compil AttrChunk = chunk(<<"Attr">>, Attributes), CompileChunk = chunk(<<"CInf">>, Compile), - %% Create the abstract code chunk. + %% Compile all extra chunks. - AbstChunk = chunk(<<"Abst">>, Abst), + CheckedChunks = [chunk(Key, Value) || {Key, Value} <- ExtraChunks], %% Create IFF chunk. Chunks = case member(slim, Opts) of true -> - [Essentials,AttrChunk,AbstChunk]; + [Essentials,AttrChunk,CheckedChunks]; false -> [Essentials,LocChunk,AttrChunk, - CompileChunk,AbstChunk,LineChunk] + CompileChunk,CheckedChunks,LineChunk] end, build_form(<<"BEAM">>, Chunks). diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl index dcd962df66..c849306c0d 100644 --- a/lib/compiler/src/compile.erl +++ b/lib/compiler/src/compile.erl @@ -315,19 +315,25 @@ format_error_reason(Reason) -> mod_options=[] :: [option()], %Options for module_info encoding=none :: none | epp:source_encoding(), errors=[] :: [err_warn_info()], - warnings=[] :: [err_warn_info()]}). + warnings=[] :: [err_warn_info()], + extra_chunks=[] :: [{binary(), binary()}]}). internal({forms,Forms}, Opts0) -> {_,Ps} = passes(forms, Opts0), Source = proplists:get_value(source, Opts0, ""), Opts1 = proplists:delete(source, Opts0), - Compile = #compile{options=Opts1,mod_options=Opts1}, + Compile = build_compile(Opts1), internal_comp(Ps, Forms, Source, "", Compile); internal({file,File}, Opts) -> {Ext,Ps} = passes(file, Opts), - Compile = #compile{options=Opts,mod_options=Opts}, + Compile = build_compile(Opts), internal_comp(Ps, none, File, Ext, Compile). +build_compile(Opts0) -> + ExtraChunks = proplists:get_value(extra_chunks, Opts0, []), + Opts1 = proplists:delete(extra_chunks, Opts0), + #compile{options=Opts1,mod_options=Opts1,extra_chunks=ExtraChunks}. + internal_comp(Passes, Code0, File, Suffix, St0) -> Dir = filename:dirname(File), Base = filename:basename(File, Suffix), @@ -1386,14 +1392,15 @@ encrypt({des3_cbc=Type,Key,IVec,BlockSize}, Bin0) -> save_core_code(Code, St) -> {ok,Code,St#compile{core_code=cerl:from_records(Code)}}. -beam_asm(Code0, #compile{ifile=File,abstract_code=Abst, +beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,extra_chunks=ExtraChunks, options=CompilerOpts,mod_options=Opts0}=St) -> Source = paranoid_absname(File), Opts1 = lists:map(fun({debug_info_key,_}) -> {debug_info_key,'********'}; (Other) -> Other end, Opts0), Opts2 = [O || O <- Opts1, effects_code_generation(O)], - case beam_asm:module(Code0, Abst, Source, Opts2, CompilerOpts) of + Chunks = [{<<"Abst">>, Abst} | ExtraChunks], + case beam_asm:module(Code0, Chunks, Source, Opts2, CompilerOpts) of {ok,Code} -> {ok,Code,St#compile{abstract_code=[]}} end. -- cgit v1.2.3