aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src
diff options
context:
space:
mode:
authorJosé Valim <[email protected]>2016-05-31 14:28:54 +0200
committerJosé Valim <[email protected]>2017-01-30 15:24:05 +0100
commit26b59dfe67ef551cd94765557cdd8c79794bcc38 (patch)
tree696adc07b3e7a4a3f1ed6c52311ff6e163b218b4 /lib/compiler/src
parent6c7539b0e39996f870385e5276e08c0dd98b6eb8 (diff)
downloadotp-26b59dfe67ef551cd94765557cdd8c79794bcc38.tar.gz
otp-26b59dfe67ef551cd94765557cdd8c79794bcc38.tar.bz2
otp-26b59dfe67ef551cd94765557cdd8c79794bcc38.zip
Add new AtU8 beam chunk
The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson).
Diffstat (limited to 'lib/compiler/src')
-rw-r--r--lib/compiler/src/beam_asm.erl29
-rw-r--r--lib/compiler/src/beam_dict.erl12
-rw-r--r--lib/compiler/src/compile.erl21
3 files changed, 41 insertions, 21 deletions
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index a2f5dc674c..2fc2850591 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -21,7 +21,7 @@
-module(beam_asm).
--export([module/4]).
+-export([module/5]).
-export([encode/2]).
-export_type([fail/0,label/0,reg/0,src/0,module_code/0,function_name/0]).
@@ -57,20 +57,20 @@
-type module_code() ::
{module(),[_],[_],[asm_function()],pos_integer()}.
--spec module(module_code(), exports(), [_], [compile:option()]) ->
+-spec module(module_code(), exports(), [_], [compile:option()], [compile:option()]) ->
{'ok',binary()}.
-module(Code, Abst, SourceFile, Opts) ->
- {ok,assemble(Code, Abst, SourceFile, Opts)}.
+module(Code, Abst, SourceFile, Opts, CompilerOpts) ->
+ {ok,assemble(Code, Abst, SourceFile, Opts, CompilerOpts)}.
-assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) ->
+assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts, CompilerOpts) ->
{1,Dict0} = beam_dict:atom(Mod, beam_dict:new()),
{0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0),
NumFuncs = length(Asm0),
{Asm,Attr} = on_load(Asm0, Attr0),
Exp = cerl_sets:from_list(Exp0),
{Code,Dict2} = assemble_1(Asm, Exp, Dict1, []),
- build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts).
+ build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts).
on_load(Fs0, Attr0) ->
case proplists:get_value(on_load, Attr0) of
@@ -113,7 +113,7 @@ assemble_function([H|T], Acc, Dict0) ->
assemble_function([], Code, Dict) ->
{Code, Dict}.
-build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
+build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts, CompilerOpts) ->
%% Create the code chunk.
CodeChunk = chunk(<<"Code">>,
@@ -125,9 +125,9 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
Code),
%% Create the atom table chunk.
-
- {NumAtoms, AtomTab} = beam_dict:atom_table(Dict),
- AtomChunk = chunk(<<"Atom">>, <<NumAtoms:32>>, AtomTab),
+ AtomEncoding = atom_encoding(CompilerOpts),
+ {NumAtoms, AtomTab} = beam_dict:atom_table(Dict, AtomEncoding),
+ AtomChunk = chunk(atom_chunk_name(AtomEncoding), <<NumAtoms:32>>, AtomTab),
%% Create the import table chunk.
@@ -203,6 +203,15 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
end,
build_form(<<"BEAM">>, Chunks).
+atom_encoding(Opts) ->
+ case proplists:get_bool(no_utf8_atoms, Opts) of
+ false -> utf8;
+ true -> latin1
+ end.
+
+atom_chunk_name(utf8) -> <<"AtU8">>;
+atom_chunk_name(latin1) -> <<"Atom">>.
+
%% finalize_fun_table(Essentials, MD5) -> FinalizedEssentials
%% Update the 'old_uniq' field in the entry for each fun in the
%% 'FunT' chunk. We'll use part of the MD5 for the module as a
diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl
index 719d799fd7..990e86062a 100644
--- a/lib/compiler/src/beam_dict.erl
+++ b/lib/compiler/src/beam_dict.erl
@@ -24,7 +24,7 @@
-export([new/0,opcode/2,highest_opcode/1,
atom/2,local/4,export/4,import/4,
string/2,lambda/3,literal/2,line/2,fname/2,
- atom_table/1,local_table/1,export_table/1,import_table/1,
+ atom_table/2,local_table/1,export_table/1,import_table/1,
string_table/1,lambda_table/1,literal_table/1,
line_table/1]).
@@ -197,15 +197,15 @@ fname(Name, #asm{fnames=Fnames}=Dict) ->
end.
%% Returns the atom table.
-%% atom_table(Dict) -> {LastIndex,[Length,AtomString...]}
--spec atom_table(bdict()) -> {non_neg_integer(), [[non_neg_integer(),...]]}.
+%% atom_table(Dict, Encoding) -> {LastIndex,[Length,AtomString...]}
+-spec atom_table(bdict(), latin1 | utf8) -> {non_neg_integer(), [[non_neg_integer(),...]]}.
-atom_table(#asm{atoms=Atoms}) ->
+atom_table(#asm{atoms=Atoms}, Encoding) ->
NumAtoms = maps:size(Atoms),
Sorted = lists:keysort(2, maps:to_list(Atoms)),
{NumAtoms,[begin
- L = atom_to_list(A),
- [length(L)|L]
+ L = atom_to_binary(A, Encoding),
+ [byte_size(L),L]
end || {A,_} <- Sorted]}.
%% Returns the table of local functions.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 069add7890..dcd962df66 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -214,11 +214,21 @@ expand_opt(report, Os) ->
expand_opt(return, Os) ->
[return_errors,return_warnings|Os];
expand_opt(r12, Os) ->
- [no_recv_opt,no_line_info|Os];
+ [no_recv_opt,no_line_info,no_utf8_atoms|Os];
expand_opt(r13, Os) ->
- [no_recv_opt,no_line_info|Os];
+ [no_recv_opt,no_line_info,no_utf8_atoms|Os];
expand_opt(r14, Os) ->
- [no_line_info|Os];
+ [no_line_info,no_utf8_atoms|Os];
+expand_opt(r15, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r16, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r17, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r18, Os) ->
+ [no_utf8_atoms|Os];
+expand_opt(r19, Os) ->
+ [no_utf8_atoms|Os];
expand_opt({debug_info_key,_}=O, Os) ->
[encrypt_debug_info,O|Os];
expand_opt(no_float_opt, Os) ->
@@ -1376,13 +1386,14 @@ encrypt({des3_cbc=Type,Key,IVec,BlockSize}, Bin0) ->
save_core_code(Code, St) ->
{ok,Code,St#compile{core_code=cerl:from_records(Code)}}.
-beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,mod_options=Opts0}=St) ->
+beam_asm(Code0, #compile{ifile=File,abstract_code=Abst,
+ options=CompilerOpts,mod_options=Opts0}=St) ->
Source = paranoid_absname(File),
Opts1 = lists:map(fun({debug_info_key,_}) -> {debug_info_key,'********'};
(Other) -> Other
end, Opts0),
Opts2 = [O || O <- Opts1, effects_code_generation(O)],
- case beam_asm:module(Code0, Abst, Source, Opts2) of
+ case beam_asm:module(Code0, Abst, Source, Opts2, CompilerOpts) of
{ok,Code} -> {ok,Code,St#compile{abstract_code=[]}}
end.