1 files changed, 91 insertions, 36 deletions
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index f6ca7a0afb..df09dcb06c 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2018. All Rights Reserved.
 %%
 %% Licensed under the Apache License, Version 2.0 (the "License");
 %% you may not use this file except in compliance with the License.
@@ -24,20 +24,51 @@
 -export([module/4]).
 -export([encode/2]).
 
+-export_type([fail/0,label/0,reg/0,reg_num/0,src/0,module_code/0,function_name/0]).
+
 -import(lists, [map/2,member/2,keymember/3,duplicate/2,splitwith/2]).
 -include("beam_opcodes.hrl").
 
-module(Code, Abst, SourceFile, Opts) ->
-    {ok,assemble(Code, Abst, SourceFile, Opts)}.
+%% Common types for describing operands for BEAM instructions.
+-type reg_num() :: 0..1023.
+-type reg() :: {'x',reg_num()} | {'y',reg_num()}.
+-type src() :: reg() |
+	       {'literal',term()} |
+	       {'atom',atom()} |
+	       {'integer',integer()} |
+	       'nil' |
+	       {'float',float()}.
+-type label() :: pos_integer().
+-type fail() :: {'f',label() | 0}.
+
+%% asm_instruction() describes only the instructions that
+%% are used in BEAM files (as opposed to internal instructions
+%% used only during optimization).
+
+-type asm_instruction() :: atom() | tuple().
+
+-type function_name() :: atom().
+
+-type asm_function() ::
+        {'function',function_name(),arity(),label(),[asm_instruction()]}.
 
-assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, Abst, SourceFile, Opts) ->
+-type module_code() ::
+        {module(),[_],[_],[asm_function()],pos_integer()}.
+
+-spec module(module_code(), [{binary(), binary()}], [{atom(),term()}], [compile:option()]) ->
+                    {'ok',binary()}.
+
+module(Code, ExtraChunks, CompileInfo, CompilerOpts) ->
+    {ok,assemble(Code, ExtraChunks, CompileInfo, CompilerOpts)}.
+
+assemble({Mod,Exp0,Attr0,Asm0,NumLabels}, ExtraChunks, CompileInfo, CompilerOpts) ->
     {1,Dict0} = beam_dict:atom(Mod, beam_dict:new()),
     {0,Dict1} = beam_dict:fname(atom_to_list(Mod) ++ ".erl", Dict0),
     NumFuncs = length(Asm0),
     {Asm,Attr} = on_load(Asm0, Attr0),
     Exp = cerl_sets:from_list(Exp0),
     {Code,Dict2} = assemble_1(Asm, Exp, Dict1, []),
-    build_file(Code, Attr, Dict2, NumLabels, NumFuncs, Abst, SourceFile, Opts).
+    build_file(Code, Attr, Dict2, NumLabels, NumFuncs, ExtraChunks, CompileInfo, CompilerOpts).
 
 on_load(Fs0, Attr0) ->
     case proplists:get_value(on_load, Attr0) of
@@ -80,7 +111,7 @@ assemble_function([H|T], Acc, Dict0) ->
 assemble_function([], Code, Dict) ->
     {Code, Dict}.
 
-build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
+build_file(Code, Attr, Dict, NumLabels, NumFuncs, ExtraChunks, CompileInfo, CompilerOpts) ->
     %% Create the code chunk.
 
     CodeChunk = chunk(<<"Code">>,
@@ -92,9 +123,9 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
 		      Code),
 
     %% Create the atom table chunk.
-
-    {NumAtoms, AtomTab} = beam_dict:atom_table(Dict),
-    AtomChunk = chunk(<<"Atom">>, <<NumAtoms:32>>, AtomTab),
+    AtomEncoding = atom_encoding(CompilerOpts),
+    {NumAtoms, AtomTab} = beam_dict:atom_table(Dict, AtomEncoding),
+    AtomChunk = chunk(atom_chunk_name(AtomEncoding), <<NumAtoms:32>>, AtomTab),
 
     %% Create the import table chunk.
 
@@ -151,25 +182,34 @@ build_file(Code, Attr, Dict, NumLabels, NumFuncs, Abst, SourceFile, Opts) ->
     Essentials1 = [iolist_to_binary(C) || C <- Essentials0],
     MD5 = module_md5(Essentials1),
     Essentials = finalize_fun_table(Essentials1, MD5),
-    {Attributes,Compile} = build_attributes(Opts, SourceFile, Attr, MD5),
+    {Attributes,Compile} = build_attributes(Attr, CompileInfo, MD5),
     AttrChunk = chunk(<<"Attr">>, Attributes),
     CompileChunk = chunk(<<"CInf">>, Compile),
 
-    %% Create the abstract code chunk.
+    %% Compile all extra chunks.
 
-    AbstChunk = chunk(<<"Abst">>, Abst),
+    CheckedChunks = [chunk(Key, Value) || {Key, Value} <- ExtraChunks],
 
     %% Create IFF chunk.
 
-    Chunks = case member(slim, Opts) of
+    Chunks = case member(slim, CompilerOpts) of
 		 true ->
-		     [Essentials,AttrChunk,AbstChunk];
+		     [Essentials,AttrChunk];
 		 false ->
 		     [Essentials,LocChunk,AttrChunk,
-		      CompileChunk,AbstChunk,LineChunk]
+		      CompileChunk,CheckedChunks,LineChunk]
 	     end,
     build_form(<<"BEAM">>, Chunks).
 
+atom_encoding(Opts) ->
+    case proplists:get_bool(no_utf8_atoms, Opts) of
+	false -> utf8;
+	true -> latin1
+    end.
+
+atom_chunk_name(utf8) -> <<"AtU8">>;
+atom_chunk_name(latin1) -> <<"Atom">>.
+
 %% finalize_fun_table(Essentials, MD5) -> FinalizedEssentials
 %%  Update the 'old_uniq' field in the entry for each fun in the
 %%  'FunT' chunk. We'll use part of the MD5 for the module as a
@@ -200,9 +240,7 @@ build_form(Id, Chunks0) when byte_size(Id) =:= 4, is_list(Chunks0) ->
 
 chunk(Id, Contents) when byte_size(Id) =:= 4, is_binary(Contents) ->
     Size = byte_size(Contents),
-    [<<Id/binary,Size:32>>,Contents|pad(Size)];
-chunk(Id, Contents) when is_list(Contents) ->
-    chunk(Id, list_to_binary(Contents)).
+    [<<Id/binary,Size:32>>,Contents|pad(Size)].
 
 %% Build a correctly padded chunk (with a sub-header).
 
@@ -224,17 +262,10 @@ flatten_exports(Exps) ->
 flatten_imports(Imps) ->
     list_to_binary(map(fun({M,F,A}) -> <<M:32,F:32,A:32>> end, Imps)).
 
-build_attributes(Opts, SourceFile, Attr, MD5) ->
-    Misc0 = case SourceFile of
-		[] -> [];
-		[_|_] -> [{source,SourceFile}]
-	    end,
-    Misc = case member(slim, Opts) of
-	       false -> Misc0;
-	       true -> []
-	   end,
-    Compile = [{options,Opts},{version,?COMPILER_VSN}|Misc],
-    {term_to_binary(set_vsn_attribute(Attr, MD5)),term_to_binary(Compile)}.
+build_attributes(Attr, Compile, MD5) ->
+    AttrBinary = term_to_binary(set_vsn_attribute(Attr, MD5)),
+    CompileBinary = term_to_binary([{version,?COMPILER_VSN}|Compile]),
+    {AttrBinary,CompileBinary}.
 
 build_line_table(Dict) ->
     {NumLineInstrs,NumFnames0,Fnames0,NumLines,Lines0} =
@@ -376,15 +407,25 @@ encode_arg({atom, Atom}, Dict0) when is_atom(Atom) ->
     {Index, Dict} = beam_dict:atom(Atom, Dict0),
     {encode(?tag_a, Index), Dict};
 encode_arg({integer, N}, Dict) ->
-    {encode(?tag_i, N), Dict};
+    %% Conservatively assume that all integers whose absolute
+    %% value is greater than 1 bsl 128 will be bignums in
+    %% the runtime system.
+    if
+        N >= 1 bsl 128 ->
+            encode_literal(N, Dict);
+        N =< -(1 bsl 128) ->
+            encode_literal(N, Dict);
+        true ->
+            {encode(?tag_i, N), Dict}
+    end;
 encode_arg(nil, Dict) ->
     {encode(?tag_a, 0), Dict};
 encode_arg({f, W}, Dict) ->
     {encode(?tag_f, W), Dict};
 %% encode_arg({'char', C}, Dict) ->
 %%     {encode(?tag_h, C), Dict};
-encode_arg({string, String}, Dict0) ->
-    {Offset, Dict} = beam_dict:string(String, Dict0),
+encode_arg({string, BinString}, Dict0) when is_binary(BinString) ->
+    {Offset, Dict} = beam_dict:string(BinString, Dict0),
     {encode(?tag_u, Offset), Dict};
 encode_arg({extfunc, M, F, A}, Dict0) ->
     {Index, Dict} = beam_dict:import(M, F, A, Dict0),
@@ -393,7 +434,7 @@ encode_arg({list, List}, Dict0) ->
     {L, Dict} = encode_list(List, Dict0, []),
     {[encode(?tag_z, 1), encode(?tag_u, length(List))|L], Dict};
 encode_arg({float, Float}, Dict) when is_float(Float) ->
-    encode_arg({literal,Float}, Dict);
+    encode_literal(Float, Dict);
 encode_arg({fr,Fr}, Dict) ->
     {[encode(?tag_z, 2),encode(?tag_u, Fr)], Dict};
 encode_arg({field_flags,Flags0}, Dict) ->
@@ -401,12 +442,24 @@ encode_arg({field_flags,Flags0}, Dict) ->
     {encode(?tag_u, Flags), Dict};
 encode_arg({alloc,List}, Dict) ->
     encode_alloc_list(List, Dict);
-encode_arg({literal,Lit}, Dict0) ->
-    {Index,Dict} = beam_dict:literal(Lit, Dict0),
-    {[encode(?tag_z, 4),encode(?tag_u, Index)],Dict};
+encode_arg({literal,Lit}, Dict) ->
+    if
+        Lit =:= [] ->
+            encode_arg(nil, Dict);
+        is_atom(Lit) ->
+            encode_arg({atom,Lit}, Dict);
+        is_integer(Lit) ->
+            encode_arg({integer,Lit}, Dict);
+        true ->
+            encode_literal(Lit, Dict)
+    end;
 encode_arg(Int, Dict) when is_integer(Int) ->
     {encode(?tag_u, Int),Dict}.
 
+encode_literal(Literal, Dict0) ->
+    {Index,Dict} = beam_dict:literal(Literal, Dict0),
+    {[encode(?tag_z, 4),encode(?tag_u, Index)],Dict}.
+
 %%flag_to_bit(aligned) -> 16#01; %% No longer useful.
 flag_to_bit(little)  -> 16#02;
 flag_to_bit(big)     -> 16#00;
@@ -434,6 +487,8 @@ encode_alloc_list_1([{floats,Floats}|T], Dict, Acc0) ->
 encode_alloc_list_1([], Dict, Acc) ->
     {iolist_to_binary(Acc),Dict}.
 
+-spec encode(non_neg_integer(), integer()) -> iolist() | integer().
+
 encode(Tag, N) when N < 0 ->
     encode1(Tag, negative_to_bytes(N));
 encode(Tag, N) when N < 16 ->