#!/usr/bin/env perl -W
#
# %CopyrightBegin%
#
# Copyright Ericsson AB 1998-2017. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# %CopyrightEnd%
#
use strict;
use vars qw($BEAM_FORMAT_NUMBER);
$BEAM_FORMAT_NUMBER = undef;
my $target = \&emulator_output;
my $outdir = "."; # Directory for output files.
my $verbose = 0;
my $hot = 1;
my $num_file_opcodes = 0;
my $wordsize = 32;
my %defs; # Defines (from command line).
# This is shift counts and mask for the packer.
my $WHOLE_WORD = '';
my @pack_instr;
my @pack_shift;
my @pack_mask;
$pack_instr[2] = ['6', 'i'];
$pack_instr[3] = ['0', '0', 'i'];
$pack_instr[4] = ['6', '6', '6', 'i']; # Only for 64 bit wordsize
$pack_shift[2] = ['0', 'BEAM_LOOSE_SHIFT'];
$pack_shift[3] = ['0', 'BEAM_TIGHT_SHIFT', '(2*BEAM_TIGHT_SHIFT)'];
$pack_shift[4] = ['0', 'BEAM_LOOSE_SHIFT', # Only for 64 bit wordsize
'(2*BEAM_LOOSE_SHIFT)',
'(3*BEAM_LOOSE_SHIFT)'];
$pack_mask[2] = ['BEAM_LOOSE_MASK', $WHOLE_WORD];
$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK'];
$pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize
'BEAM_LOOSE_MASK',
'BEAM_LOOSE_MASK',
$WHOLE_WORD];
# Mapping from packagable arguments to number of packed arguments per
# word. Initialized after the wordsize is known.
my @args_per_word;
# There are two types of instructions: generic and specific.
# The generic instructions are those generated by the Beam compiler.
# Corresponding to each generic instruction, there is generally a
# whole family of related specific instructions. Specific instructions
# are those executed by the VM interpreter during run-time.
# Maximum number of operands for a generic instruction.
# In beam_load.c the MAX_OPARGS refers to the maximum
# number of operands for generic instructions.
my $max_gen_operands = 8;
# Maximum number of operands for a specific instruction.
# Must be even. The beam_load.c file must be updated, too.
my $max_spec_operands = 6;
# The maximum number of primitive genop_types.
my $max_genop_types = 16;
my %gen_opnum;
my %num_specific;
my %gen_to_spec;
my %specific_op;
my %gen_arity;
my @gen_arity;
my @gen_opname;
my @op_to_name;
my @obsolete;
# Instructions and micro instructions implemented in C.
my %c_code; # C code block, location, arguments.
my %c_code_used; # Used or not.
# Definitions for instructions combined from micro instructions.
my %combined_instrs;
my %combined_code; # Combined micro instructions.
my %hot_code;
my %cold_code;
my @unnumbered_generic;
my %unnumbered;
my %is_transformed;
#
# Pre-processor.
#
my @if_val;
my @if_line;
#
# Code transformations.
#
my $te_max_vars = 0; # Max number of variables ever needed.
my %gen_transform;
my %match_engine_ops; # All opcodes for the match engine.
my %gen_transform_offset;
my @transformations;
my @call_table;
my %call_table;
my @pred_table;
my %pred_table;
# Operand types for generic instructions.
my $compiler_types = "uiaxyfhz";
my $loader_types = "nprvlqo";
my $genop_types = $compiler_types . $loader_types;
#
# Defines the argument types and their loaded size assuming no packing.
#
my %arg_size = ('r' => 0, # x(0) - x register zero
'x' => 1, # x(N), N > 0 - x register
'y' => 1, # y(N) - y register
'i' => 1, # tagged integer
'a' => 1, # tagged atom
'n' => 0, # NIL (implicit)
'c' => 1, # tagged constant (integer, atom, nil)
's' => 1, # tagged source; any of the above
'd' => 1, # tagged destination register (r, x, y)
'f' => 1, # failure label
'j' => 1, # either 'f' or 'p'
'e' => 1, # pointer to export entry
'L' => 0, # label
'I' => 1, # untagged integer
't' => 1, # untagged integer -- can be packed
'b' => 1, # pointer to bif
'A' => 1, # arity value
'P' => 1, # byte offset into tuple or stack
'Q' => 1, # like 'P', but packable
'h' => 1, # character
'l' => 1, # float reg
'q' => 1, # literal term
);
#
# Generate bits.
#
my %type_bit;
my @tag_type;
sub define_type_bit {
my($tag,$val) = @_;
defined $type_bit{$tag} and
sanity("the tag '$tag' has already been defined with the value ",
$type_bit{$tag});
$type_bit{$tag} = $val;
}
{
my($bit) = 1;
my(%bit);
foreach (split('', $genop_types)) {
push(@tag_type, $_);
define_type_bit($_, $bit);
$bit{$_} = $bit;
$bit *= 2;
}
# Composed types.
define_type_bit('d', $type_bit{'x'} | $type_bit{'y'});
define_type_bit('c', $type_bit{'i'} | $type_bit{'a'} |
$type_bit{'n'} | $type_bit{'q'});
define_type_bit('s', $type_bit{'d'} | $type_bit{'i'} |
$type_bit{'a'} | $type_bit{'n'} |
$type_bit{'q'});
define_type_bit('j', $type_bit{'f'} | $type_bit{'p'});
# Aliases (for matching purposes).
define_type_bit('I', $type_bit{'u'});
define_type_bit('t', $type_bit{'u'});
define_type_bit('A', $type_bit{'u'});
define_type_bit('L', $type_bit{'u'});
define_type_bit('b', $type_bit{'u'});
define_type_bit('N', $type_bit{'u'});
define_type_bit('U', $type_bit{'u'});
define_type_bit('e', $type_bit{'u'});
define_type_bit('P', $type_bit{'u'});
define_type_bit('Q', $type_bit{'u'});
}
#
# Pre-define the 'fail' instruction. It is used internally
# by the 'try_me_else_fail' instruction.
#
$match_engine_ops{'TOP_fail'} = 1;
#
# Sanity checks.
#
{
if (@tag_type > $max_genop_types) {
sanity("\$max_genop_types is $max_genop_types, ",
"but there are ", scalar(@tag_type),
" primitive tags defined\n");
}
foreach my $tag (@tag_type) {
sanity("tag '$tag': primitive tags must be named with lowercase letters")
unless $tag =~ /^[a-z]$/;
}
foreach my $tag (keys %arg_size) {
defined $type_bit{$tag} or
sanity("the tag '$tag' has a size in %arg_size, " .
"but has no defined bit pattern");
}
}
#
# Parse command line options.
#
while (@ARGV && $ARGV[0] =~ /^-(.*)/) {
$_ = $1;
shift;
($target = \&emulator_output), next if /^emulator/;
($target = \&compiler_output), next if /^compiler/;
($outdir = shift), next if /^outdir/;
($wordsize = shift), next if /^wordsize/;
($verbose = 1), next if /^v/;
($defs{$1} = $2), next if /^D(\w+)=(\w+)/;
die "$0: Bad option: -$_\n";
}
if ($wordsize == 32) {
$defs{'ARCH_32'} = 1;
$defs{'ARCH_64'} = 0;
} elsif ($wordsize == 64) {
$defs{'ARCH_32'} = 0;
$defs{'ARCH_64'} = 1;
}
#
# Initialize number of arguments per packed word.
#
$args_per_word[2] = 2;
$args_per_word[3] = 3;
$args_per_word[4] = 2;
$args_per_word[5] = 3;
$args_per_word[6] = 3;
if ($wordsize == 64) {
$pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD];
$args_per_word[4] = 4;
}
#
# Parse the input files.
#
my $in_c_code = '';
my $c_code_block;
my $c_code_loc;
my @c_args;
while (<>) {
my($op_num);
if ($in_c_code) {
if (/^\}/) {
my $name = $in_c_code;
my $block = $c_code_block;
$in_c_code = '';
$block =~ s/^ //mg;
chomp $block;
$c_code{$name} = [$block,$c_code_loc,@c_args];
} else {
$c_code_block .= $_;
}
next;
}
chomp;
if (s/\\$//) {
$_ .= <>;
redo unless eof(ARGV);
}
next if /^\s*$/;
next if /^\#/;
next if m@^//@;
#
# Handle %if.
#
if (/^\%if (\w+)/) {
my $name = $1;
my $val = $defs{$name};
defined $val or error("'$name' is undefined");
push @if_val, $val;
push @if_line, $.;
next;
} elsif (/^\%unless (\w+)/) {
my $name = $1;
my $val = $defs{$name};
defined $val or error("'$name' is undefined");
push @if_val, !$val;
push @if_line, $.;
next;
} elsif (/^\%else$/) {
unless (@if_line) {
error("%else without a preceding %if/%unless");
}
$if_line[$#if_line] = $.;
$if_val[$#if_val] = !$if_val[$#if_val];
next;
} elsif (/^\%endif$/) {
unless (@if_line) {
error("%endif without a preceding %if/%unless/%else");
}
pop @if_val;
pop @if_line;
next;
}
if (@if_val and not $if_val[$#if_val]) {
next;
}
#
# Handle assignments.
#
if (/^([\w_][\w\d_]+)=(.*)/) {
no strict 'refs';
my($name) = $1;
$$name = $2;
next;
}
#
# Handle %hot/%cold.
#
if (/^\%hot/) {
$hot = 1;
next;
} elsif (/^\%cold/) {
$hot = 0;
next;
}
#
# Handle transformations.
#
if (/=>/) {
parse_transformation($_);
next;
}
#
# Handle C code blocks.
#
if (/^(\w[\w.]*)\(([^\)]*)\)\s*{/) {
my $name = $1;
$in_c_code = $name;
$c_code_block = '';
@c_args = parse_c_args($2);
$c_code_loc = "$ARGV($.)";
if (defined $c_code{$name}) {
my $where = $c_code{$name}->[1];
error("$name: already defined at $where");
}
next;
}
#
# Handle definition of instructions in terms of
# micro instructions.
#
if (/^(\w+)\s*:=\s*([\w.]+)\s*;\s*$/) {
$combined_instrs{$1} = ["$ARGV($.)","beam_instrs.h",$2];
next;
}
#
# Parse off the number of the operation.
#
$op_num = undef;
if (s/^(\d+):\s*//) {
$op_num = $1;
$op_num != 0 or error("Opcode 0 invalid");
error("Opcode $op_num already defined")
if defined $gen_opname[$op_num];
}
#
# Parse: Name/Arity (generic instruction)
#
if (m@^(-)?(\w+)/(\d)\s*$@) {
my($obsolete) = $1;
my($name) = $2;
my($arity) = $3;
$name =~ /^[a-z]/ or error("Opname must start with a lowercase letter");
defined $gen_arity{$name} and $gen_arity{$name} != $arity and
error("Opname $name already defined with arity $gen_arity{$name}");
defined $unnumbered{$name,$arity} and
error("Opname $name already defined with arity $gen_arity{$name}");
if (defined $op_num) { # Numbered generic operation
$gen_opname[$op_num] = $name;
$gen_arity[$op_num] = $arity;
$gen_opnum{$name,$arity} = $op_num;
$gen_arity{$name} = $arity;
$gen_to_spec{"$name/$arity"} = undef;
$num_specific{"$name/$arity"} = 0;
$obsolete[$op_num] = defined $obsolete;
} else { # Unnumbered generic operation.
push(@unnumbered_generic, [$name, $arity]);
$unnumbered{$name,$arity} = 1;
}
next;
}
#
# Parse specific instructions (only present in emulator/loader):
# Name Arg1 Arg2...
#
my($name, @args) = split;
error("too many operands")
if @args > $max_spec_operands;
syntax_check($name, @args);
my $arity = @args;
if (defined $gen_opnum{$name,$arity} and $obsolete[$gen_opnum{$name,$arity}]) {
error("specific instructions may not be specified for obsolete instructions");
}
save_specific_ops($name, $arity, $hot, @args);
if (defined $op_num) {
error("specific instructions must not be numbered");
} elsif (!defined($gen_arity{$name}) && !defined($unnumbered{$name,$arity})) {
#
# Create an unumbered generic instruction too.
#
push(@unnumbered_generic, [$name, $arity]);
$unnumbered{$name,$arity} = 1;
}
} continue {
if (eof(ARGV)) {
close(ARGV);
if (@if_line) {
error("Unterminated %if/%unless/%else at " .
"line $if_line[$#if_line]\n");
}
}
}
$num_file_opcodes = @gen_opname;
#
# Number all generic operations without numbers.
#
{
my $ref;
foreach $ref (@unnumbered_generic) {
my($name, $arity) = @$ref;
my $op_num = @gen_opname;
push(@gen_opname, $name);
push(@gen_arity, $arity);
$gen_opnum{$name,$arity} = $op_num;
$gen_arity{$name} = $arity;
$gen_to_spec{"$name/$arity"} = undef;
$num_specific{"$name/$arity"} = 0;
}
}
#
# Produce output for the chosen target.
#
&$target();
#
# Ensure that all C code implementations have been used.
#
{
my(@unused) = grep(!$c_code_used{$_}, keys %c_code);
foreach my $unused (@unused) {
my(undef,$where) = @{$c_code{$unused}};
warn "$where: $unused is unused\n";
}
die "\n" if @unused;
}
#
# Produce output needed by the emulator/loader.
#
sub emulator_output {
my $i;
my $name;
my $key; # Loop variable.
#
# Information about opcodes (beam_opcodes.c).
#
$name = "$outdir/beam_opcodes.c";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
print "#ifdef HAVE_CONFIG_H\n";
print "# include \"config.h\"\n";
print "#endif\n\n";
print '#include "sys.h"', "\n";
print '#include "erl_vm.h"', "\n";
print '#include "export.h"', "\n";
print '#include "erl_process.h"', "\n";
print '#include "bif.h"', "\n";
print '#include "erl_atom_table.h"', "\n";
print '#include "beam_load.h"', "\n";
print "\n";
print "const char tag_to_letter[] = {\n ";
for ($i = 0; $i < length($genop_types); $i++) {
print "'$tag_type[$i]', ";
}
for (; $i < @tag_type; $i++) {
print "'_', ";
}
print "\n};\n";
print "\n";
#
# Combine micro instruction into instruction blocks.
#
combine_micro_instructions();
#
# Generate code for specific ops.
#
my($spec_opnum) = 0;
print "const OpEntry opc[] = {\n";
foreach $key (sort keys %specific_op) {
$gen_to_spec{$key} = $spec_opnum;
$num_specific{$key} = @{$specific_op{$key}};
#
# Pick up all instructions and manufacture sort keys; we must have
# the most specific instructions appearing first (e.g. an 'x' operand
# should be matched before 's' or 'd').
#
my(%items) = ();
foreach (@{$specific_op{$key}}) {
my($name, $hot, @args) = @{$_};
my($sign) = join('', @args);
# The primitive types should sort before other types.
my($sort_key) = $sign;
eval "\$sort_key =~ tr/$genop_types/./";
$sort_key .= ":$sign";
$items{$sort_key} = [$name, $hot, $sign, @args];
}
#
# Now call the generator for the sorted result.
#
foreach (sort keys %items) {
my($name, $hot, $sign, @args) = @{$items{$_}};
my $arity = @args;
my($instr) = "${name}_$sign";
$instr =~ s/_$//;
#
# Call a generator to calculate size and generate macros
# for the emulator.
#
my($size, $code, $pack) =
basic_generator($name, $hot, '', 0, undef, @args);
#
# Save the generated $code for later.
#
if (defined $code) {
if ($hot) {
push(@{$hot_code{$code}}, $instr);
} else {
push(@{$cold_code{$code}}, $instr);
}
}
#
# Calculate the bit mask which should be used to match this
# instruction.
#
my(@bits) = (0) x ($max_spec_operands/2);
my($i);
my $involves_r = 0;
for ($i = 0; $i < $max_spec_operands && defined $args[$i]; $i++) {
my $t = $args[$i];
my $bits = $type_bit{$t};
if ($t eq 'r') {
$bits |= $type_bit{'x'};
$involves_r |= 1 << $i;
}
my $shift = $max_genop_types * ($i % 2);
$bits[int($i/2)] |= $bits << $shift;
}
printf "/* %3d */ ", $spec_opnum;
my $print_name = $sign ne '' ? "${name}_$sign" : $name;
my $init = "{";
my $sep = "";
foreach (@bits) {
$init .= sprintf("%s0x%X", $sep, $_);
$sep = ",";
}
$init .= "}";
init_item($print_name, $init, $involves_r, $size, $pack, $sign);
$op_to_name[$spec_opnum] = $instr;
$spec_opnum++;
}
}
print "};\n\n";
print "const int num_instructions = $spec_opnum;\n\n";
#
# Print the array for instruction counts.
#
print "#ifdef ERTS_OPCODE_COUNTER_SUPPORT\n";
print "Uint erts_instr_count[$spec_opnum];\n";
print "#endif\n";
print "\n";
#
# Generate transformations.
#
tr_gen(@transformations);
#
# Print the generic instruction table.
#
print "const GenOpEntry gen_opc[] = {\n";
for ($i = 0; $i < @gen_opname; $i++) {
if ($i == $num_file_opcodes) {
print "\n/*\n * Internal generic instructions.\n */\n\n";
}
my($name) = $gen_opname[$i];
my($arity) = $gen_arity[$i];
printf "/* %3d */ ", $i;
if (!defined $name) {
init_item("", 0, 0, 0, -1);
} else {
my($key) = "$name/$arity";
my($tr) = defined $gen_transform_offset{$key} ?
$gen_transform_offset{$key} : -1;
my($spec_op) = $gen_to_spec{$key};
my($num_specific) = $num_specific{$key};
defined $spec_op or
$obsolete[$gen_opnum{$name,$arity}] or
$is_transformed{$name,$arity} or
error("instruction $key has no specific instruction");
$spec_op = -1 unless defined $spec_op;
init_item($name, $arity, $spec_op, $num_specific, $tr);
}
}
print "};\n";
#
# Information about opcodes (beam_opcodes.h).
#
$name = "$outdir/beam_opcodes.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
print "#ifndef __OPCODES_H__\n";
print "#define __OPCODES_H__\n\n";
print "#define BEAM_FORMAT_NUMBER $BEAM_FORMAT_NUMBER\n";
print "#define MAX_GENERIC_OPCODE ", $num_file_opcodes-1, "\n";
print "#define NUM_GENERIC_OPS ", scalar(@gen_opname), "\n";
print "#define NUM_SPECIFIC_OPS ", scalar(@op_to_name), "\n";
print "#define SCRATCH_X_REG 1023\n";
print "\n";
if ($wordsize == 32) {
print "#if defined(ARCH_64)\n";
print qq[ #error "32-bit architecture assumed, but ARCH_64 is defined"\n];
print "#endif\n";
print "#define BEAM_LOOSE_MASK 0xFFF\n";
print "#define BEAM_TIGHT_MASK 0xFFC\n";
print "#define BEAM_LOOSE_SHIFT 16\n";
print "#define BEAM_TIGHT_SHIFT 10\n";
} elsif ($wordsize == 64) {
print "#if !defined(ARCH_64)\n";
print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n];
print "#endif\n";
print "#define BEAM_WIDE_MASK 0xFFFFFFFFUL\n";
print "#define BEAM_LOOSE_MASK 0xFFFFUL\n";
print "#define BEAM_TIGHT_MASK 0xFFFFUL\n";
print "#define BEAM_WIDE_SHIFT 32\n";
print "#define BEAM_LOOSE_SHIFT 16\n";
print "#define BEAM_TIGHT_SHIFT 16\n";
}
print "\n";
#
# Definitions of tags.
#
my $letter;
my $tag_num = 0;
comment('C', "The following operand types for generic instructions",
"occur in beam files.");
foreach $letter (split('', $compiler_types)) {
print "#define TAG_$letter $tag_num\n";
$tag_num++;
}
print "\n";
comment('C', "The following operand types are only used in the loader.");
foreach $letter (split('', $loader_types)) {
print "#define TAG_$letter $tag_num\n";
$tag_num++;
}
print "\n#define BEAM_NUM_TAGS $tag_num\n\n";
$i = 0;
foreach (sort keys %match_engine_ops) {
print "#define $_ $i\n";
$i++;
}
print "#define NUM_TOPS $i\n";
print "\n";
print "#define TE_MAX_VARS $te_max_vars\n";
print "\n";
print "extern const char tag_to_letter[];\n";
print "extern const Uint op_transform[];\n";
print "\n";
for ($i = 0; $i < @op_to_name; $i++) {
print "#define op_$op_to_name[$i] $i\n";
}
print "\n";
print "#define NUMBER_OF_OPCODES ",scalar(@op_to_name),"\n";
for ($i = 0; $i < @op_to_name; $i++) {
print "#define op_count_$op_to_name[$i] ",$i+scalar(@op_to_name),"\n";
}
print "\n";
print "#define DEFINE_OPCODES";
foreach (@op_to_name) {
print " \\\n&&lb_$_,";
}
print "\n\n";
print "#define DEFINE_COUNTING_OPCODES";
foreach (@op_to_name) {
print " \\\n&&lb_count_$_,";
}
print "\n\n";
print "#define DEFINE_COUNTING_LABELS";
for ($i = 0; $i < @op_to_name; $i++) {
my($name) = $op_to_name[$i];
print " \\\nCountCase($name): erts_instr_count[$i]++; goto lb_$name;";
}
print "\n\n";
for ($i = 0; $i < @gen_opname; $i++) {
print "#define genop_$gen_opname[$i]_$gen_arity[$i] $i\n"
if defined $gen_opname[$i];
}
print "#endif\n";
#
# Extension of transform engine.
#
$name = "$outdir/beam_tr_funcs.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
tr_gen_call(@call_table);
$name = "$outdir/beam_pred_funcs.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
tr_gen_call(@pred_table);
#
# Implementation of operations for emulator.
#
$name = "$outdir/beam_hot.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
print_code(\%hot_code);
$name = "$outdir/beam_cold.h";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
print_code(\%cold_code);
foreach my $key (keys %combined_code) {
my $name = "$outdir/$key";
open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n";
comment('C');
print_indented_code(@{$combined_code{$key}});
}
}
sub init_item {
my($sep) = "";
print "{";
foreach (@_) {
if (!defined $_) {
print "${sep}NULL";
} elsif (/^\{/) {
print "$sep$_";
} elsif (/^-?\d+$/) {
print "$sep$_";
} else {
print "$sep\"$_\"";
}
$sep = ", ";
}
print "},\n";
}
sub q {
my($str) = @_;
"\"$str\"";
}
sub print_code {
my($ref) = @_;
my(%sorted);
my($key, $label); # Loop variables.
foreach $key (keys %$ref) {
my($sort_key);
my($code) = '';
foreach $label (@{$ref->{$key}}) {
$code .= "OpCase($label):\n";
$sort_key = $label;
}
$code .= "$key\n";
$sorted{$sort_key} = $code;
}
foreach (sort keys %sorted) {
print_indented_code($sorted{$_});
}
}
sub print_indented_code {
my(@code) = @_;
foreach my $chunk (@code) {
my $indent = 0;
foreach (split "\n", $chunk) {
s/^\s*//;
if (/\}/) {
$indent -= 2;
}
if ($_ eq '') {
print "\n";
} elsif (/^#/) {
print $_, "\n";
} else {
print ' ' x $indent, $_, "\n";
}
if (/\{/) {
$indent += 2;
}
}
print "\n";
}
}
#
# Produce output needed by the compiler back-end (assembler).
#
sub compiler_output {
my($module) = 'beam_opcodes';
my($name) = "${module}.erl";
my($i);
open(STDOUT, ">$outdir/$name") || die "Failed to open $name for writing: $!\n";
print "-module($module).\n";
comment('erlang');
print "-export([format_number/0]).\n";
print "-export([opcode/2,opname/1]).\n";
print "\n";
print "-spec format_number() -> $BEAM_FORMAT_NUMBER.\n";
print "format_number() -> $BEAM_FORMAT_NUMBER.\n\n";
print "-spec opcode(atom(), 0..", $max_gen_operands, ") -> 1..", $num_file_opcodes-1, ".\n";
for ($i = 0; $i < @gen_opname; $i++) {
next unless defined $gen_opname[$i];
print "%%" if $obsolete[$i];
print "opcode(", quote($gen_opname[$i]), ", $gen_arity[$i]) -> $i;\n";
}
print "opcode(Name, Arity) -> erlang:error(badarg, [Name,Arity]).\n\n";
print "-spec opname(1..", $num_file_opcodes-1, ") -> {atom(),0..", $max_gen_operands, "}.\n";
for ($i = 0; $i < @gen_opname; $i++) {
next unless defined $gen_opname[$i];
print "opname($i) -> {",
quote($gen_opname[$i]), ",$gen_arity[$i]};\n";
}
print "opname(Number) -> erlang:error(badarg, [Number]).\n";
#
# Generate .hrl file.
#
my($hrl_name) = "$outdir/${module}.hrl";
open(STDOUT, ">$hrl_name") || die "Failed to open $hrl_name for writing: $!\n";
comment('erlang');
for ($i = 0; $i < @tag_type && $i < 8; $i++) {
print "-define(tag_$tag_type[$i], $i).\n";
}
print "\n";
}
#
# Check an operation for validity.
#
sub syntax_check {
my($name, @args) = @_;
my($i);
error("Bad opcode name '$name'")
unless $name =~ /^[a-z][\w\d_]*$/;
for ($i = 0; $i < @args; $i++) {
foreach my $type (split(//, $args[$i])) {
error("Argument " . ($i+1) . ": invalid type '$type'")
unless defined $arg_size{$type};
}
}
}
sub save_specific_ops {
my($name,$arity,$hot,@args) = @_;
my(@res) = ("");
foreach my $arg (@args) {
my @new_res = ();
foreach my $type (split(//, $arg)) {
foreach my $args (@res) {
push @new_res, "$args$type";
}
}
@res = @new_res;
}
my $key = "$name/$arity";
foreach my $args (@res) {
@args = split //, $args;
push @{$specific_op{$key}}, [$name,$hot,@args];
}
}
sub parse_c_args {
local($_) = @_;
my @res;
while (s/^(\w[\w\d]*)\s*//) {
push @res, $1;
s/^,\s*// or last;
}
$_ eq '' or error("garbage in argument list: $_");
@res;
}
sub error {
my(@message) = @_;
my($where) = $. ? "$ARGV($.): " : "";
die $where, @message, "\n";
}
sub sanity {
die "internal error: ", @_, "\n";
}
sub comment {
my($lang, @comments) = @_;
my($prefix);
if ($lang eq 'C') {
print "/*\n";
$prefix = " * ";
} elsif ($lang eq 'erlang') {
$prefix = '%% ';
} else {
$prefix = '# ';
}
my(@prog) = split('/', $0);
my($prog) = $prog[$#prog];
if (@comments) {
my $line;
foreach $line (@comments) {
print "$prefix$line\n";
}
} else {
print "$prefix Warning: Do not edit this file.\n";
print "$prefix Auto-generated by '$prog'.\n";
}
if ($lang eq 'C') {
print " */\n";
}
print "\n";
}
#
# Combine micro instruction into instruction blocks.
#
sub combine_micro_instructions {
my %groups;
my %group_file;
# Sanity check, normalize micro instructions.
foreach my $instr (keys %combined_instrs) {
my $ref = $combined_instrs{$instr};
my($def_loc,$outfile,$def) = @$ref;
my($group,@subs) = split /[.]/, $def;
my $arity = 0;
@subs = map { "$group.$_" } @subs;
foreach my $s (@subs) {
my $code = $c_code{$s};
defined $code or
error("$def_loc: no definition of $s");
$c_code_used{$s} = 1;
my(undef,undef,@c_args) = @{$code};
$arity += scalar(@c_args);
}
push @{$groups{$group}}, [$instr,$arity,@subs];
$group_file{$group} = $outfile;
}
# Now generate code for each group.
foreach my $group (sort keys %groups) {
my $code = combine_instruction_group($group, @{$groups{$group}});
my $outfile = $group_file{$group};
push @{$combined_code{$outfile}}, $code;
}
}
sub combine_instruction_group {
my($group,@in_instrs) = @_;
my $gcode = ''; # Code for the entire group.
# Get code for the head of the group (if any).
my $head_name = "$group.head";
$c_code_used{$head_name} = 1;
my $head_code_ref = $c_code{$head_name};
if (defined $head_code_ref) {
my($head_code,$where,@c_args) = @{$head_code_ref};
@c_args and error("$where: no arguments allowed for " .
"head function '$head_name()'");
$gcode = $head_code . "\n";
}
# Variables.
my %offsets;
my @instrs;
my %num_references;
my $group_size = 0;
# Do basic error checking. Associate operands of instructions
# with the correct micro instructions. Calculate offsets for micro
# instructions.
foreach my $ref_instr (@in_instrs) {
my($specific,$arity,@subs) = @$ref_instr;
my $specific_key = "$specific/$arity";
my $specific_op_ref = $specific_op{$specific_key};
error("no $specific_key instruction")
unless defined $specific_op_ref;
foreach my $specific_op (@$specific_op_ref) {
my($name, $hot, @args) = @{$specific_op};
my $offset = 0;
my @rest = @args;
my @new_subs;
my $opcase = $specific;
$opcase .= "_" . join '', @args if @args;
foreach my $s (@subs) {
my $code = $c_code{$s};
my(undef,undef,@c_args) = @{$code};
my @first;
foreach (0..$#c_args) {
push @first, shift @rest;
}
my($size,undef) = basic_generator($s, 0, '', 0, undef, @first);
$offsets{$s} = $offset
unless defined $offsets{$s} and $offsets{$s} >= $offset;
$offset += $size - 1;
my $label = micro_label($s);
$num_references{$label} = 0;
push @new_subs, [$opcase,$label,$s,$size-1,@first];
$opcase = '';
}
$group_size = $offset if $group_size < $offset;
push @instrs, [$specific_key,@new_subs];
}
}
# Link the sub instructions for each instructions to each
# other.
my @all_instrs;
foreach my $instr (@instrs) {
my($specific_key,@subs) = @{$instr};
for (my $i = 0; $i < @subs; $i++) {
my($opcase,$label,$s,$size,@args) = @{$subs[$i]};
my $next = '';
(undef,$next) = @{$subs[$i+1]} if $i < $#subs;
$num_references{$next}++ if $next;
my $instr_info = "$opcase:$label:$next:$s:$size:@args";
push @all_instrs, [$label,$offsets{$s},$instr_info];
}
}
my %order_to_instrs;
my %label_to_offset;
my %order_to_offset;
foreach my $instr (@all_instrs) {
my($label,$offset,$instr_info) = @$instr;
my $sort_key = sprintf("%02d.%02d", $offset, $num_references{$label});
push @{$order_to_instrs{$sort_key}}, $instr_info;
$label_to_offset{$label} = $offset;
$order_to_offset{$sort_key} = $offset;
}
my(@slots) = sort {$a <=> $b} keys %order_to_instrs;
# Now generate the code for the entire group.
my $offset = 0;
for(my $i = 0; $i < @slots; $i++) {
my $key = $slots[$i];
# Sort micro-instructions with OpCase before other micro-instructions.
my(@instrs) = @{$order_to_instrs{$key}};
my $order_func = sub {
my $a_key = ($a =~ /^:/) ? "1$a" : "0$a";
my $b_key = ($b =~ /^:/) ? "1$b" : "0$b";
$a_key cmp $b_key;
};
@instrs = sort $order_func @instrs;
my %seen;
foreach my $instr (@instrs) {
my($opcase,$label,$next,$s,$size,$args) = split ":", $instr;
my(@first) = split " ", $args;
my $seen_key = "$label:$next:" . scalar(@first);
next if $opcase eq '' and $seen{$seen_key};
$seen{$seen_key} = 1;
if ($opcase ne '') {
$gcode .= "OpCase($opcase):\n";
}
if ($num_references{$label}) {
$gcode .= "$label:\n";
}
my $flags = '';
my $transfer_to_next = '';
my $dec = 0;
unless ($i == $#slots) {
$flags = "-no_next";
my $next_offset = $label_to_offset{$next};
$dec = $next_offset - ($offset + $size);
$transfer_to_next = "I -= $dec;\n" if $dec;
$transfer_to_next .= "goto $next;\n\n";
}
my(undef,$gen_code) =
basic_generator($s, 0, $flags, $offset,
$group_size-$offset-$dec, @first);
$gcode .= $gen_code . $transfer_to_next;
}
$offset = $order_to_offset{$slots[$i+1]} if $i < $#slots;
}
"{\n$gcode\n}\n\n";
}
sub micro_label {
my $label = shift;
$label =~ s/[.]/__/g;
$label;
}
#
# Basic implementation of instruction in emulator loop
# (assuming no packing).
#
sub basic_generator {
my($name,$hot,$extra_comments,$offset,$group_size,@args) = @_;
my $size = 0;
my $flags = '';
my @f;
my $prefix = '';
my $tmp_arg_num = 1;
my $pack_spec = '';
my $var_decls = '';
#
# Pack arguments for hot code with an implementation.
#
my $c_code_ref = $c_code{$name};
if ($hot and defined $c_code_ref) {
($prefix, $pack_spec, @args) = do_pack(@args);
}
#
# Calculate the size of the instruction and generate each argument for
# the macro.
#
my $need_block = 0;
my $arg_offset = $offset;
foreach (@args) {
my($this_size) = $arg_size{$_};
SWITCH:
{
/^pack:(\d):(.*)/ and do {
push(@f, $2);
$this_size = $1;
last SWITCH;
};
/r/ and do {
push(@f, "r(0)");
last SWITCH;
};
/[lxy]/ and do {
push(@f, $_ . "b(Arg($arg_offset))");
last SWITCH;
};
/n/ and do {
push(@f, "NIL");
last SWITCH;
};
/s/ and do {
my($tmp) = "targ$tmp_arg_num";
$var_decls .= "Eterm $tmp;\n";
$tmp_arg_num++;
push(@f, $tmp);
$prefix .= "GetR($arg_offset, $tmp);\n";
$need_block = 1;
last SWITCH;
};
/d/ and do {
$var_decls .= "Eterm dst = Arg($arg_offset);\n" .
"Eterm* dst_ptr = REG_TARGET_PTR(dst);\n";
push(@f, "*dst_ptr");
last SWITCH;
};
defined $arg_size{$_} and do {
push(@f, "Arg($arg_offset)");
last SWITCH;
};
die "$name: The generator can't handle $_, at";
}
$size += $this_size;
$arg_offset += $this_size;
}
#
# If the implementation is in beam_emu.c, there is nothing
# more to do.
#
unless (defined $c_code_ref) {
return ($size+1, undef, '');
}
$group_size = $size unless defined $group_size;
#
# Generate main body of the implementation.
#
my($c_code,$where,@c_args) = @{$c_code_ref};
my %bindings;
$c_code_used{$name} = 1;
if (@f != @c_args) {
error("$where: defining '$name' with ", scalar(@c_args),
" arguments instead of expected ", scalar(@f), " arguments");
}
for (my $i = 0; $i < @f; $i++) {
my $var = $c_args[$i];
$bindings{$var} = $f[$i];
}
$bindings{'NEXT_INSTRUCTION'} = "I+" . ($group_size+$offset+1);
$c_code = eval { expand_all($c_code, \%bindings) };
unless (defined $c_code) {
warn $@;
error("... from the body of $name at $where");
}
my(@comments) = $c_code =~ m@//[|]\s*(.*)@g;
$c_code =~ s@//[|]\s*(.*)\n?@@g;
$flags = "@comments $extra_comments";
#
# Generate code for transferring to the next instruction.
#
my $dispatch_next;
my $instr_offset = $group_size + $offset + 1;
if ($flags =~ /-no_next/) {
$dispatch_next = "";
} elsif ($flags =~ /-no_prefetch/) {
$dispatch_next = "\nI += $instr_offset;\n" .
"ASSERT(VALID_INSTR(*I));\n" .
"Goto(*I);";
} else {
$var_decls .= "BeamInstr* _nextpf = " .
"(BeamInstr *) I[$instr_offset];\n";
$dispatch_next = "\nI += $instr_offset;\n" .
"ASSERT(VALID_INSTR(_nextpf));\n" .
"Goto(_nextpf);";
}
#
# Assemble the complete code for the instruction.
#
my $body = "$c_code$dispatch_next";
if ($need_block) {
$body = "$prefix\{\n$body\n}";
} else {
$body = "$prefix$body";
}
my $code = join("\n",
"{",
"$var_decls$body",
"}", "");
($size+1, $code, $pack_spec);
}
sub expand_all {
my($code,$bindings_ref) = @_;
my %bindings = %{$bindings_ref};
# Expand all $Var occurrences.
$code =~ s/[\$](\w[\w\d]*)(?!\()/defined $bindings{$1} ? $bindings{$1} : "\$$1"/ge;
# Find calls to macros, $name(...), and expand them.
my $res = "";
while ($code =~ /[\$](\w[\w\d]*)\(/) {
my $macro_name = $1;
my $keep = substr($code, 0, $-[0]);
my $after = substr($code, $+[0]);
# Keep the special, pre-defined bindings.
my %new_bindings;
foreach my $key (qw(NEXT_INSTRUCTION)) {
$new_bindings{$key} = $bindings{$key};
}
my $body;
($body,$code) = expand_macro($macro_name, $after, \%new_bindings);
$res .= "$keep$body";
}
$res . $code;
}
sub expand_macro {
my($name,$rest,$bindings_ref) = @_;
my $c_code = $c_code{$name};
defined $c_code or
error("calling undefined macro '$name'...");
$c_code_used{$name} = 1;
my ($body,$where,@vars) = @{$c_code};
# Separate the arguments into @args;
my @args;
my $level = 1;
my %inc = ('(' => 1, ')' => -1,
'[' => 1, ']' => -1,
'{' => 1, '}' => -1);
my $arg = undef;
while ($rest =~ /([,\(\[\{\}\]\)]|([^,\(\[\{\}\]\)]*))/g) {
my $token = $1;
my $inc = $inc{$token} || 0;
$level += $inc;
if ($level == 0) {
$rest = substr($rest, pos($rest));
push @args, $arg if defined $arg;
last;
}
if ($token eq ',') {
if ($level == 1) {
push @args, $arg;
$arg = "";
}
next;
}
$arg .= $token;
}
# Trim leading whitespace from each argument.
foreach my $arg (@args) {
$arg =~ s/^\s*//;
}
# Now combine bindings from the parameter names and arguments.
my %bindings = %{$bindings_ref};
if (@vars != @args) {
error("calling $name with ", scalar(@args),
" arguments instead of expected ", scalar(@vars), " arguments...");
}
for (my $i = 0; $i < @vars; $i++) {
$bindings{$vars[$i]} = $args[$i];
}
$body = eval { expand_all($body, \%bindings) };
unless (defined $body) {
warn $@;
die "... from the body of $name at $where\n";
}
("do {\n$body\n} while (0)",$rest);
}
sub do_pack {
my(@args) = @_;
my($packable_args) = 0;
my @is_packable; # Packability (boolean) for each argument.
my $wide_packing = 0;
my(@orig_args) = @args;
#
# Count the number of packable arguments. If we encounter any 's' or 'd'
# arguments, packing is not possible.
#
my $packable_types = "xytQ";
foreach my $arg (@args) {
if ($arg =~ /^[$packable_types]/) {
$packable_args++;
push @is_packable, 1;
} elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) {
$wide_packing = 1;
push @is_packable, 1;
if (++$packable_args == 2) {
# We can only pack two arguments. Turn off packing
# for the rest of the arguments.
$packable_types = "\xFF";
}
} elsif ($arg =~ /^[sd]/) {
return ('', '', @args);
} elsif ($arg =~ /^[scq]/ and $packable_args > 0) {
# When packing, this operand will be picked up from the
# code array, put onto the packing stack, and later put
# back into a different location in the code. The problem
# is that if this operand is a literal, the original
# location in the code would have been remembered in a
# literal patch. For packing to work, we would have to
# adjust the position in the literal patch. For the
# moment, adding additional instructions to the packing
# engine to handle this does not seem worth it, so we will
# just turn off packing.
return ('', '', @args);
} else {
push @is_packable, 0;
}
}
#
# Get out of here if too few or too many arguments.
#
return ('', '', @args) if $packable_args < 2;
my($size) = 0;
my($pack_prefix) = '';
my($down) = ''; # Pack commands (towards instruction
# beginning).
my($up) = ''; # Pack commands (storing back while
# moving forward).
my $args_per_word = $args_per_word[$packable_args];
my @shift;
my @mask;
my @instr;
if ($wide_packing) {
@shift = ('0', 'BEAM_WIDE_SHIFT');
@mask = ('BEAM_WIDE_MASK', $WHOLE_WORD);
@instr = ('w', 'i');
} else {
@shift = @{$pack_shift[$args_per_word]};
@mask = @{$pack_mask[$args_per_word]};
@instr = @{$pack_instr[$args_per_word]};
}
#
# Now generate the packing instructions. One complication is that
# the packing engine works from right-to-left, but we must generate
# the instructions from left-to-right because we must calculate
# instruction sizes from left-to-right.
#
# XXX Packing 3 't's in one word won't work. Sorry.
my $did_some_packing = 0; # Nothing packed yet.
my($ap) = 0; # Argument number within word.
my($tmpnum) = 1; # Number of temporary variable.
my($expr) = '';
for (my $i = 0; $i < @args; $i++) {
my($reg) = $args[$i];
my($this_size) = $arg_size{$reg};
if ($is_packable[$i]) {
$this_size = 0;
$did_some_packing = 1;
if ($ap == 0) {
$pack_prefix .= "Eterm tmp_packed$tmpnum = Arg($size);\n";
$up .= "p";
$down = "P$down";
$this_size = 1;
}
$down = "$instr[$ap]$down";
my($unpack) = make_unpack($tmpnum, $shift[$ap], $mask[$ap]);
$args[$i] = "pack:$this_size:$reg" . "b($unpack)";
if (++$ap == $args_per_word) {
$ap = 0;
$tmpnum++;
}
} elsif ($arg_size{$reg} && $did_some_packing) {
#
# This is an argument that can't be packed. Normally, we must
# save it on the pack engine's stack, unless:
#
# 1. The argument has zero size (e.g. r(0)). Such arguments
# will not be loaded. They disappear.
# 2. If the argument is on the left of the first packed argument,
# the packing engine will never access it (because the engine
# operates from right-to-left).
#
$down = "g${down}";
$up = "${up}p";
}
$size += $this_size;
}
my $pack_spec = $down . $up;
return ($pack_prefix, $pack_spec, @args);
}
sub make_unpack {
my($tmpnum, $shift, $mask) = @_;
my($e) = "tmp_packed$tmpnum";
$e = "($e>>$shift)" if $shift;
$e .= "&$mask" unless $mask eq $WHOLE_WORD;
$e;
}
sub quote {
local($_) = @_;
return "'$_'" if $_ eq 'try';
return "'$_'" if $_ eq 'catch';
return "'$_'" if $_ eq 'receive';
return "'$_'" if $_ =~ /^[A-Z]/;
$_;
}
#
# Parse instruction transformations when they first appear.
#
sub parse_transformation {
local($_) = @_;
my($orig) = $_;
my($from, $to) = split(/\s*=>\s*/);
my(@op);
my $rest_var;
# The source instructions.
my(@from) = split(/\s*\|\s*/, $from);
foreach (@from) {
if (/^(\w+)\((.*?)\)/) {
my($name, $arglist) = ($1, $2);
$_ = (compile_transform_function($name, split(/\s*,\s*/, $arglist)));
} else {
(@op) = split;
($rest_var,$_) = compile_transform(1, $rest_var, @op);
}
}
#
# Check for a function which should be called to provide the new
# instructions if the left-hand side matched. Otherwise there is
# an explicit list of instructions.
#
my @to;
if ($to =~ /^(\w+)\((.*?)\)/) {
my($name, $arglist) = ($1, $2);
@to = (compile_transform_function($name, split(/\s*,\s*/, $arglist)));
} else {
@to = split(/\s*\|\s*/, $to);
foreach (@to) {
(@op) = split;
(undef,$_) = compile_transform(0, $rest_var, @op);
}
}
push(@transformations, [$., $orig, [@from], [reverse @to]]);
}
sub compile_transform_function {
my($name, @args) = @_;
[".$name", 0, @args];
}
sub compile_transform {
my($src, $rest_var, $name, @ops) = @_;
my $arity = 0;
foreach (@ops) {
my(@list) = tr_parse_op($src, $_);
if ($list[1] eq '*') {
$rest_var = $list[0];
} elsif (defined $rest_var and $list[0] eq $rest_var) {
$list[1] = '*';
} else {
$arity++;
}
$_ = [ @list ];
}
if (defined $gen_opnum{$name,$arity} && $obsolete[$gen_opnum{$name,$arity}]) {
error("obsolete function must not be used in transformations");
}
if ($src) {
$is_transformed{$name,$arity} = 1;
}
($rest_var,[$name,$arity,@ops]);
}
sub tr_parse_op {
my($src, $op) = @_;
my($var) = '';
my($type) = '';
my($type_val) = 0;
my($cond) = '';
my($cond_val) = '';
local($_) = $op;
# Get the variable name if any.
if (/^([A-Z]\w*)(.*)/) {
$var = $1;
$_ = $2;
error("garbage after variable")
unless /^=(.*)/ or /^(\s*)$/;
$_ = $1;
}
# Get the type if any.
if (/^([a-z*]+)(.*)/) {
$type = $1;
$_ = $2;
foreach (split('', $type)) {
error("bad type in $op")
unless defined $type_bit{$_} or $type eq '*';
$_ eq 'r' and
error("$op: 'r' is not allowed in transformations")
}
}
# Get an optional condition. (In source.)
if (/^==(.*)/) {
$cond = 'is_eq';
$cond_val = $1;
$_ = '';
} elsif (/^\$is_bif(.*)/) {
$cond = 'is_bif';
$cond_val = -1;
$_ = $1;
} elsif (/^\$is_not_bif(.*)/) {
$cond = 'is_not_bif';
$cond_val = -1;
$_ = $1;
} elsif (m@^\$bif:(\w+):(\w+)/(\d)(.*)@) {
$cond = 'is_bif';
if ($1 eq 'erlang') {
$cond_val = "BIF_$2_$3";
} else {
$cond_val = "BIF_$1_$2_$3";
}
$_ = $4;
} elsif (m@^\$func:(\w+):(\w+)/([_\d])(.*)@) {
my $arity = $3 eq '_' ? 1024 : $3;
$cond = 'is_func';
$cond_val = "$1:$2:$arity";
$_ = $4;
}
# Get an optional value. (In destination.)
$type_val = $type eq 'x' ? 1023 : 0;
if (/^=(.*)/) {
error("value not allowed in source: $op")
if $src;
$type_val = $1;
$_ = '';
}
# Nothing more is allowed after the command.
error("garbage '$_' after operand: $op")
unless /^\s*$/;
# Test that destination has no conditions.
unless ($src) {
error("condition not allowed in destination: $op")
if $cond;
error("variable name and type cannot be combined in destination: $op")
if $var && $type;
}
($var,$type,$type_val,$cond,$cond_val);
}
#
# Generate code for all transformations.
#
sub tr_gen {
my(@g) = @_;
my($ref, $key, $instr); # Loop variables.
foreach $ref (@g) {
my($line, $orig_transform, $from_ref, $to_ref) = @$ref;
my $so_far = tr_gen_from($line, @$from_ref);
tr_gen_to($line, $orig_transform, $so_far, @$to_ref);
}
#
# Print the generated transformation engine.
#
my($offset) = 0;
print "const Uint op_transform[] = {\n";
foreach $key (sort keys %gen_transform) {
$gen_transform_offset{$key} = $offset;
my @instr = @{$gen_transform{$key}};
#
# If the last instruction is 'fail', remove it and
# convert the previous 'try_me_else' to 'try_me_else_fail'.
#
if (is_instr($instr[$#instr], 'fail')) {
pop(@instr);
my $i = $#instr;
$i-- while !is_instr($instr[$i], 'try_me_else');
$instr[$i] = make_op('', 'try_me_else_fail');
}
foreach $instr (@instr) {
my($size, $instr_ref, $comment) = @$instr;
my($op, @args) = @$instr_ref;
print " ";
if (!defined $op) {
$comment =~ s/\n(.)/\n $1/g;
print "\n", $comment;
} else {
$op = "TOP_$op";
$match_engine_ops{$op} = 1;
if ($comment ne '') {
printf "%-24s /* %s */\n", (join(", ", ($op, @args)) . ","),
$comment;
} else {
print join(", ", ($op, @args)), ",\n";
}
$offset += $size;
}
}
print "\n";
}
print "/*\n";
print " * Total number of words: $offset\n";
print " */\n";
print "};\n\n";
}
sub tr_gen_from {
my($line,@tr) = @_;
my(%var) = ();
my(%var_type);
my($var_num) = 0;
my(@code);
my($op, $ref); # Loop variables.
my $where = "left side of transformation in line $line: ";
my $may_fail = 0;
my $is_first = 1;
foreach $ref (@tr) {
my($name, $arity, @ops) = @$ref;
my($key) = "$name/$arity";
my($opnum);
$may_fail = 1 unless $is_first;
$is_first = 0;
#
# A name starting with a period is a C pred function to be called.
#
if ($name =~ /^\.(\w+)/) {
$name = $1;
$may_fail = 1;
my $var;
my(@args);
foreach $var (@ops) {
error($where, "variable '$var' unbound")
unless defined $var{$var};
if ($var_type{$var} eq 'scalar') {
push(@args, "var[$var{$var}]");
} else {
push(@args, "rest_args");
}
}
my $pi = tr_next_index(\@pred_table, \%pred_table, $name, @args);
my $op = make_op("$name()", 'pred', $pi);
my @slots = grep(/^\d+/, map { $var{$_} } @ops);
op_slot_usage($op, @slots);
push(@code, $op);
next;
}
#
# Check that $name/$arity refers to a valid generic instruction.
#
error($where, "invalid generic op $name/$arity")
unless defined $gen_opnum{$name,$arity};
$opnum = $gen_opnum{$name,$arity};
push(@code, make_op("$name/$arity", 'next_instr', $opnum));
foreach $op (@ops) {
my($var, $type, $type_val, $cond, $val) = @$op;
my $ignored_var = "$var (ignored)";
if ($type ne '' && $type ne '*') {
$may_fail = 1;
#
# The is_bif, is_not_bif, and is_func instructions have
# their own built-in type test and don't need to
# be guarded with a type test instruction.
#
$ignored_var = '';
unless ($cond eq 'is_bif' or
$cond eq 'is_not_bif' or
$cond eq 'is_func') {
my($types) = '';
my($type_mask) = 0;
foreach (split('', $type)) {
$types .= "$_ ";
$type_mask |= $type_bit{$_};
}
if ($cond ne 'is_eq') {
push(@code, make_op($types, 'is_type', $type_mask));
} else {
$cond = '';
push(@code, make_op("$types== $val", 'is_type_eq',
$type_mask, $val));
}
}
}
if ($cond eq 'is_func') {
my($m, $f, $a) = split(/:/, $val);
$ignored_var = '';
$may_fail = 1;
push(@code, make_op('', "$cond", "am_$m",
"am_$f", $a));
} elsif ($cond ne '') {
$ignored_var = '';
$may_fail = 1;
push(@code, make_op('', "$cond", $val));
}
if ($var ne '') {
if (defined $var{$var}) {
$ignored_var = '';
$may_fail = 1;
my $op = make_op($var, 'is_same_var', $var{$var});
op_slot_usage($op, $var{$var});
push(@code, $op);
} elsif ($type eq '*') {
foreach my $type (values %var_type) {
error("only one use of a '*' variable is " .
"allowed on the left hand side of " .
"a transformation")
if $type eq 'array';
}
$ignored_var = '';
$var{$var} = 'unnumbered';
$var_type{$var} = 'array';
push(@code, make_op($var, 'rest_args'));
} else {
$ignored_var = '';
$var_type{$var} = 'scalar';
$var{$var} = $var_num;
$var_num++;
push(@code, make_op($var, 'set_var', $var{$var}));
}
}
if (is_instr($code[$#code], 'set_var')) {
my $ref = pop @code;
my $comment = $ref->[2];
my $var = $ref->[1][1];
push(@code, make_op($comment, 'set_var_next_arg', $var));
} else {
push(@code, make_op($ignored_var, 'next_arg'));
}
}
# Remove redundant 'next_arg' instructions before the end
# of the instruction.
pop(@code) while is_instr($code[$#code], 'next_arg');
}
#
# Insert the commit operation.
#
push(@code, make_op($may_fail ? '' : 'always reached', 'commit'));
$te_max_vars = $var_num
if $te_max_vars < $var_num;
[\%var, \%var_type, \@code];
}
sub tr_gen_to {
my($line, $orig_transform, $so_far, @tr) = @_;
my($var_ref, $var_type_ref, $code_ref) = @$so_far;
my(%var) = %$var_ref;
my(%var_type) = %$var_type_ref;
my(@code) = @$code_ref;
my($op, $ref); # Loop variables.
my($where) = "right side of transformation in line $line: ";
my $last_instr = $code[$#code];
my $cannot_fail = is_instr($last_instr, 'commit') &&
(get_comment($last_instr) =~ /^always/);
foreach $ref (@tr) {
my($name, $arity, @ops) = @$ref;
#
# A name starting with a period is a C function to be called.
#
if ($name =~ /^\.(\w+)/) {
$name = $1;
my $var;
my(@args);
foreach $var (@ops) {
error($where, "variable '$var' unbound")
unless defined $var{$var};
if ($var_type{$var} eq 'scalar') {
push(@args, "var[$var{$var}]");
} else {
push(@args, "rest_args");
}
}
pop(@code); # Get rid of 'commit' instruction
my $index = tr_next_index(\@call_table, \%call_table,
$name, @args);
my $op = make_op("$name()", 'call_end', $index);
my @slots = grep(/^\d+/, map { $var{$_} } @ops);
op_slot_usage($op, @slots);
push(@code, $op);
last;
}
#
# Check that $name/$arity refers to a valid generic instruction.
#
my($key) = "$name/$arity";
error($where, "invalid generic op $name/$arity")
unless defined $gen_opnum{$name,$arity};
my $opnum = $gen_opnum{$name,$arity};
#
# Create code to build the generic instruction.
#
push(@code, make_op("$name/$arity", 'new_instr', $opnum));
foreach $op (@ops) {
my($var, $type, $type_val) = @$op;
if ($type eq '*') {
push(@code, make_op($var, 'store_rest_args'));
} elsif ($var ne '') {
error($where, "variable '$var' unbound")
unless defined $var{$var};
my $op = make_op($var, 'store_var_next_arg', $var{$var});
op_slot_usage($op, $var{$var});
push(@code, $op);
} elsif ($type ne '') {
push(@code, make_op('', 'store_type', "TAG_$type"));
if ($type_val) {
push(@code, make_op('', 'store_val', $type_val));
}
push(@code, make_op('', 'next_arg'));
}
}
pop(@code) if is_instr($code[$#code], 'next_arg');
}
push(@code, make_op('', 'end'))
unless is_instr($code[$#code], 'call_end');
tr_maybe_keep(\@code);
tr_maybe_rename(\@code);
tr_remove_unused(\@code);
#
# Chain together all codes segments having the same first operation.
#
my($first_ref) = shift(@code);
my($size, $first, $key) = @$first_ref;
my($dummy, $arity);
($dummy, $op, $arity) = @$first;
my($comment) = "\n/*\n * Line $line:\n * $orig_transform\n */\n\n";
my $prev_last;
$prev_last = pop(@{$gen_transform{$key}})
if defined $gen_transform{$key}; # Fail
if ($prev_last && !is_instr($prev_last, 'fail')) {
error("Line $line: A previous transformation shadows '$orig_transform'");
}
unless ($cannot_fail) {
unshift(@code, make_op('', 'try_me_else',
tr_code_len(@code)));
push(@code, make_op("$key", 'fail'));
}
unshift(@code, make_op($comment));
push(@{$gen_transform{$key}}, @code),
}
sub tr_maybe_keep {
my($ref) = @_;
my @last_instr;
my $pos;
my $reused_instr;
for (my $i = 0; $i < @$ref; $i++) {
my $instr = $$ref[$i];
my($size, $instr_ref, $comment) = @$instr;
my($op, @args) = @$instr_ref;
if ($op eq 'next_instr') {
@last_instr = ($args[0]);
} elsif ($op eq 'set_var_next_arg') {
push @last_instr, $args[0];
} elsif ($op eq 'next_arg') {
push @last_instr, 'ignored';
} elsif ($op eq 'new_instr') {
unless (defined $pos) {
# 'new_instr' immediately after 'commit'.
$reused_instr = $args[0];
return unless shift(@last_instr) == $reused_instr;
$pos = $i - 1;
} else {
# Second 'new_instr' after 'commit'. The instructions
# from $pos up to and including $i - 1 rebuilds the
# existing instruction exactly.
my $name = $gen_opname[$reused_instr];
my $arity = $gen_arity[$reused_instr];
my $reuse = make_op("$name/$arity", 'keep');
splice @$ref, $pos, $i-$pos, ($reuse);
return;
}
} elsif ($op eq 'store_var_next_arg') {
return unless shift(@last_instr) eq $args[0];
} elsif (defined $pos) {
return;
}
}
}
sub tr_maybe_rename {
my($ref) = @_;
my $s = 'left';
my $a = 0;
my $num_args = 0;
my $new_instr;
my $first;
my $i;
for ($i = 1; $i < @$ref; $i++) {
my $instr = $$ref[$i];
my($size, $instr_ref, $comment) = @$instr;
my($op, @args) = @$instr_ref;
if ($s eq 'left') {
if ($op eq 'set_var_next_arg') {
if ($num_args == $a and $args[0] == $a) {
$num_args++;
}
$a++;
} elsif ($op eq 'next_arg') {
$a++;
} elsif ($op eq 'commit') {
$a = 0;
$first = $i;
$s = 'committed';
} elsif ($op eq 'next_instr') {
return;
}
} elsif ($s eq 'committed') {
if ($op eq 'new_instr') {
$new_instr = $args[0];
$a = 0;
$s = 'right';
} else {
return;
}
} elsif ($s eq 'right') {
if ($op eq 'store_var_next_arg' && $args[0] == $a) {
$a++;
} elsif ($op eq 'end' && $a <= $num_args) {
my $name = $gen_opname[$new_instr];
my $arity = $gen_arity[$new_instr];
my $new_op = make_op("$name/$arity", 'rename', $new_instr);
splice @$ref, $first, $i-$first+1, ($new_op);
return;
} else {
return;
}
}
}
}
sub tr_remove_unused {
my($ref) = @_;
my %used;
# Collect all used variables.
for my $instr (@$ref) {
my $uref = $$instr[3];
for my $slot (@$uref) {
$used{$slot} = 1;
}
}
# Replace 'set_var_next_arg' with 'next_arg' if the variable
# is never used.
for my $instr (@$ref) {
my($size, $instr_ref, $comment) = @$instr;
my($op, @args) = @$instr_ref;
if ($op eq 'set_var_next_arg') {
my $var = $args[0];
next if $used{$var};
$instr = make_op("$comment (ignored)", 'next_arg');
}
}
# Delete a sequence of 'next_arg' instructions when they are
# redundant before instructions such as 'commit'.
my @opcode;
my %ending = (call_end => 1,
commit => 1,
next_instr => 1,
pred => 1,
rename => 1,
keep => 1);
for (my $i = 0; $i < @$ref; $i++) {
my $instr = $$ref[$i];
my($size, $instr_ref, $comment) = @$instr;
my($opcode) = @$instr_ref;
if ($ending{$opcode}) {
my $first = $i;
$first-- while $first > 0 and $opcode[$first-1] eq 'next_arg';
my $n = $i - $first;
splice @$ref, $first, $n;
$i -= $n;
}
$opcode[$i] = $opcode;
}
}
sub tr_code_len {
my($sum) = 0;
my($ref);
foreach $ref (@_) {
$sum += $$ref[0];
}
$sum;
}
sub make_op {
my($comment, @op) = @_;
[scalar(@op), [@op], $comment, []];
}
sub op_slot_usage {
my($op_ref, @slots) = @_;
$$op_ref[3] = \@slots;
}
sub is_instr {
my($ref,$op) = @_;
return 0 unless ref($ref) eq 'ARRAY';
$ref->[1][0] eq $op;
}
sub get_comment {
my($ref,$op) = @_;
return '' unless ref($ref) eq 'ARRAY';
$ref->[2];
}
sub tr_next_index {
my($lref,$href,$name,@args) = @_;
my $code = "RVAL = $name(" . join(', ', 'st', @args) . "); break;\n";
my $index;
if (defined $$href{$code}) {
$index = $$href{$code};
} else {
$index = scalar(@$lref);
push(@$lref, $code);
$$href{$code} = $index;
}
$index;
}
sub tr_gen_call {
my(@call_table) = @_;
my($i);
for ($i = 0; $i < @call_table; $i++) {
print "case $i: $call_table[$i]";
}
}