#!/usr/bin/env perl -W # # %CopyrightBegin% # # Copyright Ericsson AB 1998-2017. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # %CopyrightEnd% # use strict; use vars qw($BEAM_FORMAT_NUMBER); $BEAM_FORMAT_NUMBER = undef; my $target = \&emulator_output; my $outdir = "."; # Directory for output files. my $verbose = 0; my $hot = 1; my $num_file_opcodes = 0; my $wordsize = 32; my %defs; # Defines (from command line). # This is shift counts and mask for the packer. my $WHOLE_WORD = ''; my @pack_instr; my @pack_shift; my @pack_mask; $pack_instr[2] = ['6', 'i']; $pack_instr[3] = ['0', '0', 'i']; $pack_instr[4] = ['6', '6', '6', 'i']; # Only for 64 bit wordsize $pack_shift[2] = ['0', 'BEAM_LOOSE_SHIFT']; $pack_shift[3] = ['0', 'BEAM_TIGHT_SHIFT', '(2*BEAM_TIGHT_SHIFT)']; $pack_shift[4] = ['0', 'BEAM_LOOSE_SHIFT', # Only for 64 bit wordsize '(2*BEAM_LOOSE_SHIFT)', '(3*BEAM_LOOSE_SHIFT)']; $pack_mask[2] = ['BEAM_LOOSE_MASK', $WHOLE_WORD]; $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK']; $pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize 'BEAM_LOOSE_MASK', 'BEAM_LOOSE_MASK', $WHOLE_WORD]; # Mapping from packagable arguments to number of packed arguments per # word. Initialized after the wordsize is known. my @args_per_word; # There are two types of instructions: generic and specific. # The generic instructions are those generated by the Beam compiler. # Corresponding to each generic instruction, there is generally a # whole family of related specific instructions. Specific instructions # are those executed by the VM interpreter during run-time. # Maximum number of operands for a generic instruction. # In beam_load.c the MAX_OPARGS refers to the maximum # number of operands for generic instructions. my $max_gen_operands = 8; # Maximum number of operands for a specific instruction. # Must be even. The beam_load.c file must be updated, too. my $max_spec_operands = 6; # The maximum number of primitive genop_types. my $max_genop_types = 16; my %gen_opnum; my %num_specific; my %gen_to_spec; my %specific_op; my %gen_arity; my @gen_arity; my @gen_opname; my @op_to_name; my @obsolete; # Instructions and micro instructions implemented in C. my %c_code; # C code block, location, arguments. my %c_code_used; # Used or not. # Definitions for instructions combined from micro instructions. my %combined_instrs; my %combined_code; # Combined micro instructions. my %hot_code; my %cold_code; my @unnumbered_generic; my %unnumbered; my %is_transformed; # # Pre-processor. # my @if_val; my @if_line; # # Code transformations. # my $te_max_vars = 0; # Max number of variables ever needed. my %gen_transform; my %match_engine_ops; # All opcodes for the match engine. my %gen_transform_offset; my @transformations; my @call_table; my %call_table; my @pred_table; my %pred_table; # Operand types for generic instructions. my $compiler_types = "uiaxyfhz"; my $loader_types = "nprvlqo"; my $genop_types = $compiler_types . $loader_types; # # Defines the argument types and their loaded size assuming no packing. # my %arg_size = ('r' => 0, # x(0) - x register zero 'x' => 1, # x(N), N > 0 - x register 'y' => 1, # y(N) - y register 'i' => 1, # tagged integer 'a' => 1, # tagged atom 'n' => 0, # NIL (implicit) 'c' => 1, # tagged constant (integer, atom, nil) 's' => 1, # tagged source; any of the above 'd' => 1, # tagged destination register (r, x, y) 'f' => 1, # failure label 'j' => 1, # either 'f' or 'p' 'e' => 1, # pointer to export entry 'L' => 0, # label 'I' => 1, # untagged integer 't' => 1, # untagged integer -- can be packed 'b' => 1, # pointer to bif 'A' => 1, # arity value 'P' => 1, # byte offset into tuple or stack 'Q' => 1, # like 'P', but packable 'h' => 1, # character 'l' => 1, # float reg 'q' => 1, # literal term ); # # Generate bits. # my %type_bit; my @tag_type; sub define_type_bit { my($tag,$val) = @_; defined $type_bit{$tag} and sanity("the tag '$tag' has already been defined with the value ", $type_bit{$tag}); $type_bit{$tag} = $val; } { my($bit) = 1; my(%bit); foreach (split('', $genop_types)) { push(@tag_type, $_); define_type_bit($_, $bit); $bit{$_} = $bit; $bit *= 2; } # Composed types. define_type_bit('d', $type_bit{'x'} | $type_bit{'y'}); define_type_bit('c', $type_bit{'i'} | $type_bit{'a'} | $type_bit{'n'} | $type_bit{'q'}); define_type_bit('s', $type_bit{'d'} | $type_bit{'i'} | $type_bit{'a'} | $type_bit{'n'} | $type_bit{'q'}); define_type_bit('j', $type_bit{'f'} | $type_bit{'p'}); # Aliases (for matching purposes). define_type_bit('I', $type_bit{'u'}); define_type_bit('t', $type_bit{'u'}); define_type_bit('A', $type_bit{'u'}); define_type_bit('L', $type_bit{'u'}); define_type_bit('b', $type_bit{'u'}); define_type_bit('e', $type_bit{'u'}); define_type_bit('P', $type_bit{'u'}); define_type_bit('Q', $type_bit{'u'}); } # # Pre-define the 'fail' instruction. It is used internally # by the 'try_me_else_fail' instruction. # $match_engine_ops{'TOP_fail'} = 1; # # Sanity checks. # { if (@tag_type > $max_genop_types) { sanity("\$max_genop_types is $max_genop_types, ", "but there are ", scalar(@tag_type), " primitive tags defined\n"); } foreach my $tag (@tag_type) { sanity("tag '$tag': primitive tags must be named with lowercase letters") unless $tag =~ /^[a-z]$/; } foreach my $tag (keys %arg_size) { defined $type_bit{$tag} or sanity("the tag '$tag' has a size in %arg_size, " . "but has no defined bit pattern"); } } # # Parse command line options. # while (@ARGV && $ARGV[0] =~ /^-(.*)/) { $_ = $1; shift; ($target = \&emulator_output), next if /^emulator/; ($target = \&compiler_output), next if /^compiler/; ($outdir = shift), next if /^outdir/; ($wordsize = shift), next if /^wordsize/; ($verbose = 1), next if /^v/; ($defs{$1} = $2), next if /^D(\w+)=(\w+)/; die "$0: Bad option: -$_\n"; } if ($wordsize == 32) { $defs{'ARCH_32'} = 1; $defs{'ARCH_64'} = 0; } elsif ($wordsize == 64) { $defs{'ARCH_32'} = 0; $defs{'ARCH_64'} = 1; } # # Initialize number of arguments per packed word. # $args_per_word[2] = 2; $args_per_word[3] = 3; $args_per_word[4] = 2; $args_per_word[5] = 3; $args_per_word[6] = 3; if ($wordsize == 64) { $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; $args_per_word[4] = 4; } # # Parse the input files. # my $in_c_code = ''; my $c_code_block; my $c_code_loc; my @c_args; while (<>) { my($op_num); if ($in_c_code) { if (/^\}/) { my $name = $in_c_code; my $block = $c_code_block; $in_c_code = ''; $block =~ s/^ //mg; chomp $block; $c_code{$name} = [$block,$c_code_loc,@c_args]; } else { $c_code_block .= $_; } next; } chomp; if (s/\\$//) { $_ .= <>; redo unless eof(ARGV); } next if /^\s*$/; next if /^\#/; next if m@^//@; # # Handle %if. # if (/^\%if (\w+)/) { my $name = $1; my $val = $defs{$name}; defined $val or error("'$name' is undefined"); push @if_val, $val; push @if_line, $.; next; } elsif (/^\%unless (\w+)/) { my $name = $1; my $val = $defs{$name}; defined $val or error("'$name' is undefined"); push @if_val, !$val; push @if_line, $.; next; } elsif (/^\%else$/) { unless (@if_line) { error("%else without a preceding %if/%unless"); } $if_line[$#if_line] = $.; $if_val[$#if_val] = !$if_val[$#if_val]; next; } elsif (/^\%endif$/) { unless (@if_line) { error("%endif without a preceding %if/%unless/%else"); } pop @if_val; pop @if_line; next; } if (@if_val and not $if_val[$#if_val]) { next; } # # Handle assignments. # if (/^([\w_][\w\d_]+)=(.*)/) { no strict 'refs'; my($name) = $1; $$name = $2; next; } # # Handle %hot/%cold. # if (/^\%hot/) { $hot = 1; next; } elsif (/^\%cold/) { $hot = 0; next; } # # Handle transformations. # if (/=>/) { parse_transformation($_); next; } # # Handle C code blocks. # if (/^(\w[\w.]*)\(([^\)]*)\)\s*{/) { my $name = $1; $in_c_code = $name; $c_code_block = ''; @c_args = parse_c_args($2); $c_code_loc = "$ARGV($.)"; if (defined $c_code{$name}) { my $where = $c_code{$name}->[1]; error("$name: already defined at $where"); } next; } # # Handle definition of instructions in terms of # micro instructions. # if (/^(\w+)\s*:=\s*([\w.]+)\s*;\s*$/) { $combined_instrs{$1} = ["$ARGV($.)","beam_instrs.h",$2]; next; } # # Parse off the number of the operation. # $op_num = undef; if (s/^(\d+):\s*//) { $op_num = $1; $op_num != 0 or error("Opcode 0 invalid"); error("Opcode $op_num already defined") if defined $gen_opname[$op_num]; } # # Parse: Name/Arity (generic instruction) # if (m@^(-)?(\w+)/(\d)\s*$@) { my($obsolete) = $1; my($name) = $2; my($arity) = $3; $name =~ /^[a-z]/ or error("Opname must start with a lowercase letter"); defined $gen_arity{$name} and $gen_arity{$name} != $arity and error("Opname $name already defined with arity $gen_arity{$name}"); defined $unnumbered{$name,$arity} and error("Opname $name already defined with arity $gen_arity{$name}"); if (defined $op_num) { # Numbered generic operation $gen_opname[$op_num] = $name; $gen_arity[$op_num] = $arity; $gen_opnum{$name,$arity} = $op_num; $gen_arity{$name} = $arity; $gen_to_spec{"$name/$arity"} = undef; $num_specific{"$name/$arity"} = 0; $obsolete[$op_num] = defined $obsolete; } else { # Unnumbered generic operation. push(@unnumbered_generic, [$name, $arity]); $unnumbered{$name,$arity} = 1; } next; } # # Parse specific instructions (only present in emulator/loader): # Name Arg1 Arg2... # my($name, @args) = split; error("too many operands") if @args > $max_spec_operands; syntax_check($name, @args); my $arity = @args; if (defined $gen_opnum{$name,$arity} and $obsolete[$gen_opnum{$name,$arity}]) { error("specific instructions may not be specified for obsolete instructions"); } save_specific_ops($name, $arity, $hot, @args); if (defined $op_num) { error("specific instructions must not be numbered"); } elsif (!defined($gen_arity{$name}) && !defined($unnumbered{$name,$arity})) { # # Create an unumbered generic instruction too. # push(@unnumbered_generic, [$name, $arity]); $unnumbered{$name,$arity} = 1; } } continue { if (eof(ARGV)) { close(ARGV); if (@if_line) { error("Unterminated %if/%unless/%else at " . "line $if_line[$#if_line]\n"); } } } $num_file_opcodes = @gen_opname; # # Number all generic operations without numbers. # { my $ref; foreach $ref (@unnumbered_generic) { my($name, $arity) = @$ref; my $op_num = @gen_opname; push(@gen_opname, $name); push(@gen_arity, $arity); $gen_opnum{$name,$arity} = $op_num; $gen_arity{$name} = $arity; $gen_to_spec{"$name/$arity"} = undef; $num_specific{"$name/$arity"} = 0; } } # # Produce output for the chosen target. # &$target(); # # Ensure that all C code implementations have been used. # { my(@unused) = grep(!$c_code_used{$_}, keys %c_code); foreach my $unused (@unused) { my(undef,$where) = @{$c_code{$unused}}; warn "$where: $unused is unused\n"; } die "\n" if @unused; } # # Produce output needed by the emulator/loader. # sub emulator_output { my $i; my $name; my $key; # Loop variable. # # Information about opcodes (beam_opcodes.c). # $name = "$outdir/beam_opcodes.c"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); print "#ifdef HAVE_CONFIG_H\n"; print "# include \"config.h\"\n"; print "#endif\n\n"; print '#include "sys.h"', "\n"; print '#include "erl_vm.h"', "\n"; print '#include "export.h"', "\n"; print '#include "erl_process.h"', "\n"; print '#include "bif.h"', "\n"; print '#include "erl_atom_table.h"', "\n"; print '#include "beam_load.h"', "\n"; print "\n"; print "const char tag_to_letter[] = {\n "; for ($i = 0; $i < length($genop_types); $i++) { print "'$tag_type[$i]', "; } for (; $i < @tag_type; $i++) { print "'_', "; } print "\n};\n"; print "\n"; # # Combine micro instruction into instruction blocks. # combine_micro_instructions(); # # Generate code for specific ops. # my($spec_opnum) = 0; print "const OpEntry opc[] = {\n"; foreach $key (sort keys %specific_op) { $gen_to_spec{$key} = $spec_opnum; $num_specific{$key} = @{$specific_op{$key}}; # # Pick up all instructions and manufacture sort keys; we must have # the most specific instructions appearing first (e.g. an 'x' operand # should be matched before 's' or 'd'). # my(%items) = (); foreach (@{$specific_op{$key}}) { my($name, $hot, @args) = @{$_}; my($sign) = join('', @args); # The primitive types should sort before other types. my($sort_key) = $sign; eval "\$sort_key =~ tr/$genop_types/./"; $sort_key .= ":$sign"; $items{$sort_key} = [$name, $hot, $sign, @args]; } # # Now call the generator for the sorted result. # foreach (sort keys %items) { my($name, $hot, $sign, @args) = @{$items{$_}}; my $arity = @args; my($instr) = "${name}_$sign"; $instr =~ s/_$//; # # Call a generator to calculate size and generate macros # for the emulator. # my($size, $code, $pack) = basic_generator($name, $hot, '', 0, undef, @args); # # Save the generated $code for later. # if (defined $code) { if ($hot) { push(@{$hot_code{$code}}, $instr); } else { push(@{$cold_code{$code}}, $instr); } } # # Calculate the bit mask which should be used to match this # instruction. # my(@bits) = (0) x ($max_spec_operands/2); my($i); my $involves_r = 0; for ($i = 0; $i < $max_spec_operands && defined $args[$i]; $i++) { my $t = $args[$i]; my $bits = $type_bit{$t}; if ($t eq 'r') { $bits |= $type_bit{'x'}; $involves_r |= 1 << $i; } my $shift = $max_genop_types * ($i % 2); $bits[int($i/2)] |= $bits << $shift; } printf "/* %3d */ ", $spec_opnum; my $print_name = $sign ne '' ? "${name}_$sign" : $name; my $init = "{"; my $sep = ""; foreach (@bits) { $init .= sprintf("%s0x%X", $sep, $_); $sep = ","; } $init .= "}"; init_item($print_name, $init, $involves_r, $size, $pack, $sign); $op_to_name[$spec_opnum] = $instr; $spec_opnum++; } } print "};\n\n"; print "const int num_instructions = $spec_opnum;\n\n"; # # Print the array for instruction counts. # print "#ifdef ERTS_OPCODE_COUNTER_SUPPORT\n"; print "Uint erts_instr_count[$spec_opnum];\n"; print "#endif\n"; print "\n"; # # Generate transformations. # tr_gen(@transformations); # # Print the generic instruction table. # print "const GenOpEntry gen_opc[] = {\n"; for ($i = 0; $i < @gen_opname; $i++) { if ($i == $num_file_opcodes) { print "\n/*\n * Internal generic instructions.\n */\n\n"; } my($name) = $gen_opname[$i]; my($arity) = $gen_arity[$i]; printf "/* %3d */ ", $i; if (!defined $name) { init_item("", 0, 0, 0, -1); } else { my($key) = "$name/$arity"; my($tr) = defined $gen_transform_offset{$key} ? $gen_transform_offset{$key} : -1; my($spec_op) = $gen_to_spec{$key}; my($num_specific) = $num_specific{$key}; defined $spec_op or $obsolete[$gen_opnum{$name,$arity}] or $is_transformed{$name,$arity} or error("instruction $key has no specific instruction"); $spec_op = -1 unless defined $spec_op; init_item($name, $arity, $spec_op, $num_specific, $tr); } } print "};\n"; # # Information about opcodes (beam_opcodes.h). # $name = "$outdir/beam_opcodes.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); print "#ifndef __OPCODES_H__\n"; print "#define __OPCODES_H__\n\n"; print "#define BEAM_FORMAT_NUMBER $BEAM_FORMAT_NUMBER\n"; print "#define MAX_GENERIC_OPCODE ", $num_file_opcodes-1, "\n"; print "#define NUM_GENERIC_OPS ", scalar(@gen_opname), "\n"; print "#define NUM_SPECIFIC_OPS ", scalar(@op_to_name), "\n"; print "#define SCRATCH_X_REG 1023\n"; print "\n"; if ($wordsize == 32) { print "#if defined(ARCH_64)\n"; print qq[ #error "32-bit architecture assumed, but ARCH_64 is defined"\n]; print "#endif\n"; print "#define BEAM_LOOSE_MASK 0xFFF\n"; print "#define BEAM_TIGHT_MASK 0xFFC\n"; print "#define BEAM_LOOSE_SHIFT 16\n"; print "#define BEAM_TIGHT_SHIFT 10\n"; } elsif ($wordsize == 64) { print "#if !defined(ARCH_64)\n"; print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n]; print "#endif\n"; print "#define BEAM_WIDE_MASK 0xFFFFFFFFUL\n"; print "#define BEAM_LOOSE_MASK 0xFFFFUL\n"; print "#define BEAM_TIGHT_MASK 0xFFFFUL\n"; print "#define BEAM_WIDE_SHIFT 32\n"; print "#define BEAM_LOOSE_SHIFT 16\n"; print "#define BEAM_TIGHT_SHIFT 16\n"; } print "\n"; # # Definitions of tags. # my $letter; my $tag_num = 0; comment('C', "The following operand types for generic instructions", "occur in beam files."); foreach $letter (split('', $compiler_types)) { print "#define TAG_$letter $tag_num\n"; $tag_num++; } print "\n"; comment('C', "The following operand types are only used in the loader."); foreach $letter (split('', $loader_types)) { print "#define TAG_$letter $tag_num\n"; $tag_num++; } print "\n#define BEAM_NUM_TAGS $tag_num\n\n"; $i = 0; foreach (sort keys %match_engine_ops) { print "#define $_ $i\n"; $i++; } print "#define NUM_TOPS $i\n"; print "\n"; print "#define TE_MAX_VARS $te_max_vars\n"; print "\n"; print "extern const char tag_to_letter[];\n"; print "extern const Uint op_transform[];\n"; print "\n"; for ($i = 0; $i < @op_to_name; $i++) { print "#define op_$op_to_name[$i] $i\n"; } print "\n"; print "#define NUMBER_OF_OPCODES ",scalar(@op_to_name),"\n"; for ($i = 0; $i < @op_to_name; $i++) { print "#define op_count_$op_to_name[$i] ",$i+scalar(@op_to_name),"\n"; } print "\n"; print "#define DEFINE_OPCODES"; foreach (@op_to_name) { print " \\\n&&lb_$_,"; } print "\n\n"; print "#define DEFINE_COUNTING_OPCODES"; foreach (@op_to_name) { print " \\\n&&lb_count_$_,"; } print "\n\n"; print "#define DEFINE_COUNTING_LABELS"; for ($i = 0; $i < @op_to_name; $i++) { my($name) = $op_to_name[$i]; print " \\\nCountCase($name): erts_instr_count[$i]++; goto lb_$name;"; } print "\n\n"; for ($i = 0; $i < @gen_opname; $i++) { print "#define genop_$gen_opname[$i]_$gen_arity[$i] $i\n" if defined $gen_opname[$i]; } print "#endif\n"; # # Extension of transform engine. # $name = "$outdir/beam_tr_funcs.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); tr_gen_call(@call_table); $name = "$outdir/beam_pred_funcs.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); tr_gen_call(@pred_table); # # Implementation of operations for emulator. # $name = "$outdir/beam_hot.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); print_code(\%hot_code); $name = "$outdir/beam_cold.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); print_code(\%cold_code); foreach my $key (keys %combined_code) { my $name = "$outdir/$key"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); print_indented_code(@{$combined_code{$key}}); } } sub init_item { my($sep) = ""; print "{"; foreach (@_) { if (!defined $_) { print "${sep}NULL"; } elsif (/^\{/) { print "$sep$_"; } elsif (/^-?\d+$/) { print "$sep$_"; } else { print "$sep\"$_\""; } $sep = ", "; } print "},\n"; } sub q { my($str) = @_; "\"$str\""; } sub print_code { my($ref) = @_; my(%sorted); my($key, $label); # Loop variables. foreach $key (keys %$ref) { my($sort_key); my($code) = ''; foreach $label (@{$ref->{$key}}) { $code .= "OpCase($label):\n"; $sort_key = $label; } $code .= "$key\n"; $sorted{$sort_key} = $code; } foreach (sort keys %sorted) { print_indented_code($sorted{$_}); } } sub print_indented_code { my(@code) = @_; foreach my $chunk (@code) { my $indent = 0; foreach (split "\n", $chunk) { s/^\s*//; if (/\}/) { $indent -= 2; } if ($_ eq '') { print "\n"; } elsif (/^#/) { print $_, "\n"; } else { print ' ' x $indent, $_, "\n"; } if (/\{/) { $indent += 2; } } print "\n"; } } # # Produce output needed by the compiler back-end (assembler). # sub compiler_output { my($module) = 'beam_opcodes'; my($name) = "${module}.erl"; my($i); open(STDOUT, ">$outdir/$name") || die "Failed to open $name for writing: $!\n"; print "-module($module).\n"; comment('erlang'); print "-export([format_number/0]).\n"; print "-export([opcode/2,opname/1]).\n"; print "\n"; print "-spec format_number() -> $BEAM_FORMAT_NUMBER.\n"; print "format_number() -> $BEAM_FORMAT_NUMBER.\n\n"; print "-spec opcode(atom(), 0..", $max_gen_operands, ") -> 1..", $num_file_opcodes-1, ".\n"; for ($i = 0; $i < @gen_opname; $i++) { next unless defined $gen_opname[$i]; print "%%" if $obsolete[$i]; print "opcode(", quote($gen_opname[$i]), ", $gen_arity[$i]) -> $i;\n"; } print "opcode(Name, Arity) -> erlang:error(badarg, [Name,Arity]).\n\n"; print "-spec opname(1..", $num_file_opcodes-1, ") -> {atom(),0..", $max_gen_operands, "}.\n"; for ($i = 0; $i < @gen_opname; $i++) { next unless defined $gen_opname[$i]; print "opname($i) -> {", quote($gen_opname[$i]), ",$gen_arity[$i]};\n"; } print "opname(Number) -> erlang:error(badarg, [Number]).\n"; # # Generate .hrl file. # my($hrl_name) = "$outdir/${module}.hrl"; open(STDOUT, ">$hrl_name") || die "Failed to open $hrl_name for writing: $!\n"; comment('erlang'); for ($i = 0; $i < @tag_type && $i < 8; $i++) { print "-define(tag_$tag_type[$i], $i).\n"; } print "\n"; } # # Check an operation for validity. # sub syntax_check { my($name, @args) = @_; my($i); error("Bad opcode name '$name'") unless $name =~ /^[a-z][\w\d_]*$/; for ($i = 0; $i < @args; $i++) { foreach my $type (split(//, $args[$i])) { error("Argument " . ($i+1) . ": invalid type '$type'") unless defined $arg_size{$type}; } } } sub save_specific_ops { my($name,$arity,$hot,@args) = @_; my(@res) = (""); foreach my $arg (@args) { my @new_res = (); foreach my $type (split(//, $arg)) { foreach my $args (@res) { push @new_res, "$args$type"; } } @res = @new_res; } my $key = "$name/$arity"; foreach my $args (@res) { @args = split //, $args; push @{$specific_op{$key}}, [$name,$hot,@args]; } } sub parse_c_args { local($_) = @_; my @res; while (s/^(\w[\w\d]*)\s*//) { push @res, $1; s/^,\s*// or last; } $_ eq '' or error("garbage in argument list: $_"); @res; } sub error { my(@message) = @_; my($where) = $. ? "$ARGV($.): " : ""; die $where, @message, "\n"; } sub sanity { die "internal error: ", @_, "\n"; } sub comment { my($lang, @comments) = @_; my($prefix); if ($lang eq 'C') { print "/*\n"; $prefix = " * "; } elsif ($lang eq 'erlang') { $prefix = '%% '; } else { $prefix = '# '; } my(@prog) = split('/', $0); my($prog) = $prog[$#prog]; if (@comments) { my $line; foreach $line (@comments) { print "$prefix$line\n"; } } else { print "$prefix Warning: Do not edit this file.\n"; print "$prefix Auto-generated by '$prog'.\n"; } if ($lang eq 'C') { print " */\n"; } print "\n"; } # # Combine micro instruction into instruction blocks. # sub combine_micro_instructions { my %groups; my %group_file; # Sanity check, normalize micro instructions. foreach my $instr (keys %combined_instrs) { my $ref = $combined_instrs{$instr}; my($def_loc,$outfile,$def) = @$ref; my($group,@subs) = split /[.]/, $def; my $arity = 0; @subs = map { "$group.$_" } @subs; foreach my $s (@subs) { my $code = $c_code{$s}; defined $code or error("$def_loc: no definition of $s"); $c_code_used{$s} = 1; my(undef,undef,@c_args) = @{$code}; $arity += scalar(@c_args); } push @{$groups{$group}}, [$instr,$arity,@subs]; $group_file{$group} = $outfile; } # Now generate code for each group. foreach my $group (sort keys %groups) { my $code = combine_instruction_group($group, @{$groups{$group}}); my $outfile = $group_file{$group}; push @{$combined_code{$outfile}}, $code; } } sub combine_instruction_group { my($group,@in_instrs) = @_; my $gcode = ''; # Code for the entire group. # Get code for the head of the group (if any). my $head_name = "$group.head"; $c_code_used{$head_name} = 1; my $head_code_ref = $c_code{$head_name}; if (defined $head_code_ref) { my($head_code,$where,@c_args) = @{$head_code_ref}; @c_args and error("$where: no arguments allowed for " . "head function '$head_name()'"); $gcode = $head_code . "\n"; } # Variables. my %offsets; my @instrs; my %num_references; my $group_size = 0; # Do basic error checking. Associate operands of instructions # with the correct micro instructions. Calculate offsets for micro # instructions. foreach my $ref_instr (@in_instrs) { my($specific,$arity,@subs) = @$ref_instr; my $specific_key = "$specific/$arity"; my $specific_op_ref = $specific_op{$specific_key}; error("no $specific_key instruction") unless defined $specific_op_ref; foreach my $specific_op (@$specific_op_ref) { my($name, $hot, @args) = @{$specific_op}; my $offset = 0; my @rest = @args; my @new_subs; my $opcase = $specific; $opcase .= "_" . join '', @args if @args; foreach my $s (@subs) { my $code = $c_code{$s}; my(undef,undef,@c_args) = @{$code}; my @first; foreach (0..$#c_args) { push @first, shift @rest; } my($size,undef) = basic_generator($s, 0, '', 0, undef, @first); $offsets{$s} = $offset unless defined $offsets{$s} and $offsets{$s} >= $offset; $offset += $size - 1; my $label = micro_label($s); $num_references{$label} = 0; push @new_subs, [$opcase,$label,$s,$size-1,@first]; $opcase = ''; } $group_size = $offset if $group_size < $offset; push @instrs, [$specific_key,@new_subs]; } } # Link the sub instructions for each instructions to each # other. my @all_instrs; foreach my $instr (@instrs) { my($specific_key,@subs) = @{$instr}; for (my $i = 0; $i < @subs; $i++) { my($opcase,$label,$s,$size,@args) = @{$subs[$i]}; my $next = ''; (undef,$next) = @{$subs[$i+1]} if $i < $#subs; $num_references{$next}++ if $next; my $instr_info = "$opcase:$label:$next:$s:$size:@args"; push @all_instrs, [$label,$offsets{$s},$instr_info]; } } my %order_to_instrs; my %label_to_offset; my %order_to_offset; foreach my $instr (@all_instrs) { my($label,$offset,$instr_info) = @$instr; my $sort_key = sprintf("%02d.%02d", $offset, $num_references{$label}); push @{$order_to_instrs{$sort_key}}, $instr_info; $label_to_offset{$label} = $offset; $order_to_offset{$sort_key} = $offset; } my(@slots) = sort {$a <=> $b} keys %order_to_instrs; # Now generate the code for the entire group. my $offset = 0; for(my $i = 0; $i < @slots; $i++) { my $key = $slots[$i]; # Sort micro-instructions with OpCase before other micro-instructions. my(@instrs) = @{$order_to_instrs{$key}}; my $order_func = sub { my $a_key = ($a =~ /^:/) ? "1$a" : "0$a"; my $b_key = ($b =~ /^:/) ? "1$b" : "0$b"; $a_key cmp $b_key; }; @instrs = sort $order_func @instrs; my %seen; foreach my $instr (@instrs) { my($opcase,$label,$next,$s,$size,$args) = split ":", $instr; my(@first) = split " ", $args; my $seen_key = "$label:$next:" . scalar(@first); next if $opcase eq '' and $seen{$seen_key}; $seen{$seen_key} = 1; if ($opcase ne '') { $gcode .= "OpCase($opcase):\n"; } if ($num_references{$label}) { $gcode .= "$label:\n"; } my $flags = ''; my $transfer_to_next = ''; my $dec = 0; unless ($i == $#slots) { $flags = "-no_next"; my $next_offset = $label_to_offset{$next}; $dec = $next_offset - ($offset + $size); $transfer_to_next = "I -= $dec;\n" if $dec; $transfer_to_next .= "goto $next;\n\n"; } my(undef,$gen_code) = basic_generator($s, 0, $flags, $offset, $group_size-$offset-$dec, @first); $gcode .= $gen_code . $transfer_to_next; } $offset = $order_to_offset{$slots[$i+1]} if $i < $#slots; } "{\n$gcode\n}\n\n"; } sub micro_label { my $label = shift; $label =~ s/[.]/__/g; $label; } # # Basic implementation of instruction in emulator loop # (assuming no packing). # sub basic_generator { my($name,$hot,$extra_comments,$offset,$group_size,@args) = @_; my $size = 0; my $flags = ''; my @f; my $prefix = ''; my $tmp_arg_num = 1; my $pack_spec = ''; my $var_decls = ''; # # Pack arguments for hot code with an implementation. # my $c_code_ref = $c_code{$name}; if ($hot and defined $c_code_ref) { ($prefix, $pack_spec, @args) = do_pack(@args); } # # Calculate the size of the instruction and generate each argument for # the macro. # my $need_block = 0; my $arg_offset = $offset; foreach (@args) { my($this_size) = $arg_size{$_}; SWITCH: { /^pack:(\d):(.*)/ and do { push(@f, $2); $this_size = $1; last SWITCH; }; /r/ and do { push(@f, "r(0)"); last SWITCH; }; /[lxy]/ and do { push(@f, $_ . "b(Arg($arg_offset))"); last SWITCH; }; /n/ and do { push(@f, "NIL"); last SWITCH; }; /s/ and do { my($tmp) = "targ$tmp_arg_num"; $var_decls .= "Eterm $tmp;\n"; $tmp_arg_num++; push(@f, $tmp); $prefix .= "GetR($arg_offset, $tmp);\n"; $need_block = 1; last SWITCH; }; /d/ and do { $var_decls .= "Eterm dst = Arg($arg_offset);\n" . "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; push(@f, "*dst_ptr"); last SWITCH; }; defined $arg_size{$_} and do { push(@f, "Arg($arg_offset)"); last SWITCH; }; die "$name: The generator can't handle $_, at"; } $size += $this_size; $arg_offset += $this_size; } # # If the implementation is in beam_emu.c, there is nothing # more to do. # unless (defined $c_code_ref) { return ($size+1, undef, ''); } $group_size = $size unless defined $group_size; # # Generate main body of the implementation. # my($c_code,$where,@c_args) = @{$c_code_ref}; my %bindings; $c_code_used{$name} = 1; if (@f != @c_args) { error("$where: defining '$name' with ", scalar(@c_args), " arguments instead of expected ", scalar(@f), " arguments"); } for (my $i = 0; $i < @f; $i++) { my $var = $c_args[$i]; $bindings{$var} = $f[$i]; } $bindings{'NEXT_INSTRUCTION'} = "I+" . ($group_size+$offset+1); $c_code = eval { expand_all($c_code, \%bindings) }; unless (defined $c_code) { warn $@; error("... from the body of $name at $where"); } my(@comments) = $c_code =~ m@//[|]\s*(.*)@g; $c_code =~ s@//[|]\s*(.*)\n?@@g; $flags = "@comments $extra_comments"; # # Generate code for transferring to the next instruction. # my $dispatch_next; my $instr_offset = $group_size + $offset + 1; if ($flags =~ /-no_next/) { $dispatch_next = ""; } elsif ($flags =~ /-no_prefetch/) { $dispatch_next = "\nI += $instr_offset;\n" . "ASSERT(VALID_INSTR(*I));\n" . "Goto(*I);"; } else { $var_decls .= "BeamInstr* _nextpf = " . "(BeamInstr *) I[$instr_offset];\n"; $dispatch_next = "\nI += $instr_offset;\n" . "ASSERT(VALID_INSTR(_nextpf));\n" . "Goto(_nextpf);"; } # # Assemble the complete code for the instruction. # my $body = "$c_code$dispatch_next"; if ($need_block) { $body = "$prefix\{\n$body\n}"; } else { $body = "$prefix$body"; } my $code = join("\n", "{", "$var_decls$body", "}", ""); ($size+1, $code, $pack_spec); } sub expand_all { my($code,$bindings_ref) = @_; my %bindings = %{$bindings_ref}; # Expand all $Var occurrences. $code =~ s/[\$](\w[\w\d]*)(?!\()/defined $bindings{$1} ? $bindings{$1} : "\$$1"/ge; # Find calls to macros, $name(...), and expand them. my $res = ""; while ($code =~ /[\$](\w[\w\d]*)\(/) { my $macro_name = $1; my $keep = substr($code, 0, $-[0]); my $after = substr($code, $+[0]); # Keep the special, pre-defined bindings. my %new_bindings; foreach my $key (qw(NEXT_INSTRUCTION)) { $new_bindings{$key} = $bindings{$key}; } my $body; ($body,$code) = expand_macro($macro_name, $after, \%new_bindings); $res .= "$keep$body"; } $res . $code; } sub expand_macro { my($name,$rest,$bindings_ref) = @_; my $c_code = $c_code{$name}; defined $c_code or error("calling undefined macro '$name'..."); $c_code_used{$name} = 1; my ($body,$where,@vars) = @{$c_code}; # Separate the arguments into @args; my @args; my $level = 1; my %inc = ('(' => 1, ')' => -1, '[' => 1, ']' => -1, '{' => 1, '}' => -1); my $arg = undef; while ($rest =~ /([,\(\[\{\}\]\)]|([^,\(\[\{\}\]\)]*))/g) { my $token = $1; my $inc = $inc{$token} || 0; $level += $inc; if ($level == 0) { $rest = substr($rest, pos($rest)); push @args, $arg if defined $arg; last; } if ($token eq ',') { if ($level == 1) { push @args, $arg; $arg = ""; } next; } $arg .= $token; } # Trim leading whitespace from each argument. foreach my $arg (@args) { $arg =~ s/^\s*//; } # Now combine bindings from the parameter names and arguments. my %bindings = %{$bindings_ref}; if (@vars != @args) { error("calling $name with ", scalar(@args), " arguments instead of expected ", scalar(@vars), " arguments..."); } for (my $i = 0; $i < @vars; $i++) { $bindings{$vars[$i]} = $args[$i]; } $body = eval { expand_all($body, \%bindings) }; unless (defined $body) { warn $@; die "... from the body of $name at $where\n"; } ("do {\n$body\n} while (0)",$rest); } sub do_pack { my(@args) = @_; my($packable_args) = 0; my @is_packable; # Packability (boolean) for each argument. my $wide_packing = 0; my(@orig_args) = @args; # # Count the number of packable arguments. If we encounter any 's' or 'd' # arguments, packing is not possible. # my $packable_types = "xytQ"; foreach my $arg (@args) { if ($arg =~ /^[$packable_types]/) { $packable_args++; push @is_packable, 1; } elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) { $wide_packing = 1; push @is_packable, 1; if (++$packable_args == 2) { # We can only pack two arguments. Turn off packing # for the rest of the arguments. $packable_types = "\xFF"; } } elsif ($arg =~ /^[sd]/) { return ('', '', @args); } elsif ($arg =~ /^[scq]/ and $packable_args > 0) { # When packing, this operand will be picked up from the # code array, put onto the packing stack, and later put # back into a different location in the code. The problem # is that if this operand is a literal, the original # location in the code would have been remembered in a # literal patch. For packing to work, we would have to # adjust the position in the literal patch. For the # moment, adding additional instructions to the packing # engine to handle this does not seem worth it, so we will # just turn off packing. return ('', '', @args); } else { push @is_packable, 0; } } # # Get out of here if too few or too many arguments. # return ('', '', @args) if $packable_args < 2; my($size) = 0; my($pack_prefix) = ''; my($down) = ''; # Pack commands (towards instruction # beginning). my($up) = ''; # Pack commands (storing back while # moving forward). my $args_per_word = $args_per_word[$packable_args]; my @shift; my @mask; my @instr; if ($wide_packing) { @shift = ('0', 'BEAM_WIDE_SHIFT'); @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); @instr = ('w', 'i'); } else { @shift = @{$pack_shift[$args_per_word]}; @mask = @{$pack_mask[$args_per_word]}; @instr = @{$pack_instr[$args_per_word]}; } # # Now generate the packing instructions. One complication is that # the packing engine works from right-to-left, but we must generate # the instructions from left-to-right because we must calculate # instruction sizes from left-to-right. # # XXX Packing 3 't's in one word won't work. Sorry. my $did_some_packing = 0; # Nothing packed yet. my($ap) = 0; # Argument number within word. my($tmpnum) = 1; # Number of temporary variable. my($expr) = ''; for (my $i = 0; $i < @args; $i++) { my($reg) = $args[$i]; my($this_size) = $arg_size{$reg}; if ($is_packable[$i]) { $this_size = 0; $did_some_packing = 1; if ($ap == 0) { $pack_prefix .= "Eterm tmp_packed$tmpnum = Arg($size);\n"; $up .= "p"; $down = "P$down"; $this_size = 1; } $down = "$instr[$ap]$down"; my($unpack) = make_unpack($tmpnum, $shift[$ap], $mask[$ap]); $args[$i] = "pack:$this_size:$reg" . "b($unpack)"; if (++$ap == $args_per_word) { $ap = 0; $tmpnum++; } } elsif ($arg_size{$reg} && $did_some_packing) { # # This is an argument that can't be packed. Normally, we must # save it on the pack engine's stack, unless: # # 1. The argument has zero size (e.g. r(0)). Such arguments # will not be loaded. They disappear. # 2. If the argument is on the left of the first packed argument, # the packing engine will never access it (because the engine # operates from right-to-left). # $down = "g${down}"; $up = "${up}p"; } $size += $this_size; } my $pack_spec = $down . $up; return ($pack_prefix, $pack_spec, @args); } sub make_unpack { my($tmpnum, $shift, $mask) = @_; my($e) = "tmp_packed$tmpnum"; $e = "($e>>$shift)" if $shift; $e .= "&$mask" unless $mask eq $WHOLE_WORD; $e; } sub quote { local($_) = @_; return "'$_'" if $_ eq 'try'; return "'$_'" if $_ eq 'catch'; return "'$_'" if $_ eq 'receive'; return "'$_'" if $_ =~ /^[A-Z]/; $_; } # # Parse instruction transformations when they first appear. # sub parse_transformation { local($_) = @_; my($orig) = $_; my($from, $to) = split(/\s*=>\s*/); my(@op); my $rest_var; # The source instructions. my(@from) = split(/\s*\|\s*/, $from); foreach (@from) { if (/^(\w+)\((.*?)\)/) { my($name, $arglist) = ($1, $2); $_ = (compile_transform_function($name, split(/\s*,\s*/, $arglist))); } else { (@op) = split; ($rest_var,$_) = compile_transform(1, $rest_var, @op); } } # # Check for a function which should be called to provide the new # instructions if the left-hand side matched. Otherwise there is # an explicit list of instructions. # my @to; if ($to =~ /^(\w+)\((.*?)\)/) { my($name, $arglist) = ($1, $2); @to = (compile_transform_function($name, split(/\s*,\s*/, $arglist))); } else { @to = split(/\s*\|\s*/, $to); foreach (@to) { (@op) = split; (undef,$_) = compile_transform(0, $rest_var, @op); } } push(@transformations, [$., $orig, [@from], [reverse @to]]); } sub compile_transform_function { my($name, @args) = @_; [".$name", 0, @args]; } sub compile_transform { my($src, $rest_var, $name, @ops) = @_; my $arity = 0; foreach (@ops) { my(@list) = tr_parse_op($src, $_); if ($list[1] eq '*') { $rest_var = $list[0]; } elsif (defined $rest_var and $list[0] eq $rest_var) { $list[1] = '*'; } else { $arity++; } $_ = [ @list ]; } if (defined $gen_opnum{$name,$arity} && $obsolete[$gen_opnum{$name,$arity}]) { error("obsolete function must not be used in transformations"); } if ($src) { $is_transformed{$name,$arity} = 1; } ($rest_var,[$name,$arity,@ops]); } sub tr_parse_op { my($src, $op) = @_; my($var) = ''; my($type) = ''; my($type_val) = 0; my($cond) = ''; my($cond_val) = ''; local($_) = $op; # Get the variable name if any. if (/^([A-Z]\w*)(.*)/) { $var = $1; $_ = $2; error("garbage after variable") unless /^=(.*)/ or /^(\s*)$/; $_ = $1; } # Get the type if any. if (/^([a-z*]+)(.*)/) { $type = $1; $_ = $2; foreach (split('', $type)) { error("bad type in $op") unless defined $type_bit{$_} or $type eq '*'; $_ eq 'r' and error("$op: 'r' is not allowed in transformations") } } # Get an optional condition. (In source.) if (/^==(.*)/) { $cond = 'is_eq'; $cond_val = $1; $_ = ''; } elsif (/^\$is_bif(.*)/) { $cond = 'is_bif'; $cond_val = -1; $_ = $1; } elsif (/^\$is_not_bif(.*)/) { $cond = 'is_not_bif'; $cond_val = -1; $_ = $1; } elsif (m@^\$bif:(\w+):(\w+)/(\d)(.*)@) { $cond = 'is_bif'; if ($1 eq 'erlang') { $cond_val = "BIF_$2_$3"; } else { $cond_val = "BIF_$1_$2_$3"; } $_ = $4; } elsif (m@^\$func:(\w+):(\w+)/([_\d])(.*)@) { my $arity = $3 eq '_' ? 1024 : $3; $cond = 'is_func'; $cond_val = "$1:$2:$arity"; $_ = $4; } # Get an optional value. (In destination.) $type_val = $type eq 'x' ? 1023 : 0; if (/^=(.*)/) { error("value not allowed in source: $op") if $src; $type_val = $1; $_ = ''; } # Nothing more is allowed after the command. error("garbage '$_' after operand: $op") unless /^\s*$/; # Test that destination has no conditions. unless ($src) { error("condition not allowed in destination: $op") if $cond; error("variable name and type cannot be combined in destination: $op") if $var && $type; } ($var,$type,$type_val,$cond,$cond_val); } # # Generate code for all transformations. # sub tr_gen { my(@g) = @_; my($ref, $key, $instr); # Loop variables. foreach $ref (@g) { my($line, $orig_transform, $from_ref, $to_ref) = @$ref; my $so_far = tr_gen_from($line, @$from_ref); tr_gen_to($line, $orig_transform, $so_far, @$to_ref); } # # Print the generated transformation engine. # my($offset) = 0; print "const Uint op_transform[] = {\n"; foreach $key (sort keys %gen_transform) { $gen_transform_offset{$key} = $offset; my @instr = @{$gen_transform{$key}}; # # If the last instruction is 'fail', remove it and # convert the previous 'try_me_else' to 'try_me_else_fail'. # if (is_instr($instr[$#instr], 'fail')) { pop(@instr); my $i = $#instr; $i-- while !is_instr($instr[$i], 'try_me_else'); $instr[$i] = make_op('', 'try_me_else_fail'); } foreach $instr (@instr) { my($size, $instr_ref, $comment) = @$instr; my($op, @args) = @$instr_ref; print " "; if (!defined $op) { $comment =~ s/\n(.)/\n $1/g; print "\n", $comment; } else { $op = "TOP_$op"; $match_engine_ops{$op} = 1; if ($comment ne '') { printf "%-24s /* %s */\n", (join(", ", ($op, @args)) . ","), $comment; } else { print join(", ", ($op, @args)), ",\n"; } $offset += $size; } } print "\n"; } print "/*\n"; print " * Total number of words: $offset\n"; print " */\n"; print "};\n\n"; } sub tr_gen_from { my($line,@tr) = @_; my(%var) = (); my(%var_type); my($var_num) = 0; my(@code); my($op, $ref); # Loop variables. my $where = "left side of transformation in line $line: "; my $may_fail = 0; my $is_first = 1; foreach $ref (@tr) { my($name, $arity, @ops) = @$ref; my($key) = "$name/$arity"; my($opnum); $may_fail = 1 unless $is_first; $is_first = 0; # # A name starting with a period is a C pred function to be called. # if ($name =~ /^\.(\w+)/) { $name = $1; $may_fail = 1; my $var; my(@args); foreach $var (@ops) { error($where, "variable '$var' unbound") unless defined $var{$var}; if ($var_type{$var} eq 'scalar') { push(@args, "var[$var{$var}]"); } else { push(@args, "rest_args"); } } my $pi = tr_next_index(\@pred_table, \%pred_table, $name, @args); my $op = make_op("$name()", 'pred', $pi); my @slots = grep(/^\d+/, map { $var{$_} } @ops); op_slot_usage($op, @slots); push(@code, $op); next; } # # Check that $name/$arity refers to a valid generic instruction. # error($where, "invalid generic op $name/$arity") unless defined $gen_opnum{$name,$arity}; $opnum = $gen_opnum{$name,$arity}; push(@code, make_op("$name/$arity", 'next_instr', $opnum)); foreach $op (@ops) { my($var, $type, $type_val, $cond, $val) = @$op; my $ignored_var = "$var (ignored)"; if ($type ne '' && $type ne '*') { $may_fail = 1; # # The is_bif, is_not_bif, and is_func instructions have # their own built-in type test and don't need to # be guarded with a type test instruction. # $ignored_var = ''; unless ($cond eq 'is_bif' or $cond eq 'is_not_bif' or $cond eq 'is_func') { my($types) = ''; my($type_mask) = 0; foreach (split('', $type)) { $types .= "$_ "; $type_mask |= $type_bit{$_}; } if ($cond ne 'is_eq') { push(@code, make_op($types, 'is_type', $type_mask)); } else { $cond = ''; push(@code, make_op("$types== $val", 'is_type_eq', $type_mask, $val)); } } } if ($cond eq 'is_func') { my($m, $f, $a) = split(/:/, $val); $ignored_var = ''; $may_fail = 1; push(@code, make_op('', "$cond", "am_$m", "am_$f", $a)); } elsif ($cond ne '') { $ignored_var = ''; $may_fail = 1; push(@code, make_op('', "$cond", $val)); } if ($var ne '') { if (defined $var{$var}) { $ignored_var = ''; $may_fail = 1; my $op = make_op($var, 'is_same_var', $var{$var}); op_slot_usage($op, $var{$var}); push(@code, $op); } elsif ($type eq '*') { foreach my $type (values %var_type) { error("only one use of a '*' variable is " . "allowed on the left hand side of " . "a transformation") if $type eq 'array'; } $ignored_var = ''; $var{$var} = 'unnumbered'; $var_type{$var} = 'array'; push(@code, make_op($var, 'rest_args')); } else { $ignored_var = ''; $var_type{$var} = 'scalar'; $var{$var} = $var_num; $var_num++; push(@code, make_op($var, 'set_var', $var{$var})); } } if (is_instr($code[$#code], 'set_var')) { my $ref = pop @code; my $comment = $ref->[2]; my $var = $ref->[1][1]; push(@code, make_op($comment, 'set_var_next_arg', $var)); } else { push(@code, make_op($ignored_var, 'next_arg')); } } # Remove redundant 'next_arg' instructions before the end # of the instruction. pop(@code) while is_instr($code[$#code], 'next_arg'); } # # Insert the commit operation. # push(@code, make_op($may_fail ? '' : 'always reached', 'commit')); $te_max_vars = $var_num if $te_max_vars < $var_num; [\%var, \%var_type, \@code]; } sub tr_gen_to { my($line, $orig_transform, $so_far, @tr) = @_; my($var_ref, $var_type_ref, $code_ref) = @$so_far; my(%var) = %$var_ref; my(%var_type) = %$var_type_ref; my(@code) = @$code_ref; my($op, $ref); # Loop variables. my($where) = "right side of transformation in line $line: "; my $last_instr = $code[$#code]; my $cannot_fail = is_instr($last_instr, 'commit') && (get_comment($last_instr) =~ /^always/); foreach $ref (@tr) { my($name, $arity, @ops) = @$ref; # # A name starting with a period is a C function to be called. # if ($name =~ /^\.(\w+)/) { $name = $1; my $var; my(@args); foreach $var (@ops) { error($where, "variable '$var' unbound") unless defined $var{$var}; if ($var_type{$var} eq 'scalar') { push(@args, "var[$var{$var}]"); } else { push(@args, "rest_args"); } } pop(@code); # Get rid of 'commit' instruction my $index = tr_next_index(\@call_table, \%call_table, $name, @args); my $op = make_op("$name()", 'call_end', $index); my @slots = grep(/^\d+/, map { $var{$_} } @ops); op_slot_usage($op, @slots); push(@code, $op); last; } # # Check that $name/$arity refers to a valid generic instruction. # my($key) = "$name/$arity"; error($where, "invalid generic op $name/$arity") unless defined $gen_opnum{$name,$arity}; my $opnum = $gen_opnum{$name,$arity}; # # Create code to build the generic instruction. # push(@code, make_op("$name/$arity", 'new_instr', $opnum)); foreach $op (@ops) { my($var, $type, $type_val) = @$op; if ($type eq '*') { push(@code, make_op($var, 'store_rest_args')); } elsif ($var ne '') { error($where, "variable '$var' unbound") unless defined $var{$var}; my $op = make_op($var, 'store_var_next_arg', $var{$var}); op_slot_usage($op, $var{$var}); push(@code, $op); } elsif ($type ne '') { push(@code, make_op('', 'store_type', "TAG_$type")); if ($type_val) { push(@code, make_op('', 'store_val', $type_val)); } push(@code, make_op('', 'next_arg')); } } pop(@code) if is_instr($code[$#code], 'next_arg'); } push(@code, make_op('', 'end')) unless is_instr($code[$#code], 'call_end'); tr_maybe_keep(\@code); tr_maybe_rename(\@code); tr_remove_unused(\@code); # # Chain together all codes segments having the same first operation. # my($first_ref) = shift(@code); my($size, $first, $key) = @$first_ref; my($dummy, $arity); ($dummy, $op, $arity) = @$first; my($comment) = "\n/*\n * Line $line:\n * $orig_transform\n */\n\n"; my $prev_last; $prev_last = pop(@{$gen_transform{$key}}) if defined $gen_transform{$key}; # Fail if ($prev_last && !is_instr($prev_last, 'fail')) { error("Line $line: A previous transformation shadows '$orig_transform'"); } unless ($cannot_fail) { unshift(@code, make_op('', 'try_me_else', tr_code_len(@code))); push(@code, make_op("$key", 'fail')); } unshift(@code, make_op($comment)); push(@{$gen_transform{$key}}, @code), } sub tr_maybe_keep { my($ref) = @_; my @last_instr; my $pos; my $reused_instr; for (my $i = 0; $i < @$ref; $i++) { my $instr = $$ref[$i]; my($size, $instr_ref, $comment) = @$instr; my($op, @args) = @$instr_ref; if ($op eq 'next_instr') { @last_instr = ($args[0]); } elsif ($op eq 'set_var_next_arg') { push @last_instr, $args[0]; } elsif ($op eq 'next_arg') { push @last_instr, 'ignored'; } elsif ($op eq 'new_instr') { unless (defined $pos) { # 'new_instr' immediately after 'commit'. $reused_instr = $args[0]; return unless shift(@last_instr) == $reused_instr; $pos = $i - 1; } else { # Second 'new_instr' after 'commit'. The instructions # from $pos up to and including $i - 1 rebuilds the # existing instruction exactly. my $name = $gen_opname[$reused_instr]; my $arity = $gen_arity[$reused_instr]; my $reuse = make_op("$name/$arity", 'keep'); splice @$ref, $pos, $i-$pos, ($reuse); return; } } elsif ($op eq 'store_var_next_arg') { return unless shift(@last_instr) eq $args[0]; } elsif (defined $pos) { return; } } } sub tr_maybe_rename { my($ref) = @_; my $s = 'left'; my $a = 0; my $num_args = 0; my $new_instr; my $first; my $i; for ($i = 1; $i < @$ref; $i++) { my $instr = $$ref[$i]; my($size, $instr_ref, $comment) = @$instr; my($op, @args) = @$instr_ref; if ($s eq 'left') { if ($op eq 'set_var_next_arg') { if ($num_args == $a and $args[0] == $a) { $num_args++; } $a++; } elsif ($op eq 'next_arg') { $a++; } elsif ($op eq 'commit') { $a = 0; $first = $i; $s = 'committed'; } elsif ($op eq 'next_instr') { return; } } elsif ($s eq 'committed') { if ($op eq 'new_instr') { $new_instr = $args[0]; $a = 0; $s = 'right'; } else { return; } } elsif ($s eq 'right') { if ($op eq 'store_var_next_arg' && $args[0] == $a) { $a++; } elsif ($op eq 'end' && $a <= $num_args) { my $name = $gen_opname[$new_instr]; my $arity = $gen_arity[$new_instr]; my $new_op = make_op("$name/$arity", 'rename', $new_instr); splice @$ref, $first, $i-$first+1, ($new_op); return; } else { return; } } } } sub tr_remove_unused { my($ref) = @_; my %used; # Collect all used variables. for my $instr (@$ref) { my $uref = $$instr[3]; for my $slot (@$uref) { $used{$slot} = 1; } } # Replace 'set_var_next_arg' with 'next_arg' if the variable # is never used. for my $instr (@$ref) { my($size, $instr_ref, $comment) = @$instr; my($op, @args) = @$instr_ref; if ($op eq 'set_var_next_arg') { my $var = $args[0]; next if $used{$var}; $instr = make_op("$comment (ignored)", 'next_arg'); } } # Delete a sequence of 'next_arg' instructions when they are # redundant before instructions such as 'commit'. my @opcode; my %ending = (call_end => 1, commit => 1, next_instr => 1, pred => 1, rename => 1, keep => 1); for (my $i = 0; $i < @$ref; $i++) { my $instr = $$ref[$i]; my($size, $instr_ref, $comment) = @$instr; my($opcode) = @$instr_ref; if ($ending{$opcode}) { my $first = $i; $first-- while $first > 0 and $opcode[$first-1] eq 'next_arg'; my $n = $i - $first; splice @$ref, $first, $n; $i -= $n; } $opcode[$i] = $opcode; } } sub tr_code_len { my($sum) = 0; my($ref); foreach $ref (@_) { $sum += $$ref[0]; } $sum; } sub make_op { my($comment, @op) = @_; [scalar(@op), [@op], $comment, []]; } sub op_slot_usage { my($op_ref, @slots) = @_; $$op_ref[3] = \@slots; } sub is_instr { my($ref,$op) = @_; return 0 unless ref($ref) eq 'ARRAY'; $ref->[1][0] eq $op; } sub get_comment { my($ref,$op) = @_; return '' unless ref($ref) eq 'ARRAY'; $ref->[2]; } sub tr_next_index { my($lref,$href,$name,@args) = @_; my $code = "RVAL = $name(" . join(', ', 'st', @args) . "); break;\n"; my $index; if (defined $$href{$code}) { $index = $$href{$code}; } else { $index = scalar(@$lref); push(@$lref, $code); $$href{$code} = $index; } $index; } sub tr_gen_call { my(@call_table) = @_; my($i); for ($i = 0; $i < @call_table; $i++) { print "case $i: $call_table[$i]"; } }