gecko-dev/ef/Tools/PerlAssemblyConverter/assembly.pl

450 lines
13 KiB
Perl

#/*
# Christian Bennett
#
# Purpose:
# For use in a Make system when generating the AT&T assembly syntax file (for GNU software) from the
# Intel standard assembly syntax file.
#
#
# Expandability:
# This program is ready to be hooked into a make system. The design for this script
# is based on the assumption there is just an assembly file with no distinct header or
# footer information. This was not designed to parse through a C++ file and find the inlined
# assembly. Quite simply it takes an Intel asm instruction and outputs the AT&T version. If
# there is a line it does not recognize, it outputs it as is.
# So if there is distinct headers and footers per compiler you may have to modify the linux
# output slightly, or modify this code slightly.
#
# Use:
# The IO in this file is done through standard in and standard out. READ: cat the input
# and redirect the output to a file, or use whatever other means are convenient. For testing I did something
# like: cat sampleInput.asm | perl assembly.pl > linuxTranslation.pl
#
#
# KEY: in comments: ws = whitespace, pws = possible white space (read: there may be ws here).
# Here is a hash for identifying registers. Querying the hash will return TRUE if the query is a
# register name; otherwise it returns undef. If there are additional registers that may be used, add them in a
# similar fashion.
#*/
# Lookup table of the register names this converter recognizes.
# A word is treated as a register exactly when $registers{word} is true.
%registers = map { $_ => 1 } qw(
    eax ebx ecx edx
    esi edi eip esp
    ebp efl
);
LINE: while ($line = <>) {
# A blank line carries no instruction: echo it unchanged and move on.
if ($line eq "\n") {
    print $line;
    next LINE;
}
#/*----------------Interpretation of instructions--------------
### The Different Cases Handled
#
# Each case takes a string. It then parses the string and grabs arguments and parameters.
# It then formats the instruction and args for AT&T assembly syntax.
#
# Formatting preserves C++ style comments.
#
# 1) instruction = insn reg, int
# This case grabs the three words, and makes sure the second is a reg, and the third a integer
#
# 2) insn reg, reg2
# This case grabs the three words, and check if the 2nd and 3rd parameters are valid registers.
#
# 3) insn reg, [reg + offset] (where offset is an integer)
# This case grabs 5 items: insn, reg, reg, sign, offset.
#
# 4) insn int
# This case grabs the instruction, then grabs an int.
#
# 5) insn location (e.g. call FOO)
# insn reg
#
# 6) inverse of case 3: insn [reg + offset], reg
#
# 7) insn reg, [reg2 + FOO * 4] //note FOO may be a register.
#
# 8) insn [reg + FOO*4], reg2
#
# 9) insn reg, [reg2]
#
# 10) insn [reg], reg2
#
# 11) insn reg, [reg2 + reg3]
#
# 12) insn [reg + reg2], reg3
#
# 13) insn reg, [reg2 + reg3 + offset]
#
# 14) insn [reg + reg2 + offset], reg4
#
# 15) insn reg, [reg2 + reg3*offset + someintOffset]
#
# 16) insn [reg1 + reg2*offset + someintOffset], reg3
#
#
#
#*/
# A line that begins with a C++-style comment is copied to the output as is
# (the pattern stops before the newline, so one is added back here).
if (my ($comment) = $line =~ m@(^//.*)@) {
    print $comment, "\n";
    next LINE;
}
#/* CASE 1 -- insn reg, int --> insnl $int, %reg
# This statement is a regexp looking for: a word (the instruction), whitespace, letters,
# a comma, possible whitespace, an integer (digits), possible whitespace, and whatever
# follows (normally a // comment).
# It then makes sure the second word it absorbed is a register by using the hash.
# AT&T syntax lists the source operand first and marks an immediate with '$', so the
# operands are swapped and the integer gets a '$' prefix (the same prefix CASE 4 emits).
# Without the '$' the assembler would treat the integer as a memory address.
# If entered, 'next' goes to the next iteration of the while loop.
#*/
if($line =~ m@(\w+)\s+([a-zA-Z]+),\s*(\d+)\s*(/*.*)@ && $registers{$2}){
    print $1, "l \$", $3, ", %", $2;
    print " ", $4, "\n";
    next LINE;
}
#/* CASE 2 -- insn reg, reg2 --> insnl %reg2, %reg
# Register-to-register form. The pattern picks up the instruction, the two
# operands and whatever trails them (normally a // comment). Both operands
# must be known registers; otherwise the line is left for the later cases.
# The operands are emitted in swapped order, each prefixed with '%'.
# On success the translated line is printed and the loop moves to the next line.
#*/
if (my ($insn, $dest, $src, $trailer) = $line =~ m@(\w+)\s+(\w+),\s*([a-zA-Z]+)\s*(/*.*)@) {
    if ($registers{$dest} && $registers{$src}) {
        print $insn, "l %", $src, ", %", $dest, " ", $trailer, "\n";
        next LINE;
    }
}
#/* CASE 3 -- insn reg, [reg2+offset] --> insnl offset(%reg2), %reg
#             insn reg, [reg2-offset] --> insnl -offset(%reg2), %reg
# Load-style form: instruction, destination register, then a bracketed
# register plus or minus an integer offset, then whatever trails (normally
# a // comment). Both register operands must be known registers.
# On success the translated line is printed and the loop moves to the next line.
#*/
if (my ($insn, $dest, $base, $sign, $offset, $trailer) =
        $line =~ m@(\w+)\s+([a-zA-Z]+),\s*\[([a-zA-Z]+)\s*([\+\-])\s*(\d+)\s*]\s*(/*.*)@) {
    if ($registers{$dest} && $registers{$base}) {
        # A '+' offset is written bare; anything else gets a leading minus.
        my $minus = ($sign eq "\+") ? "" : "-";
        print $insn, "l ", $minus, $offset, "(%", $base, "), %", $dest;
        print " ", $trailer, "\n";
        next LINE;
    }
}
#/* CASE 4 -- insn int --> insnl $int, or ret $int
# Single immediate operand: instruction, whitespace, digits, then whatever
# trails (normally a // comment). The line is only handled here when the
# digits are not a key of %registers and the line contains no '['.
# "ret" keeps its name; every other instruction gets the 'l' suffix.
# On success the translated line is printed and the loop moves to the next line.
#*/
if (my ($insn, $imm, $trailer) = $line =~ m@(\w+)\s+(\d+)\s*(/*.*)@) {
    if (!$registers{$imm} && $line !~ /\[/) {
        my $suffix = ($insn eq "ret") ? "" : "l";
        print $insn, $suffix, " \$", $imm;
        print " ", $trailer, "\n";
        next LINE;
    }
}
#/* CASE 5 -- insn reg      --> insn *%reg     (when insn is call or jmp)
#             insn reg      --> insnl %reg     (any other instruction)
#             insn LOCATION --> insn LOCATION  (e.g. call FOO)
# This statement is a regexp looking for: a word, whitespace, letters, possible whitespace,
# and whatever follows (normally a // comment). Lines containing '[' are excluded.
# The instruction and operand are saved in named variables before printing. The
# non-register branch previously printed `$,` (the output field separator) in place
# of the instruction, so the instruction never reached the output.
# If entered, 'next' goes to the next iteration of the while loop.
#*/
if($line =~ m@(\w+)\s+([a-zA-Z]+)\s*(/*.*)@ && !($line =~ /\[/)){
    my ($insn, $operand, $comment) = ($1, $2, $3);
    if($registers{$operand} && ($insn eq "call" || $insn eq "jmp")){
        print $insn, " *\%", $operand;      # indirect call/jump through a register
    }
    elsif($registers{$operand}){
        print $insn, "l \%", $operand;
    }
    else{
        print $insn, " ", $operand;         # label or other symbol: pass through
    }
    print " ", $comment, "\n";
    next LINE;
}
#/* CASE 6 -- insn [reg + offset], reg2 --> insnl %reg2, offset(%reg)
#             insn [reg - offset], reg2 --> insnl %reg2, -offset(%reg)
# This statement is a regexp looking for: a word, whitespace, '[', letters, possible whitespace,
# a + or - char, possible whitespace, digits, possible whitespace, ']', possible whitespace, ',',
# possible whitespace, letters, possible whitespace, and whatever follows (normally a // comment).
# It then makes sure the bracketed word and the final operand are registers by using the hash.
# Both registers are written with the '%' prefix, matching the mapping above and CASE 3.
# If entered, 'next' goes to the next iteration of the while loop.
#*/
if($line =~ m@(\w+)\s+\[([a-zA-Z]+)\s*([+-])\s*(\d+)\s*\]\s*,\s*([a-zA-Z]+)\s*(/*.*)@ &&
   $registers{$2} && $registers{$5}){
    my $minus = ($3 eq "-") ? "-" : "";
    print $1, "l %", $5, ", ", $minus, $4, "(%", $2, ")";
    print " ", $6, "\n";
    next LINE;
}
# CASE 7 : insn reg, [reg2 + FOO*int] --> insnl (%reg2,%FOO,int), %reg   if FOO is a register
#                                     --> insnl FOO*int(%reg2), %reg     if FOO is not a register
#          insn reg, [reg2 - FOO*int] --> insnl -FOO*int(%reg2), %reg    if FOO is not a register
# E.G.   : add eax, [ecx + edx*4] --> addl (%ecx,%edx,4), %eax
#          add eax, [ecx + FOO*4] --> addl FOO*4(%ecx), %eax
#
# An AT&T memory operand is written displacement(base,index,scale). A register FOO
# therefore becomes the index with int as its scale, while a symbolic FOO*int becomes
# the displacement expression. A subtracted index register has no such form, so that
# line is not claimed here and reaches the pass-through at the end of the loop.
# Both reg and reg2 must be known registers.
if($line =~ m@(\w+)\s+([a-zA-Z]+),\s*\[([a-zA-Z]+)\s*([\+\-])\s*([a-zA-Z]+)\s*\*\s*(\d+)\s*]\s*(/*.*)@
   && $registers{$2} && $registers{$3}){
    my ($insn, $dest, $base, $sign, $foo, $scale, $comment) = ($1, $2, $3, $4, $5, $6, $7);
    my $memory;
    if($registers{$foo}){                   # FOO is an index register
        if($sign eq "+"){
            $memory = "(%" . $base . ",%" . $foo . "," . $scale . ")";
        }
    }
    else{                                   # FOO is a symbol: FOO*int is the displacement
        $memory = ($sign eq "-" ? "-" : "") . $foo . "*" . $scale . "(%" . $base . ")";
    }
    if(defined $memory){
        print $insn, "l ", $memory, ", %", $dest;
        print " ", $comment, "\n";
        next LINE;
    }
}
# CASE 8 : insn [reg1 + FOO*int], reg2 --> insnl %reg2, (%reg1,%FOO,int)   if FOO is a register
#                                      --> insnl %reg2, FOO*int(%reg1)     if FOO is not a register
#          insn [reg1 - FOO*int], reg2 --> insnl %reg2, -FOO*int(%reg1)    if FOO is not a register
#
# Captures, in order: insn, reg1, sign, FOO, int, reg2, trailing text (normally a // comment).
# reg1 and reg2 must be known registers. The previous guard tested the integer capture
# against the register table, which can never succeed, so this case was unreachable.
# The body also compared FOO against "+" and looked the integer up as a register.
#
# An AT&T memory operand is written displacement(base,index,scale). A register FOO
# becomes the index with int as its scale, and a symbolic FOO*int becomes the
# displacement expression. A subtracted index register has no such form, so that line
# is not claimed here and reaches the pass-through at the end of the loop.
if($line =~ m@(\w+)\s+\[([a-zA-Z]+)\s*([+-])\s*(\w+)\s*\*\s*(\d+)\s*\]\s*,\s*([a-zA-Z]+)\s*(/*.*)@
   && $registers{$2} && $registers{$6}){
    my ($insn, $base, $sign, $foo, $scale, $src, $comment) = ($1, $2, $3, $4, $5, $6, $7);
    my $memory;
    if($registers{$foo}){                   # FOO is an index register
        if($sign eq "+"){
            $memory = "(%" . $base . ",%" . $foo . "," . $scale . ")";
        }
    }
    else{                                   # FOO is not a register: FOO*int is the displacement
        $memory = ($sign eq "-" ? "-" : "") . $foo . "*" . $scale . "(%" . $base . ")";
    }
    if(defined $memory){
        print $insn, "l %", $src, ", ", $memory;
        print " ", $comment, "\n";
        next LINE;
    }
}
# CASE 9 : insn reg, [reg2] --> insnl (%reg2), %reg
# Plain register-indirect load. The pattern picks up the instruction, the
# destination, the bracketed word and whatever trails (normally a // comment).
# The line is handled only when both operands are known registers.
if (my ($insn, $dest, $base, $trailer) = $line =~ m@(\w+)\s+(\w+),\s*\[(\w+)\]\s*(/*.*)@) {
    if ($registers{$dest} && $registers{$base}) {
        print $insn, "l (%", $base, "), %", $dest, " ", $trailer, "\n";
        next LINE;
    }
}
# CASE 10 : insn [reg], reg2 --> insnl %reg2, (%reg)
# Plain register-indirect store, the mirror image of case 9. The bracketed
# word and the second operand must both be known registers.
if (my ($insn, $base, $src, $trailer) = $line =~ m@(\w+)\s+\[(\w+)\],\s*(\w+)\s*(/*.*)@) {
    if ($registers{$base} && $registers{$src}) {
        print $insn, "l %", $src, ", (%", $base, ")", " ", $trailer, "\n";
        next LINE;
    }
}
# CASE 11 : insn reg, [reg2 + reg3] --> insnl (%reg2,%reg3), %reg
#
# Captures, in order: insn, reg, reg2, reg3, trailing text (normally a // comment).
# All three operands must be known registers, and all three are written with '%'.
# Only '+' is matched: a base-minus-register operand has no AT&T form, so such a line
# is not claimed here and reaches the pass-through at the end of the loop, rather than
# having its instruction dropped.
# If entered, 'next' goes to the next iteration of the while loop.
if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\]\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$4}){
    print $1, "l (%", $3, ",%", $4, "), %", $2;
    print " ", $5, "\n";
    next LINE;
}
# CASE 12 : insn [reg + reg2], reg3 (reverse of case 11) --> insnl %reg3, (%reg,%reg2)
#
# Captures, in order: insn, reg, reg2, reg3, trailing text (normally a // comment).
# All three operands must be known registers, as in case 11; without that check any
# pair of words in the brackets would be emitted as if they were registers.
# Only '+' is matched: a base-minus-register operand has no AT&T form, so such a line
# is not claimed here and reaches the pass-through at the end of the loop, rather than
# having its instruction dropped.
# If entered, 'next' goes to the next iteration of the while loop.
if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$4}){
    print $1, "l %", $4, ", (%", $2, ",%", $3, ")";
    print " ", $5, "\n";
    next LINE;
}
# CASE 13 : insn reg, [reg2 + reg3 + int] --> insnl int(%reg2,%reg3), %reg
#
# Captures, in order: insn, reg, reg2, reg3, int, trailing text (normally a // comment).
# All three operands must be known registers, and all three are written with '%'.
# An AT&T memory operand is displacement(base,index,scale), so the integer goes in front
# of the parentheses. Placing it third inside them would make it the index scale.
# Only '+' between the registers is matched: a base-minus-register operand has no AT&T
# form, so such a line is not claimed here and reaches the pass-through at the end of
# the loop, rather than having its instruction dropped.
# If entered, 'next' goes to the next iteration of the while loop.
if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\+\s*(\d+)\s*\]\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$4}){
    print $1, "l ", $5, "(%", $3, ",%", $4, "), %", $2;
    print " ", $6, "\n";
    next LINE;
}
# CASE 14 : insn [reg + reg2 + int], reg3 (reverse of case 13) --> insnl %reg3, int(%reg,%reg2)
#
# The regular expression parses for (in this order): word, ws, '[', pws, chars, pws, '+',
# pws, chars, pws, '+', pws, int, pws, ']', ',', pws, chars, pws, trailing text.
#
# Captures $1-$6 respectively: insn, reg, reg2, int, reg3, trailing text (normally a // comment).
# All three operands must be known registers, and the integer must have at least one digit.
# An AT&T memory operand is displacement(base,index,scale), so the integer goes in front
# of the parentheses. Placing it third inside them would make it the index scale.
# Only '+' between the registers is matched: a base-minus-register operand has no AT&T
# form, so such a line is not claimed here and reaches the pass-through at the end of
# the loop, rather than having its instruction dropped.
if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\+\s*(\d+)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$5}){
    print $1, "l %", $5, ", ", $4, "(%", $2, ",%", $3, ")";
    print " ", $6, "\n";
    next LINE;
}
# CASE 15 : insn reg, [reg2 + reg3*scale + int] --> insnl int(%reg2,%reg3,scale), %reg
# E.G.    : mov eax, [ebx + ecx*4 + 8] --> movl 8(%ebx,%ecx,4), %eax
#
# The regular expression parses for (in this order): word, ws, word, ',', pws, '[',
# pws, chars, pws, '+', pws, chars, pws, '*', pws, int, pws, '+', pws, int, pws, ']', pws,
# trailing text.
#
# Captures $1-$7 respectively: insn, reg, reg2, reg3, scale, int, trailing text.
# All three operands must be known registers.
# An AT&T memory operand is displacement(base,index,scale): the number after '*' is the
# scale and the number after the last '+' is the displacement. The previous output had
# these two swapped.
# Only '+' between the registers is matched: a subtracted index register has no AT&T
# form, so such a line is not claimed here and reaches the pass-through at the end of
# the loop, rather than having its instruction dropped.
if($line =~ m@(\w+)\s+(\w+),\s*\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\*\s*(\d+)\s*\+\s*(\d+)\s*\]\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$4}){
    my ($insn, $dest, $base, $index, $scale, $disp, $comment) = ($1, $2, $3, $4, $5, $6, $7);
    print $insn, "l ", $disp, "(%", $base, ",%", $index, ",", $scale, "), %", $dest;
    print " ", $comment, "\n";
    next LINE;
}
# CASE 16 : insn [reg + reg2*scale + int], reg3 (reverse of case 15)
#           --> insnl %reg3, int(%reg,%reg2,scale)
# E.G.    : mov [ebx + ecx*4 + 8], eax --> movl %eax, 8(%ebx,%ecx,4)
#
# The regular expression parses for (in this order): word, ws, '[', pws, chars, pws,
# '+', pws, chars, pws, '*', pws, int, pws, '+', pws, int, pws, ']', ',', pws, chars, pws,
# then trailing text.
#
# Captures $1-$7 respectively: insn, reg, reg2, scale, int, reg3, trailing text.
# All three operands must be known registers, and both numbers need at least one digit.
# An AT&T memory operand is displacement(base,index,scale): the number after '*' is the
# scale and the number after the last '+' is the displacement. The previous output had
# these two swapped.
# Only '+' between the registers is matched: a subtracted index register has no AT&T
# form, so such a line is not claimed here and reaches the pass-through at the end of
# the loop, rather than having its instruction dropped.
if($line =~ m@(\w+)\s+\[\s*([a-zA-Z]+)\s*\+\s*([a-zA-Z]+)\s*\*\s*(\d+)\s*\+\s*(\d+)\s*\],\s*([a-zA-Z]+)\s*(/*.*)@
   && $registers{$2} && $registers{$3} && $registers{$6}){
    my ($insn, $base, $index, $scale, $disp, $src, $comment) = ($1, $2, $3, $4, $5, $6, $7);
    print $insn, "l %", $src, ", ", $disp, "(%", $base, ",%", $index, ",", $scale, ")";
    print " ", $comment, "\n";
    next LINE;
}
# Fallback: none of the cases above claimed the line, so it is written out
# untranslated. The pattern skips leading whitespace and stops before the
# newline, which is added back here.
if (my ($text) = $line =~ /\s*(.+)/) {
    print $text, "\n";
}
}