Initial checkin of X86 backend.

We can instruction select exactly one instruction 'ret void'. Wow. llvm-svn: 4284
2024-12-24 20:44:51 +00:00 · 2002-10-25 22:55:53 +00:00 · 2002-10-25 22:55:53 +00:00 · d25a097994
commit d25a097994
parent cfa05a3de9
11 changed files with 565 additions and 0 deletions
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@ -0,0 +1,133 @@
+//===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
+//
+// This file defines a simple peephole instruction selector for the x86 platform
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstructionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/iTerminators.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MFunction.h"
+#include "llvm/CodeGen/MInstBuilder.h"
+#include "llvm/Support/InstVisitor.h"
+#include <map>
+
+namespace {
+  /// ISel - A very simple instruction selector.  It is driven through the
+  /// InstVisitor callbacks and emits machine instructions into the MFunction
+  /// handed to the constructor.  (Eventually this will be a FunctionPass.)
+  ///
+  struct ISel : public InstVisitor<ISel> {
+    MFunction   *F;    // Machine function that receives the selected code
+    MBasicBlock *BB;   // Machine basic block currently being filled in
+
+    unsigned CurReg;                    // Next register number getReg hands out
+    std::map<Value*, unsigned> RegMap;  // LLVM value -> assigned register number
+
+    ISel(MFunction *MF)
+      : F(MF), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {}
+
+    /// runOnFunction - Instruction select every basic block and instruction of
+    /// the given LLVM function, then drop the value -> register mapping.
+    ///
+    /// Note that the parameter shadows the MFunction member of the same name;
+    /// inside this method 'F' is the LLVM function being visited.
+    ///
+    bool runOnFunction(Function &F) {
+      visit(F);
+      RegMap.clear();
+      return false;  // The LLVM code itself is never modified.
+    }
+
+    /// visitBasicBlock - Called when the visitor enters a new LLVM basic block.
+    /// A fresh MBasicBlock is created, appended to the machine function, and
+    /// made the current block, so the visit* calls that follow for the
+    /// instructions of this block emit their code into it.
+    ///
+    void visitBasicBlock(BasicBlock &LLVM_BB) {
+      MBasicBlock *NewBlock = new MBasicBlock();
+      // FIXME: Use the auto-insert form when it's available
+      F->getBasicBlockList().push_back(NewBlock);
+      BB = NewBlock;
+    }
+
+    // Per-instruction visitors.  Each one emits a fixed X86 sequence for the
+    // LLVM instruction it is given.
+    //
+    void visitReturnInst(ReturnInst &RI);
+    void visitAdd(BinaryOperator &B);
+
+    /// visitInstruction - Fallback for every instruction without a dedicated
+    /// visitor: report the instruction on stderr and abort.
+    ///
+    void visitInstruction(Instruction &I) {
+      std::cerr << "Cannot instruction select: ";
+      std::cerr << I;
+      abort();
+    }
+
+    /// getReg - Map an LLVM value to a register number.  A value that has no
+    /// register yet is given the next free one; later queries for the same
+    /// value return that same number.
+    ///
+    unsigned getReg(Value &V) { return getReg(&V); }  // Reference convenience
+    unsigned getReg(Value *V) {
+      std::map<Value*, unsigned>::iterator Slot = RegMap.find(V);
+      if (Slot == RegMap.end())
+        Slot = RegMap.insert(std::make_pair(V, 0u)).first;
+
+      // Zero marks "no register assigned yet".
+      if (Slot->second == 0)
+        Slot->second = CurReg++;
+
+      // FIXME: Constants should be thrown into registers here and appended to
+      // the end of the current basic block!
+
+      return Slot->second;
+    }
+
+  };
+}
+
+/// visitReturnInst - Select code for the LLVM 'ret' instruction, with the goal
+/// of meeting the x86 ABI.  The cases that need to be covered are:
+///
+///   ret void         : No return value, simply emit a 'ret' instruction
+///   ret sbyte, ubyte : Extend value into EAX and return
+///   ret short, ushort: Extend value into EAX and return
+///   ret int, uint    : Move value into EAX and return
+///   ret pointer      : Move value into EAX and return
+///   ret long, ulong  : Move value into EAX/EDX (?) and return
+///   ret float/double : ?  Top of FP stack?  XMM0?
+///
+/// Only 'ret void' is handled so far.  A return that carries an operand is
+/// handed to visitInstruction, which reports it and aborts.
+///
+void ISel::visitReturnInst(ReturnInst &I) {
+  const bool ReturnsValue = I.getNumOperands() != 0;
+  if (ReturnsValue) {
+    // TODO: move the result into a hard register, then emit the ret.
+    visitInstruction(I);  // unsupported for now: abort
+  }
+
+  // Emit a plain 'ret', appended to the end of the current basic block.
+  new MInstruction(BB, X86::RET);
+}
+
+
+/// visitAdd - Turn an LLVM 'add' into a single x86 reg,reg add.  The opcode is
+/// picked from the primitive size of the result type (1, 2 or 4 bytes); every
+/// other size, including the 8 byte long types, is handed to visitInstruction,
+/// which reports the instruction and aborts.
+void ISel::visitAdd(BinaryOperator &B) {
+  // Registers are requested in the order operand 0, operand 1, result.
+  unsigned LHSReg = getReg(B.getOperand(0));
+  unsigned RHSReg = getReg(B.getOperand(1));
+  unsigned ResultReg = getReg(B);
+
+  X86::Opcode AddOpcode;
+  switch (B.getType()->getPrimitiveSize()) {
+  case 1:   // UByte, SByte
+    AddOpcode = X86::ADDrr8;
+    break;
+  case 2:   // UShort, Short
+    AddOpcode = X86::ADDrr16;
+    break;
+  case 4:   // UInt, Int
+    AddOpcode = X86::ADDrr32;
+    break;
+
+  case 8:   // ULong, Long
+  default:
+    visitInstruction(B);  // abort
+    return;
+  }
+
+  BuildMInst(BB, AddOpcode, ResultReg).addReg(LHSReg).addReg(RHSReg);
+}
+
+
+
+/// X86SimpleInstructionSelection - Convert an LLVM function into its machine
+/// code representation in a very simple peep-hole fashion.  The result is a
+/// newly allocated MFunction that is returned to the caller.  The generated
+/// code sucks but the implementation is nice and simple.
+///
+MFunction *X86SimpleInstructionSelection(Function &F) {
+  MFunction *MF = new MFunction();
+  ISel Selector(MF);
+  Selector.runOnFunction(F);
+  return MF;
+}
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@ -0,0 +1,4 @@
+LEVEL = ../../..
+LIBRARYNAME = x86
+include $(LEVEL)/Makefile.common
+
--- a/lib/Target/X86/Printer.cpp
+++ b/lib/Target/X86/Printer.cpp
@ -0,0 +1,21 @@
+//===-- X86/Printer.cpp - Convert X86 code to human readable rep. ---------===//
+//
+// This file contains a printer that converts from our internal representation
+// of LLVM code to a nice human readable form that is suitable for debugging.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include <iostream>
+
+/// X86PrintCode - Print the given machine code function to the given stream.
+/// This is meant to work whether or not the function is in SSA form, although
+/// when in SSA form we obviously don't care about being consumable by an
+/// assembler.
+///
+/// Printing is not implemented yet: MF is ignored and only a placeholder
+/// notice is written to the stream.
+///
+void X86PrintCode(const MFunction *MF, std::ostream &O) {
+  // TODO: this should use the X86InstructionInfo::print method to print
+  // assembly for each instruction.
+  static const char Notice[] = "x86 printing not implemented yet!\n";
+  O << Notice;
+}
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@ -0,0 +1,143 @@
+//===- README.txt - Information about the X86 backend and related files ---===//
+//
+// This file contains random notes and points of interest about the X86 backend.
+//
+// Snippets of this document will probably become the final report for CS497
+//
+//===----------------------------------------------------------------------===//
+
+===========
+I. Overview
+===========
+
+This directory contains a machine description for the X86 processor.  Currently
+this machine description is used for a high performance code generator used by a
+LLVM JIT.  One of the main objectives that we would like to support with this
+project is to build a nice clean code generator that may be extended in the
+future in a variety of ways: new targets, new optimizations, new
+transformations, etc.
+
+This document describes the current state of the LLVM JIT, along with
+implementation notes, design decisions, and other stuff.
+
+
+===================================
+II. Architecture / Design Decisions
+===================================
+
+We designed the infrastructure for the machine specific representation to be as
+light-weight as possible, while also being able to support as many targets as
+possible with our framework.  This framework should allow us to share many
+common machine specific transformations (register allocation, instruction
+scheduling, etc...) among all of the backends that may eventually be supported
+by the JIT, and unify the JIT and static compiler backends.
+
+At the high-level, LLVM code is translated to a machine specific representation
+formed out of MFunction, MBasicBlock, and MInstruction instances (defined in
+include/llvm/CodeGen).  This representation is completely target agnostic,
+representing instructions in their most abstract form: an opcode, a destination,
+and a series of operands.  This representation is designed to support both SSA
+representation for machine code, as well as a register allocated, non-SSA form.
+
+Because the M* representation must work regardless of the target machine, it
+contains very little semantic information about the program.  To get semantic
+information about the program, a layer of Target description datastructures are
+used, defined in include/llvm/Target.
+
+Currently the Sparc backend and the X86 backend do not share a common
+representation.  This is an intentional decision, and will be rectified in the
+future (after the project is done).
+
+
+=======================
+III. Source Code Layout
+=======================
+
+The LLVM-JIT is composed of source files primarily in the following locations:
+
+include/llvm/CodeGen
+--------------------
+
+This directory contains header files that are used to represent the program in a
+machine specific representation.  It currently also contains a bunch of stuff
+used by the Sparc backend that we don't want to get mixed up in.
+
+include/llvm/Target
+-------------------
+
+This directory contains header files that are used to interpret the machine
+specific representation of the program.  This allows us to write generic
+transformations that will work on any target that implements the interfaces
+defined in this directory.  Again, this also contains a bunch of stuff from the
+Sparc Backend that we don't want to deal with.
+
+lib/CodeGen
+-----------
+This directory will contain all of the target independent transformations (for
+example, register allocation) that we write.  These transformations should only
+use information exposed through the Target interface; they should not include
+any target specific header files.
+
+lib/Target/X86
+--------------
+This directory contains the machine description for X86 that is required to get
+the rest of the compiler working.  It contains any code that is truly specific
+to the X86 backend, for example the instruction selector and machine code
+emitter.
+
+tools/jello
+-----------
+This directory contains the top-level code for the JIT compiler.
+
+test/Regression/Jello
+---------------------
+This directory contains regression tests for the JIT.  Initially it contains a
+bunch of really trivial testcases that we should build up to supporting.
+
+
+==========================
+IV. TODO / Future Projects
+==========================
+
+There are a large number of things remaining to do.  Here is a partial list:
+
+Critical path:
+--------------
+
+0. Finish providing SSA form.  This involves keeping track of some information
+   when instructions are added to the function, but should not affect the API
+   for creating new MInstructions or adding them to the program.  There are
+   also various FIXMEs in the M* files that need to get taken care of in the
+   near term.
+1. Finish dumb instruction selector
+2. Write dumb register allocator
+3. Write assembly language emitter
+4. Write machine code emitter
+
+Next Phase:
+-----------
+1. Implement linear time optimal instruction selector
+2. Implement smarter (linear scan?) register allocator
+
+After this project:
+-------------------
+1. Implement lots of nifty runtime optimizations
+2. Implement a static compiler backend for x86
+3. Migrate Sparc backend to new representation
+4. Implement new spiffy targets: IA64? X86-64? M68k?  Who knows...
+
+Infrastructure Improvements:
+----------------------------
+
+1. Bytecode is designed to be able to read particular functions from the
+   bytecode without having to read the whole program.  Bytecode reader should be
+   extended to allow on demand loading of functions.
+
+2. PassManager needs to be able to run just a single function through a pipeline
+   of FunctionPass's.  When this happens, all of our code will become
+   FunctionPass's for real.
+
+3. llvmgcc needs to be modified to output 32-bit little endian LLVM files.
+   Preferably it will be parameterizable so that multiple binaries need not
+   exist.  Until this happens, we will be restricted to using type safe
+   programs (most of the Olden suite and many smaller tests), which should be
+   sufficient for our 497 project.
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@ -0,0 +1,61 @@
+//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
+//
+// This file contains the entry points for global functions defined in the x86
+// target library, as used by the LLVM JIT.
+//
+// FIXME: This file will be dramatically changed in the future
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_X86_H
+#define TARGET_X86_H
+
+#include <iosfwd>
+class MFunction;
+class Function;
+
+/// X86PrintCode - Print out the specified machine code function to the
+/// specified stream.  This function should work regardless of whether or not
+/// the function is in SSA form.
+///
+void X86PrintCode(const MFunction *MF, std::ostream &O);
+
+/// X86SimpleInstructionSelection - This function converts an LLVM function into
+/// a machine code representation in a very simple peep-hole fashion.  The
+/// generated code sucks but the implementation is nice and simple.
+///
+MFunction *X86SimpleInstructionSelection(Function &F);
+
+/// X86SimpleRegisterAllocation - This function converts the specified machine
+/// code function from SSA form to use explicit registers by spilling every
+/// register.  Wow, great policy huh?
+///
+/// NOTE: this is currently an empty inline placeholder; calling it leaves MF
+/// untouched.
+///
+inline void X86SimpleRegisterAllocation(MFunction *MF) {}
+
+/// X86EmitCodeToMemory - This function converts a register allocated function
+/// into raw machine code in a dynamically allocated chunk of memory.  A pointer
+/// to the start of the function is returned.
+///
+/// NOTE: this is currently an inline placeholder that emits nothing and always
+/// returns a null pointer.
+///
+inline void *X86EmitCodeToMemory(MFunction *MF) { return 0; }
+
+
+// Put symbolic names in a namespace to avoid causing these to clash with all
+// kinds of other things...
+//
+namespace X86 {
+  // Defines a large number of symbolic names for X86 registers.  This defines a
+  // mapping from register name to register number.
+  //
+  // Each R(...) entry of the .def file expands to just its enum name, so the
+  // enumerators are numbered in the order the entries are listed there.  The
+  // .def file #undef's R itself once it has been processed.
+  //
+  enum Register {
+#define R(ENUM, NAME, FLAGS, TSFLAGS) ENUM,
+#include "X86RegisterInfo.def"
+  };
+
+  // This defines a large number of symbolic names for X86 instruction opcodes.
+  // The I(...) entries are expanded the same way as the R(...) entries above:
+  // only the enum name is kept, in listing order.
+  enum Opcode {
+#define I(ENUM, NAME, FLAGS, TSFLAGS) ENUM,
+#include "X86InstructionInfo.def"
+  };
+}
+
+#endif
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@ -0,0 +1,29 @@
+//===- X86InstructionInfo.cpp - X86 Instruction Information ---------------===//
+//
+// This file contains the X86 implementation of the MInstructionInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstructionInfo.h"
+#include "llvm/CodeGen/MInstruction.h"
+#include <ostream>
+
+// X86Insts - Turn the InstructionInfo.def file into a bunch of instruction
+// descriptors
+//
+// Each I(...) entry becomes one { NAME, FLAGS, TSFLAGS } initializer; the enum
+// name argument is dropped here.  The entries appear in the order the .def
+// file lists them, which is the same order the X86::Opcode enumerators are
+// generated in, so an opcode value can be used as an index into this table.
+//
+static const MInstructionDesc X86Insts[] = {
+#define I(ENUM, NAME, FLAGS, TSFLAGS) { NAME, FLAGS, TSFLAGS },
+#include "X86InstructionInfo.def"
+};
+
+// Hand the descriptor table and its element count to the generic base class.
+X86InstructionInfo::X86InstructionInfo()
+  : MInstructionInfo(X86Insts, sizeof(X86Insts) / sizeof(*X86Insts)) {}
+
+
+// print - Print out an x86 instruction in GAS syntax.  For now this writes
+// only the opcode name of the instruction followed by a newline; operands are
+// not printed.
+void X86InstructionInfo::print(const MInstruction *MI, std::ostream &O) const {
+  // FIXME: This sucks.
+  O << get(MI->getOpcode()).Name;
+  O << "\n";
+}
+
--- a/lib/Target/X86/X86InstrInfo.def
+++ b/lib/Target/X86/X86InstrInfo.def
@ -0,0 +1,40 @@
+//===-- X86InstructionInfo.def - X86 Instruction Information ----*- C++ -*-===//
+//
+// This file describes all of the instructions that the X86 backend uses.  It
+// relies on an external 'I' macro being defined that takes the arguments
+// specified below, and is used to make all of the information relevant to an
+// instruction be in one place.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: No include guards desired
+
+#ifndef I
+#errror "Must define I macro before including X86/X86InstructionInfo.def!"
+#endif
+
+// Arguments to be passed into the I macro
+//  #1: Enum name - This ends up being the opcode symbol in the X86 namespace
+//  #2: Opcode name, as used by the gnu assembler
+//  #3: Instruction Flags - This should be a field or'd together that contains
+//      constants from the MInstructionInfo.h file.
+//  #4: Target Specific Flags - Another bitfield containing X86 specific flags
+//      that we are interested in for each instruction
+//
+
+// The first instruction must always be the PHI instruction:
+I(PHI         , "phi",                0, 0)
+
+// The second instruction must always be the noop instruction
+I(NOOP        , "nop",                0, 0)      // nop          90
+
+// Miscellaneous instructions
+// (C3 is the near return; CB encodes the far return.)
+I(RET         , "ret",         MIF::RET, 0)      // ret          C3
+
+// Register to register adds.  The 16 and 32 bit forms share opcode 01/r; in
+// 32-bit code the 16 bit form is selected with the 66 operand-size prefix.
+I(ADDrr8      , "add",                0, 0)      // R8  += R8    00/r
+I(ADDrr16     , "add",                0, 0)      // R16 += R16   01/r
+I(ADDrr32     , "addl",               0, 0)      // R32 += R32   01/r
+
+
+// At this point, I is dead, so undefine the macro
+#undef I
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@ -0,0 +1,30 @@
+//===- X86InstructionInfo.h - X86 Instruction Information ---------*-C++-*-===//
+//
+// This file contains the X86 implementation of the MInstructionInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86INSTRUCTIONINFO_H
+#define X86INSTRUCTIONINFO_H
+
+#include "llvm/Target/MInstructionInfo.h"
+#include "X86RegisterInfo.h"
+
+/// X86InstructionInfo - The X86 implementation of MInstructionInfo.  It also
+/// owns the X86 register description, which it exposes via getRegisterInfo.
+///
+class X86InstructionInfo : public MInstructionInfo {
+public:
+  X86InstructionInfo();
+
+  /// getRegisterInfo - Instruction info is a superset of register info, so any
+  /// client holding an instruction info object can always reach the matching
+  /// register info through this accessor.
+  ///
+  virtual const MRegisterInfo &getRegisterInfo() const { return RI; }
+
+  /// print - Write an x86 instruction to the stream in GAS syntax.
+  ///
+  virtual void print(const MInstruction *MI, std::ostream &O) const;
+
+private:
+  const X86RegisterInfo RI;   // Register description returned by getRegisterInfo
+};
+
+
+#endif
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@ -0,0 +1,19 @@
+//===- X86RegisterInfo.cpp - X86 Register Information ---------------------===//
+//
+// This file contains the X86 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86RegisterInfo.h"
+
+// X86Regs - Turn the X86RegisterInfo.def file into a bunch of register
+// descriptors
+//
+// Each R(...) entry becomes one { NAME, FLAGS, TSFLAGS } initializer; the enum
+// name argument is dropped here.  The entries appear in the order the .def
+// file lists them, which is the same order the X86::Register enumerators are
+// generated in, so a register number can be used as an index into this table.
+//
+static const MRegisterDesc X86Regs[] = {
+#define R(ENUM, NAME, FLAGS, TSFLAGS) { NAME, FLAGS, TSFLAGS },
+#include "X86RegisterInfo.def"
+};
+
+// Hand the descriptor table and its element count to the generic base class.
+X86RegisterInfo::X86RegisterInfo()
+  : MRegisterInfo(X86Regs, sizeof(X86Regs) / sizeof(*X86Regs)) {}
--- a/lib/Target/X86/X86RegisterInfo.def
+++ b/lib/Target/X86/X86RegisterInfo.def
@ -0,0 +1,68 @@
+//===-- X86RegisterInfo.def - X86 Register Information ----------*- C++ -*-===//
+//
+// This file describes all of the registers that the X86 backend uses.  It
+// relies on an external 'R' macro being defined that takes the arguments
+// specified below, and is used to make all of the information relevant to a
+// register be in one place.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: No include guards desired
+
+#ifndef R
+#errror "Must define R macro before including X86/X86RegisterInfo.def!"
+#endif
+
+// Arguments passed into the R macro
+//  #1: Enum Name - This ends up being a symbol in the X86 namespace
+//  #2: Register name - The name of the register as used by the gnu assembler
+//  #3: Register Flags - A bitfield of flags or'd together from the
+//      MRegisterInfo.h file.
+//  #4: Target Specific Flags - Another bitfield containing X86 specific flags
+//      as necessary.
+
+
+// The first register must always be a 'noop' register for all backends.  This
+// is used as the destination register for instructions that do not produce a
+// value.  Some backends may use this as an operand register to mean special
+// things, for example, the Sparc backend uses R#0 to mean %g0 which always
+// PRODUCES the value 0.  The X86 backend does not use this value as an operand
+// register.
+//
+R(NoReg, "none", 0, 0)
+
+
+// 32 bit registers, ordered as the processor does...
+R(EAX, "eax", MRF::INT32, 0)
+R(ECX, "ecx", MRF::INT32, 0)
+R(EDX, "edx", MRF::INT32, 0)
+R(EBX, "ebx", MRF::INT32, 0)
+R(ESP, "esp", MRF::INT32, 0)
+R(EBP, "ebp", MRF::INT32, 0)
+R(ESI, "esi", MRF::INT32, 0)
+R(EDI, "edi", MRF::INT32, 0)
+
+// 16 bit registers, aliased with the corresponding 32 bit registers above
+R(AX, "ax", MRF::INT16, 0)
+R(CX, "cx", MRF::INT16, 0)
+R(DX, "dx", MRF::INT16, 0)
+R(BX, "bx", MRF::INT16, 0)
+R(SP, "sp", MRF::INT16, 0)
+R(BP, "bp", MRF::INT16, 0)
+R(SI, "si", MRF::INT16, 0)
+R(DI, "di", MRF::INT16, 0)
+
+// 8 bit registers aliased with registers above as well
+R(AL, "al", MRF::INT8, 0)
+R(CL, "cl", MRF::INT8, 0)
+R(DL, "dl", MRF::INT8, 0)
+R(BL, "bl", MRF::INT8, 0)
+R(AH, "ah", MRF::INT8, 0)
+R(CH, "ch", MRF::INT8, 0)
+R(DH, "dh", MRF::INT8, 0)
+R(BH, "bh", MRF::INT8, 0)
+
+// Flags, Segment registers, etc...
+
+// We are now done with the R macro
+#undef R
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@ -0,0 +1,17 @@
+//===- X86RegisterInfo.h - X86 Register Information Impl ----------*-C++-*-===//
+//
+// This file contains the X86 implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86REGISTERINFO_H
+#define X86REGISTERINFO_H
+
+#include "llvm/Target/MRegisterInfo.h"
+
+/// X86RegisterInfo - The X86 implementation of MRegisterInfo.  It adds nothing
+/// to the base class interface; the constructor is defined out of line in the
+/// matching .cpp file.
+///
+struct X86RegisterInfo : public MRegisterInfo {
+  X86RegisterInfo();
+
+};
+
+#endif