Initial checkin of X86 backend.

We can instruction select exactly one instruction 'ret void'.  Wow.

llvm-svn: 4284
This commit is contained in:
Chris Lattner 2002-10-25 22:55:53 +00:00
parent cfa05a3de9
commit d25a097994
11 changed files with 565 additions and 0 deletions

View File

@ -0,0 +1,133 @@
//===-- InstSelectSimple.cpp - A simple instruction selector for x86 ------===//
//
// This file defines a simple peephole instruction selector for the x86 platform
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstructionInfo.h"
#include "llvm/Function.h"
#include "llvm/iTerminators.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MFunction.h"
#include "llvm/CodeGen/MInstBuilder.h"
#include "llvm/Support/InstVisitor.h"
#include <map>
namespace {
/// ISel - Visitor that walks an LLVM function and appends fixed X86 machine
/// instructions to an MFunction, one LLVM instruction at a time.
struct ISel : public InstVisitor<ISel> {  // eventually will be a FunctionPass
  MFunction *F;                       // Machine function receiving the code
  MBasicBlock *BB;                    // Machine block currently being filled
  unsigned CurReg;                    // Next register number to hand out
  std::map<Value*, unsigned> RegMap;  // LLVM value -> SSA register number

  ISel(MFunction *MF)
    : F(MF), BB(0), CurReg(MRegisterInfo::FirstVirtualRegister) {
  }

  /// runOnFunction - Instruction select every basic block of Fn, then drop
  /// the value-to-register mapping.  Always returns false because the LLVM
  /// code itself is never modified.
  ///
  bool runOnFunction(Function &Fn) {
    visit(Fn);
    RegMap.clear();
    return false;
  }

  /// visitBasicBlock - Called each time a new LLVM basic block is entered.
  /// Allocates a fresh MBasicBlock, appends it to the MFunction, and makes it
  /// the block that the following visit* calls emit into.
  ///
  void visitBasicBlock(BasicBlock &LLVM_BB) {
    MBasicBlock *NewBlock = new MBasicBlock();
    // FIXME: Use the auto-insert form when it's available
    F->getBasicBlockList().push_back(NewBlock);
    BB = NewBlock;
  }

  // Per-instruction handlers.  Each one emits a fixed X86 sequence for the
  // LLVM instruction it is given.
  //
  void visitReturnInst(ReturnInst &RI);
  void visitAdd(BinaryOperator &B);

  /// visitInstruction - Fallback for everything without a dedicated handler:
  /// report the instruction on stderr and abort.
  ///
  void visitInstruction(Instruction &I) {
    std::cerr << "Cannot instruction select: " << I;
    abort();
  }

  /// getReg - Map an LLVM value to a register number.  The first query for a
  /// value assigns it the next free number; later queries return that same
  /// number.
  ///
  unsigned getReg(Value &V) {  // Convenience overload for references
    return getReg(&V);
  }

  unsigned getReg(Value *V) {
    unsigned &Slot = RegMap[V];  // A missing entry is created holding 0
    if (Slot != 0)
      return Slot;               // Already has a register

    Slot = CurReg++;
    // FIXME: Constants should be thrown into registers here and appended to
    // the end of the current basic block!
    return Slot;
  }
};
}
/// 'ret' instruction - Here we are interested in meeting the x86 ABI. As such,
/// we have the following possibilities:
///
/// ret void: No return value, simply emit a 'ret' instruction
/// ret sbyte, ubyte : Extend value into EAX and return
/// ret short, ushort: Extend value into EAX and return
/// ret int, uint : Move value into EAX and return
/// ret pointer : Move value into EAX and return
/// ret long, ulong : Move value into EAX/EDX (?) and return
/// ret float/double : ? Top of FP stack? XMM0?
///
void ISel::visitReturnInst(ReturnInst &I) {
if (I.getNumOperands() != 0) { // Not 'ret void'?
// Move result into a hard register... then emit a ret
visitInstruction(I); // abort
}
// Emit a simple 'ret' instruction... appending it to the end of the basic
// block
new MInstruction(BB, X86::RET);
}
/// visitAdd - Select an LLVM 'add' as a single x86 reg,reg add whose width
/// matches the primitive size of the result type.  Sizes other than 1, 2 and
/// 4 bytes (including 8-byte ULong/Long) go to visitInstruction, which aborts.
///
void ISel::visitAdd(BinaryOperator &B) {
  // Register numbers are assigned on first query, so the order matters:
  // left operand, right operand, then the result.
  unsigned LHSReg = getReg(B.getOperand(0));
  unsigned RHSReg = getReg(B.getOperand(1));
  unsigned ResultReg = getReg(B);

  // Choose the opcode first; every path that reaches the emit assigns it.
  X86::Opcode AddOpcode;
  switch (B.getType()->getPrimitiveSize()) {
  case 1:   // UByte, SByte
    AddOpcode = X86::ADDrr8;
    break;
  case 2:   // UShort, Short
    AddOpcode = X86::ADDrr16;
    break;
  case 4:   // UInt, Int
    AddOpcode = X86::ADDrr32;
    break;
  default:  // ULong, Long (8 bytes) and anything else
    visitInstruction(B);  // abort
    return;
  }

  BuildMInst(BB, AddOpcode, ResultReg).addReg(LHSReg).addReg(RHSReg);
}
/// X86SimpleInstructionSelection - Convert an LLVM function into a machine
/// code representation in a very simple peephole fashion.  The generated code
/// sucks but the implementation is nice and simple.  The returned MFunction is
/// allocated here with 'new'.
///
MFunction *X86SimpleInstructionSelection(Function &F) {
  MFunction *MF = new MFunction();
  ISel Selector(MF);
  Selector.runOnFunction(F);
  return MF;
}

4
lib/Target/X86/Makefile Normal file
View File

@ -0,0 +1,4 @@
# Relative path from this directory (lib/Target/X86) up to the top of the tree.
LEVEL = ../../..
# Name of the library built from this directory; presumably consumed by
# Makefile.common -- confirm there.
LIBRARYNAME = x86
# Pull in the shared makefile located through LEVEL.
include $(LEVEL)/Makefile.common

View File

@ -0,0 +1,21 @@
//===-- X86/Printer.cpp - Convert X86 code to human readable rep. ---------===//
//
// This file contains a printer that converts from our internal representation
// of LLVM code to a nice human readable form that is suitable for debugging.
//
//===----------------------------------------------------------------------===//
#include "X86.h"
#include <iostream>
/// X86PrintCode - Print the given machine code function to the given stream.
/// This is meant to work whether or not the function is in SSA form, although
/// SSA-form output obviously does not need to be consumable by an assembler.
///
/// For now this only writes a placeholder message and does not look at MF.
///
void X86PrintCode(const MFunction *MF, std::ostream &O) {
  // TODO: use the X86InstructionInfo::print method to print assembly for each
  // instruction.
  static const char Placeholder[] = "x86 printing not implemented yet!\n";
  O << Placeholder;
}

143
lib/Target/X86/README.txt Normal file
View File

@ -0,0 +1,143 @@
//===- README.txt - Information about the X86 backend and related files ---===//
//
// This file contains random notes and points of interest about the X86 backend.
//
// Snippets of this document will probably become the final report for CS497
//
//===----------------------------------------------------------------------===//
===========
I. Overview
===========
This directory contains a machine description for the X86 processor. Currently
this machine description is used for a high performance code generator used by an
LLVM JIT. One of the main objectives that we would like to support with this
project is to build a nice clean code generator that may be extended in the
future in a variety of ways: new targets, new optimizations, new
transformations, etc.
This document describes the current state of the LLVM JIT, along with
implementation notes, design decisions, and other stuff.
===================================
II. Architecture / Design Decisions
===================================
We designed the infrastructure for the machine specific representation to be as
light-weight as possible, while also being able to support as many targets as
possible with our framework. This framework should allow us to share many
common machine specific transformations (register allocation, instruction
scheduling, etc...) among all of the backends that may eventually be supported
by the JIT, and unify the JIT and static compiler backends.
At the high-level, LLVM code is translated to a machine specific representation
formed out of MFunction, MBasicBlock, and MInstruction instances (defined in
include/llvm/CodeGen). This representation is completely target agnostic,
representing instructions in their most abstract form: an opcode, a destination,
and a series of operands. This representation is designed to support both SSA
representation for machine code, as well as a register allocated, non-SSA form.
Because the M* representation must work regardless of the target machine, it
contains very little semantic information about the program. To get semantic
information about the program, a layer of Target description data structures
(defined in include/llvm/Target) is used.
Currently the Sparc backend and the X86 backend do not share a common
representation. This is an intentional decision, and will be rectified in the
future (after the project is done).
=======================
III. Source Code Layout
=======================
The LLVM-JIT is composed of source files primarily in the following locations:
include/llvm/CodeGen
--------------------
This directory contains header files that are used to represent the program in a
machine specific representation. It currently also contains a bunch of stuff
used by the Sparc backend that we don't want to get mixed up in.
include/llvm/Target
-------------------
This directory contains header files that are used to interpret the machine
specific representation of the program. This allows us to write generic
transformations that will work on any target that implements the interfaces
defined in this directory. Again, this also contains a bunch of stuff from the
Sparc Backend that we don't want to deal with.
lib/CodeGen
-----------
This directory will contain all of the target independent transformations (for
example, register allocation) that we write. These transformations should only
use information exposed through the Target interface; they should not include
any target specific header files.
lib/Target/X86
--------------
This directory contains the machine description for X86 that is required to get
the rest of the compiler working. It contains any code that is truly specific to
the X86 backend, for example the instruction selector and machine code emitter.
tools/jello
-----------
This directory contains the top-level code for the JIT compiler.
test/Regression/Jello
---------------------
This directory contains regression tests for the JIT. Initially it contains a
bunch of really trivial testcases that we should build up to supporting.
==========================
IV. TODO / Future Projects
==========================
There are a large number of things remaining to do. Here is a partial list:
Critical path:
--------------
0. Finish providing SSA form. This involves keeping track of some information
when instructions are added to the function, but should not affect the API
for creating new MInstructions or adding them to the program. There are
also various FIXMEs in the M* files that need to get taken care of in the
near term.
1. Finish dumb instruction selector
2. Write dumb register allocator
3. Write assembly language emitter
4. Write machine code emitter
Next Phase:
-----------
1. Implement linear time optimal instruction selector
2. Implement smarter (linear scan?) register allocator
After this project:
-------------------
1. Implement lots of nifty runtime optimizations
2. Implement a static compiler backend for x86
3. Migrate Sparc backend to new representation
4. Implement new spiffy targets: IA64? X86-64? M68k? Who knows...
Infrastructure Improvements:
----------------------------
1. Bytecode is designed to be able to read particular functions from the
bytecode without having to read the whole program. Bytecode reader should be
extended to allow on demand loading of functions.
2. PassManager needs to be able to run just a single function through a pipeline
of FunctionPass's. When this happens, all of our code will become
FunctionPass's for real.
3. llvmgcc needs to be modified to output 32-bit little endian LLVM files.
Preferably it will be parameterizable so that multiple binaries need not
exist. Until this happens, we will be restricted to using type safe
programs (most of the Olden suite and many smaller tests), which should be
sufficient for our 497 project.

61
lib/Target/X86/X86.h Normal file
View File

@ -0,0 +1,61 @@
//===-- X86.h - Top-level interface for X86 representation ------*- C++ -*-===//
//
// This file contains the entry points for global functions defined in the x86
// target library, as used by the LLVM JIT.
//
// FIXME: This file will be dramatically changed in the future
//
//===----------------------------------------------------------------------===//
#ifndef TARGET_X86_H
#define TARGET_X86_H
#include <iosfwd>
class MFunction;
class Function;
/// X86PrintCode - Print out the specified machine code function to the
/// specified stream. This function should work regardless of whether the
/// function is in SSA form.
///
/// MF is the machine function to print; O is the stream to write to.
///
void X86PrintCode(const MFunction *MF, std::ostream &O);
/// X86SimpleInstructionSelection - This function converts an LLVM function into
/// a machine code representation in a very simple peep-hole fashion. The
/// generated code sucks but the implementation is nice and simple.
///
/// Returns a pointer to the MFunction built for F.
/// NOTE(review): the result looks heap allocated with ownership passing to the
/// caller -- confirm against the definition.
///
MFunction *X86SimpleInstructionSelection(Function &F);
/// X86SimpleRegisterAllocation - This function converts the specified machine
/// code function from SSA form to use explicit registers by spilling every
/// register. Wow, great policy huh?
///
/// NOTE: this is currently an empty inline stub; it leaves MF untouched.
///
inline void X86SimpleRegisterAllocation(MFunction *MF) {}
/// X86EmitCodeToMemory - This function converts a register allocated function
/// into raw machine code in a dynamically allocated chunk of memory. A pointer
/// to the start of the function is returned.
///
/// NOTE: this is currently an inline stub that emits nothing and always
/// returns a null pointer.
///
inline void *X86EmitCodeToMemory(MFunction *MF) { return 0; }
// Put symbolic names in a namespace to avoid causing these to clash with all
// kinds of other things...
//
// Both enums are generated by defining a one-shot macro (R or I) that keeps
// only the ENUM argument and then including the matching .def file. Each
// enumerator's value is therefore its position in that .def file. The .def
// files #undef their macro themselves, so none is needed here.
//
namespace X86 {
// Defines a large number of symbolic names for X86 registers. This defines a
// mapping from register name to register number. NoReg is the first entry in
// X86RegisterInfo.def, so it is register number 0.
//
enum Register {
#define R(ENUM, NAME, FLAGS, TSFLAGS) ENUM,
#include "X86RegisterInfo.def"
};
// This defines a large number of symbolic names for X86 instruction opcodes.
// PHI and NOOP are the first two entries in X86InstructionInfo.def, so they
// are opcodes 0 and 1.
enum Opcode {
#define I(ENUM, NAME, FLAGS, TSFLAGS) ENUM,
#include "X86InstructionInfo.def"
};
}
#endif

View File

@ -0,0 +1,29 @@
//===- X86InstructionInfo.cpp - X86 Instruction Information ---------------===//
//
// This file contains the X86 implementation of the MInstructionInfo class.
//
//===----------------------------------------------------------------------===//
#include "X86InstructionInfo.h"
#include "llvm/CodeGen/MInstruction.h"
#include <ostream>
// X86Insts - Turn the InstructionInfo.def file into a bunch of instruction
// descriptors
//
// The I macro drops the ENUM argument here and keeps only NAME, FLAGS and
// TSFLAGS. Entries appear in .def order, which is the same order used to build
// the X86::Opcode enum in X86.h, so entry N describes opcode N.
//
static const MInstructionDesc X86Insts[] = {
#define I(ENUM, NAME, FLAGS, TSFLAGS) { NAME, FLAGS, TSFLAGS },
#include "X86InstructionInfo.def"
};
X86InstructionInfo::X86InstructionInfo()
: MInstructionInfo(X86Insts, sizeof(X86Insts)/sizeof(X86Insts[0])) {
}
// print - Print out an x86 instruction in GAS syntax
void X86InstructionInfo::print(const MInstruction *MI, std::ostream &O) const {
// FIXME: This sucks.
O << get(MI->getOpcode()).Name << "\n";
}

View File

@ -0,0 +1,40 @@
//===-- X86InstructionInfo.def - X86 Instruction Information ----*- C++ -*-===//
//
// This file describes all of the instructions that the X86 backend uses. It
// relies on an external 'I' macro being defined that takes the arguments
// specified below, and is used to make all of the information relevant to an
// instruction be in one place.
//
//===----------------------------------------------------------------------===//
// NOTE: No include guards desired
#ifndef I
#errror "Must define I macro before including X86/X86InstructionInfo.def!"
#endif
// Arguments to be passed into the I macro
// #1: Enum name - This ends up being the opcode symbol in the X86 namespace
// #2: Opcode name, as used by the gnu assembler
// #3: Instruction Flags - This should be a field or'd together that contains
// constants from the MInstructionInfo.h file.
// #4: Target Specific Flags - Another bitfield containing X86 specific flags
// that we are interested in for each instruction
//
// Entry order matters: each I(...) line's position becomes the numeric value
// of the matching X86::Opcode enumerator and its index in the descriptor
// table. The trailing comment on each line gives the operation and the x86
// opcode byte.
//
// The first instruction must always be the PHI instruction:
I(PHI , "phi", 0, 0)
// The second instruction must always be the noop instruction
I(NOOP , "nop", 0, 0) // nop 90
// Miscellaneous instructions
// NOTE(review): this comment used to say CB, which is the far-return opcode;
// the near return is C3 -- confirm which one the emitter should use.
I(RET , "ret", MIF::RET, 0) // ret C3
I(ADDrr8 , "add", 0, 0) // R8 += R8 00/r
I(ADDrr16 , "add", 0, 0) // R16 += R16 01/r
// NOTE(review): this comment used to say 02/r, which is the "r8, r/m8" form;
// the 32-bit "r/m32, r32" form is 01/r -- confirm against the Intel manual.
I(ADDrr32 , "addl", 0, 0) // R32 += R32 01/r
// At this point, I is no longer needed, so undefine the macro
#undef I

View File

@ -0,0 +1,30 @@
//===- X86InstructionInfo.h - X86 Instruction Information ---------*-C++-*-===//
//
// This file contains the X86 implementation of the MInstructionInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef X86INSTRUCTIONINFO_H
#define X86INSTRUCTIONINFO_H
#include "llvm/Target/MInstructionInfo.h"
#include "X86RegisterInfo.h"
/// X86InstructionInfo - The X86 implementation of MInstructionInfo.  It also
/// holds the X86RegisterInfo instance returned by getRegisterInfo().
///
class X86InstructionInfo : public MInstructionInfo {
public:
  X86InstructionInfo();

  /// getRegisterInfo - MInstructionInfo is a superset of MRegisterInfo, so any
  /// client holding instruction info can always reach the register info as
  /// well through this accessor.
  ///
  virtual const MRegisterInfo &getRegisterInfo() const {
    return RI;
  }

  /// print - Print out an x86 instruction in GAS syntax
  ///
  virtual void print(const MInstruction *MI, std::ostream &O) const;

private:
  const X86RegisterInfo RI;  // Register description for this target
};
#endif

View File

@ -0,0 +1,19 @@
//===- X86RegisterInfo.cpp - X86 Register Information ---------------------===//
//
// This file contains the X86 implementation of the MRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
// X86Regs - Turn the X86RegisterInfo.def file into a bunch of register
// descriptors
//
// The R macro drops the ENUM argument here and keeps only NAME, FLAGS and
// TSFLAGS. Entries appear in .def order, which is the same order used to build
// the X86::Register enum in X86.h, so entry N describes register number N.
//
static const MRegisterDesc X86Regs[] = {
#define R(ENUM, NAME, FLAGS, TSFLAGS) { NAME, FLAGS, TSFLAGS },
#include "X86RegisterInfo.def"
};
X86RegisterInfo::X86RegisterInfo()
: MRegisterInfo(X86Regs, sizeof(X86Regs)/sizeof(X86Regs[0])) {
}

View File

@ -0,0 +1,68 @@
//===-- X86RegisterInfo.def - X86 Register Information ----------*- C++ -*-===//
//
// This file describes all of the registers that the X86 backend uses. It relies
// on an external 'R' macro being defined that takes the arguments specified
// below, and is used to make all of the information relevant to a register be
// in one place.
//
//===----------------------------------------------------------------------===//
// NOTE: No include guards desired
#ifndef R
#errror "Must define R macro before including X86/X86RegisterInfo.def!"
#endif
// Arguments passed into the R macro
// #1: Enum Name - This ends up being a symbol in the X86 namespace
// #2: Register name - The name of the register as used by the gnu assembler
// #3: Register Flags - A bitfield of flags or'd together from the
// MRegisterInfo.h file.
// #4: Target Specific Flags - Another bitfield containing X86 specific flags
// as necessary.
// The first register must always be a 'noop' register for all backends. This
// is used as the destination register for instructions that do not produce a
// value. Some backends may use this as an operand register to mean special
// things; for example, the Sparc backend uses R#0 to mean %g0 which always
// PRODUCES the value 0. The X86 backend does not use this value as an operand
// register.
//
// Entry order matters: each R(...) line's position becomes the numeric value
// of the matching X86::Register enumerator and its index in the descriptor
// table. NoReg is first, so it is number 0 and EAX is number 1.
R(NoReg, "none", 0, 0)
// 32 bit registers, ordered as the processor does...
R(EAX, "eax", MRF::INT32, 0)
R(ECX, "ecx", MRF::INT32, 0)
R(EDX, "edx", MRF::INT32, 0)
R(EBX, "ebx", MRF::INT32, 0)
R(ESP, "esp", MRF::INT32, 0)
R(EBP, "ebp", MRF::INT32, 0)
R(ESI, "esi", MRF::INT32, 0)
R(EDI, "edi", MRF::INT32, 0)
// 16 bit registers, aliased with the corresponding 32 bit registers above
// (listed in the same order as the 32 bit group)
R(AX, "ax", MRF::INT16, 0)
R(CX, "cx", MRF::INT16, 0)
R(DX, "dx", MRF::INT16, 0)
R(BX, "bx", MRF::INT16, 0)
R(SP, "sp", MRF::INT16, 0)
R(BP, "bp", MRF::INT16, 0)
R(SI, "si", MRF::INT16, 0)
R(DI, "di", MRF::INT16, 0)
// 8 bit registers aliased with registers above as well
R(AL, "al", MRF::INT8, 0)
R(CL, "cl", MRF::INT8, 0)
R(DL, "dl", MRF::INT8, 0)
R(BL, "bl", MRF::INT8, 0)
R(AH, "ah", MRF::INT8, 0)
R(CH, "ch", MRF::INT8, 0)
R(DH, "dh", MRF::INT8, 0)
R(BH, "bh", MRF::INT8, 0)
// Flags, Segment registers, etc...
// We are now done with the R macro
#undef R

View File

@ -0,0 +1,17 @@
//===- X86RegisterInfo.h - X86 Register Information Impl ----------*-C++-*-===//
//
// This file contains the X86 implementation of the MRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef X86REGISTERINFO_H
#define X86REGISTERINFO_H
#include "llvm/Target/MRegisterInfo.h"
/// X86RegisterInfo - The X86 implementation of MRegisterInfo. It adds no
/// members of its own; the constructor (defined in X86RegisterInfo.cpp) passes
/// the X86 register descriptor table to the MRegisterInfo base class.
///
struct X86RegisterInfo : public MRegisterInfo {
X86RegisterInfo();
};
#endif