diff --git a/Makefile.rules b/Makefile.rules index 43fc030e8b1..ebebc0a85c4 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1673,13 +1673,18 @@ $(ObjDir)/%GenAsmMatcher.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN) $(TARGET:%=$(ObjDir)/%GenMCCodeEmitter.inc.tmp): \ $(ObjDir)/%GenMCCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir $(LLVM_TBLGEN) $(Echo) "Building $( bool */ diff --git a/docs/TableGen/BackEnds.rst b/docs/TableGen/BackEnds.rst index e8544b65216..42de41da74f 100644 --- a/docs/TableGen/BackEnds.rst +++ b/docs/TableGen/BackEnds.rst @@ -78,7 +78,8 @@ returns the (currently, 32-bit unsigned) value of the instruction. **Output**: C++ code, implementing the target's CodeEmitter class by overriding the virtual functions as ``CodeEmitter::function()``. -**Usage**: Used to include directly at the end of ``MCCodeEmitter.cpp``. +**Usage**: Used to include directly at the end of ``CodeEmitter.cpp``, and +with option `-mc-emitter` to be included in ``MCCodeEmitter.cpp``. RegisterInfo ------------ diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index c8c440b5f47..e2de6bc58d7 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -26,8 +26,8 @@ #include "BrainF.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" diff --git a/examples/BrainF/CMakeLists.txt b/examples/BrainF/CMakeLists.txt index cf1cf1b6159..65589d9f39f 100644 --- a/examples/BrainF/CMakeLists.txt +++ b/examples/BrainF/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS BitWriter Core ExecutionEngine + JIT MC Support nativecodegen diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 3583677b689..24e538cacf2 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -1964,8 +1964,10 @@ int main(int argc, char *argv[]) { // Build engine with JIT llvm::EngineBuilder factory(module); factory.setEngineKind(llvm::EngineKind::JIT); + factory.setAllocateGVsWithCode(false); factory.setTargetOptions(Opts); factory.setMCJITMemoryManager(MemMgr); + factory.setUseMCJIT(true); llvm::ExecutionEngine *executionEngine = factory.create(); { diff --git a/examples/Fibonacci/CMakeLists.txt b/examples/Fibonacci/CMakeLists.txt index 087ccdd7d84..c015e50ac35 100644 --- a/examples/Fibonacci/CMakeLists.txt +++ b/examples/Fibonacci/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT MC Support nativecodegen diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 40137c3a051..ba8e95342fa 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Verifier.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" diff --git a/examples/HowToUseJIT/CMakeLists.txt b/examples/HowToUseJIT/CMakeLists.txt index a344ad07ca6..237cbea861d 100644 --- a/examples/HowToUseJIT/CMakeLists.txt +++ b/examples/HowToUseJIT/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT MC Support nativecodegen diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 906b066ca45..7125a156104 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -36,6 +36,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" @@ -125,6 +126,7 @@ int main() { // Import result of execution: outs() << "Result: " << gv.IntVal << "\n"; + EE->freeMachineCodeForFunction(FooF); delete EE; llvm_shutdown(); return 0; diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt index 2f828dc819e..2b87e868498 100644 --- a/examples/Kaleidoscope/Chapter4/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index 8a9e7dfbebf..a8f59428c0d 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt index 1912ddc0741..c3e7c43cb41 100644 --- a/examples/Kaleidoscope/Chapter5/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 1abc8809086..a31b5b4792a 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt index d36f03090e5..cd61cec89d5 100644 --- a/examples/Kaleidoscope/Chapter6/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index c21fe8a20ac..5a3bd2e3147 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt index bdc0e5525db..cdb13c465d1 100644 --- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine + JIT MC ScalarOpts Support diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index e23637e2681..c2c337c9008 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp b/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp index 00f5b83bde5..9466360af19 100644 --- a/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp +++ b/examples/Kaleidoscope/MCJIT/cached/toy-jit.cpp @@ -2,6 +2,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/cached/toy.cpp b/examples/Kaleidoscope/MCJIT/cached/toy.cpp index af51b4a8314..16c548c9806 100644 --- a/examples/Kaleidoscope/MCJIT/cached/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/cached/toy.cpp @@ -897,6 +897,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/Kaleidoscope/MCJIT/complete/toy.cpp b/examples/Kaleidoscope/MCJIT/complete/toy.cpp index 3beb0d83789..10e7ada1e88 100644 --- a/examples/Kaleidoscope/MCJIT/complete/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/complete/toy.cpp @@ -1,5 +1,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" @@ -51,6 +52,10 @@ namespace { cl::desc("Dump IR from modules to stderr on shutdown"), cl::init(false)); + cl::opt UseMCJIT( + "use-mcjit", cl::desc("Use the MCJIT execution engine"), + cl::init(true)); + cl::opt EnableLazyCompilation( "enable-lazy-compilation", cl::desc("Enable lazy compilation when using the MCJIT engine"), cl::init(true)); @@ -787,6 +792,96 @@ public: virtual void dump(); }; +//===----------------------------------------------------------------------===// +// Helper class for JIT execution engine +//===----------------------------------------------------------------------===// + +class JITHelper : public BaseHelper { +public: + JITHelper(LLVMContext &Context) { + // Make the module, which holds all the code. + if (!InputIR.empty()) { + TheModule = parseInputIR(InputIR, Context); + } else { + TheModule = new Module("my cool jit", Context); + } + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + TheFPM = new FunctionPassManager(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + TheFPM->add(new DataLayout(*TheExecutionEngine->getDataLayout())); + // Provide basic AliasAnalysis support for GVN. + TheFPM->add(createBasicAliasAnalysisPass()); + // Promote allocas to registers. + TheFPM->add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); + } + + virtual ~JITHelper() { + if (TheFPM) + delete TheFPM; + if (TheExecutionEngine) + delete TheExecutionEngine; + } + + virtual Function *getFunction(const std::string FnName) { + assert(TheModule); + return TheModule->getFunction(FnName); + } + + virtual Module *getModuleForNewFunction() { + assert(TheModule); + return TheModule; + } + + virtual void *getPointerToFunction(Function* F) { + assert(TheExecutionEngine); + return TheExecutionEngine->getPointerToFunction(F); + } + + virtual void *getPointerToNamedFunction(const std::string &Name) { + return TheExecutionEngine->getPointerToNamedFunction(Name); + } + + virtual void runFPM(Function &F) { + assert(TheFPM); + TheFPM->run(F); + } + + virtual void closeCurrentModule() { + // This should never be called for JIT + assert(false); + } + + virtual void dump() { + assert(TheModule); + TheModule->dump(); + } + +private: + Module *TheModule; + ExecutionEngine *TheExecutionEngine; + FunctionPassManager *TheFPM; +}; + //===----------------------------------------------------------------------===// // MCJIT helper class //===----------------------------------------------------------------------===// @@ -939,6 +1034,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *EE = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!EE) { @@ -1098,8 +1194,10 @@ Value *UnaryExprAST::Codegen() { Value *OperandV = Operand->Codegen(); if (OperandV == 0) return 0; Function *F; - F = TheHelper->getFunction( - MakeLegalFunctionName(std::string("unary") + Opcode)); + if (UseMCJIT) + F = TheHelper->getFunction(MakeLegalFunctionName(std::string("unary")+Opcode)); + else + F = TheHelper->getFunction(std::string("unary")+Opcode); if (F == 0) return ErrorV("Unknown unary operator"); @@ -1148,7 +1246,10 @@ Value *BinaryExprAST::Codegen() { // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. Function *F; - F = TheHelper->getFunction(MakeLegalFunctionName(std::string("binary")+Op)); + if (UseMCJIT) + F = TheHelper->getFunction(MakeLegalFunctionName(std::string("binary")+Op)); + else + F = TheHelper->getFunction(std::string("binary")+Op); assert(F && "binary operator not found!"); Value *Ops[] = { L, R }; @@ -1381,7 +1482,10 @@ Function *PrototypeAST::Codegen() { Doubles, false); std::string FnName; - FnName = MakeLegalFunctionName(Name); + if (UseMCJIT) + FnName = MakeLegalFunctionName(Name); + else + FnName = Name; Module* M = TheHelper->getModuleForNewFunction(); Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M); @@ -1456,6 +1560,10 @@ Function *FunctionAST::Codegen() { // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); + // Optimize the function. + if (!UseMCJIT) + TheHelper->runFPM(*TheFunction); + return TheFunction; } @@ -1473,7 +1581,7 @@ Function *FunctionAST::Codegen() { static void HandleDefinition() { if (FunctionAST *F = ParseDefinition()) { - if (EnableLazyCompilation) + if (UseMCJIT && EnableLazyCompilation) TheHelper->closeCurrentModule(); Function *LF = F->Codegen(); if (LF && VerboseOutput) { @@ -1563,8 +1671,10 @@ double printlf() { int main(int argc, char **argv) { InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); + if (UseMCJIT) { + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + } LLVMContext &Context = getGlobalContext(); cl::ParseCommandLineOptions(argc, argv, @@ -1580,7 +1690,10 @@ int main(int argc, char **argv) { BinopPrecedence['*'] = 40; // highest. // Make the Helper, which holds all the code. - TheHelper = new MCJITHelper(Context); + if (UseMCJIT) + TheHelper = new MCJITHelper(Context); + else + TheHelper = new JITHelper(Context); // Prime the first token. if (!SuppressPrompts) diff --git a/examples/Kaleidoscope/MCJIT/initial/toy.cpp b/examples/Kaleidoscope/MCJIT/initial/toy.cpp index 2c1b2973af5..4c4711338c4 100644 --- a/examples/Kaleidoscope/MCJIT/initial/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/initial/toy.cpp @@ -778,6 +778,7 @@ void *MCJITHelper::getPointerToFunction(Function* F) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(OpenModule) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp b/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp index 98c1001dc51..2d540dd040f 100644 --- a/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp +++ b/examples/Kaleidoscope/MCJIT/lazy/toy-jit.cpp @@ -2,6 +2,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" diff --git a/examples/Kaleidoscope/MCJIT/lazy/toy.cpp b/examples/Kaleidoscope/MCJIT/lazy/toy.cpp index 9c2a0d48f39..ff88e23bd35 100644 --- a/examples/Kaleidoscope/MCJIT/lazy/toy.cpp +++ b/examples/Kaleidoscope/MCJIT/lazy/toy.cpp @@ -808,6 +808,7 @@ ExecutionEngine *MCJITHelper::compileModule(Module *M) { std::string ErrStr; ExecutionEngine *NewEngine = EngineBuilder(M) .setErrorStr(&ErrStr) + .setUseMCJIT(true) .setMCJITMemoryManager(new HelpingMemoryManager(this)) .create(); if (!NewEngine) { diff --git a/examples/ParallelJIT/CMakeLists.txt b/examples/ParallelJIT/CMakeLists.txt index 3c489e83027..8673917f558 100644 --- a/examples/ParallelJIT/CMakeLists.txt +++ b/examples/ParallelJIT/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine Interpreter + JIT Support nativecodegen ) diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index 653b8cc0342..2aa63d91ffb 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -19,6 +19,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/Interpreter.h" +#include "llvm/ExecutionEngine/JIT.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index f1f4cadec34..7cdf0d78d5b 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -34,6 +34,7 @@ extern "C" { * @{ */ +void LLVMLinkInJIT(void); void LLVMLinkInMCJIT(void); void LLVMLinkInInterpreter(void); diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h new file mode 100644 index 00000000000..dc2a0272db4 --- /dev/null +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -0,0 +1,344 @@ +//===-- llvm/CodeGen/JITCodeEmitter.h - Code emission ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an abstract interface that is used by the machine code +// emission framework to output the code. This allows machine code emission to +// be separated from concerns such as resolution of call targets, and where the +// machine code will be written (memory or disk, f.e.). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_JITCODEEMITTER_H +#define LLVM_CODEGEN_JITCODEEMITTER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace llvm { + +class MachineBasicBlock; +class MachineConstantPool; +class MachineJumpTableInfo; +class MachineFunction; +class MachineModuleInfo; +class MachineRelocation; +class Value; +class GlobalValue; +class Function; + +/// JITCodeEmitter - This class defines two sorts of methods: those for +/// emitting the actual bytes of machine code, and those for emitting auxiliary +/// structures, such as jump tables, relocations, etc. +/// +/// Emission of machine code is complicated by the fact that we don't (in +/// general) know the size of the machine code that we're about to emit before +/// we emit it. As such, we preallocate a certain amount of memory, and set the +/// BufferBegin/BufferEnd pointers to the start and end of the buffer. As we +/// emit machine instructions, we advance the CurBufferPtr to indicate the +/// location of the next byte to emit. In the case of a buffer overflow (we +/// need to emit more machine code than we have allocated space for), the +/// CurBufferPtr will saturate to BufferEnd and ignore stores. Once the entire +/// function has been emitted, the overflow condition is checked, and if it has +/// occurred, more memory is allocated, and we reemit the code into it. +/// +class JITCodeEmitter : public MachineCodeEmitter { + void anchor() override; +public: + virtual ~JITCodeEmitter() {} + + /// startFunction - This callback is invoked when the specified function is + /// about to be code generated. This initializes the BufferBegin/End/Ptr + /// fields. + /// + void startFunction(MachineFunction &F) override = 0; + + /// finishFunction - This callback is invoked when the specified function has + /// finished code generation. If a buffer overflow has occurred, this method + /// returns true (the callee is required to try again), otherwise it returns + /// false. + /// + bool finishFunction(MachineFunction &F) override = 0; + + /// allocIndirectGV - Allocates and fills storage for an indirect + /// GlobalValue, and returns the address. + virtual void *allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) = 0; + + /// emitByte - This callback is invoked when a byte needs to be written to the + /// output stream. + /// + void emitByte(uint8_t B) { + if (CurBufferPtr != BufferEnd) + *CurBufferPtr++ = B; + } + + /// emitWordLE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in little-endian format. + /// + void emitWordLE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitWordBE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in big-endian format. + /// + void emitWordBE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 0); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordLE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in little-endian format. + /// + void emitDWordLE(uint64_t W) { + if (8 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 32); + *CurBufferPtr++ = (uint8_t)(W >> 40); + *CurBufferPtr++ = (uint8_t)(W >> 48); + *CurBufferPtr++ = (uint8_t)(W >> 56); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordBE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in big-endian format. + /// + void emitDWordBE(uint64_t W) { + if (8 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 56); + *CurBufferPtr++ = (uint8_t)(W >> 48); + *CurBufferPtr++ = (uint8_t)(W >> 40); + *CurBufferPtr++ = (uint8_t)(W >> 32); + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 0); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitAlignment - Move the CurBufferPtr pointer up to the specified + /// alignment (saturated to BufferEnd of course). + void emitAlignment(unsigned Alignment) { + if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + CurBufferPtr = std::min(NewPtr, BufferEnd); + } + + /// emitAlignmentWithFill - Similar to emitAlignment, except that the + /// extra bytes are filled with the provided byte. + void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) { + if (Alignment == 0) Alignment = 1; + uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr, + Alignment); + // Fail if we don't have room. + if (NewPtr > BufferEnd) { + CurBufferPtr = BufferEnd; + return; + } + while (CurBufferPtr < NewPtr) { + *CurBufferPtr++ = Fill; + } + } + + /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be + /// written to the output stream. + void emitULEB128Bytes(uint64_t Value, unsigned PadTo = 0) { + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value || PadTo != 0) Byte |= 0x80; + emitByte(Byte); + } while (Value); + + if (PadTo) { + do { + uint8_t Byte = (PadTo > 1) ? 0x80 : 0x0; + emitByte(Byte); + } while (--PadTo); + } + } + + /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be + /// written to the output stream. + void emitSLEB128Bytes(int64_t Value) { + int32_t Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + if (IsMore) Byte |= 0x80; + emitByte(Byte); + } while (IsMore); + } + + /// emitString - This callback is invoked when a String needs to be + /// written to the output stream. + void emitString(const std::string &String) { + for (size_t i = 0, N = String.size(); i < N; ++i) { + uint8_t C = String[i]; + emitByte(C); + } + emitByte(0); + } + + /// emitInt32 - Emit a int32 directive. + void emitInt32(uint32_t Value) { + if (4 <= BufferEnd-CurBufferPtr) { + *((uint32_t*)CurBufferPtr) = Value; + CurBufferPtr += 4; + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitInt64 - Emit a int64 directive. + void emitInt64(uint64_t Value) { + if (8 <= BufferEnd-CurBufferPtr) { + *((uint64_t*)CurBufferPtr) = Value; + CurBufferPtr += 8; + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitInt32At - Emit the Int32 Value in Addr. + void emitInt32At(uintptr_t *Addr, uintptr_t Value) { + if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd) + (*(uint32_t*)Addr) = (uint32_t)Value; + } + + /// emitInt64At - Emit the Int64 Value in Addr. + void emitInt64At(uintptr_t *Addr, uintptr_t Value) { + if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd) + (*(uint64_t*)Addr) = (uint64_t)Value; + } + + + /// emitLabel - Emits a label + void emitLabel(MCSymbol *Label) override = 0; + + /// allocateSpace - Allocate a block of space in the current output buffer, + /// returning null (and setting conditions to indicate buffer overflow) on + /// failure. Alignment is the alignment in bytes of the buffer desired. + void *allocateSpace(uintptr_t Size, unsigned Alignment) override { + emitAlignment(Alignment); + void *Result; + + // Check for buffer overflow. + if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) { + CurBufferPtr = BufferEnd; + Result = nullptr; + } else { + // Allocate the space. + Result = CurBufferPtr; + CurBufferPtr += Size; + } + + return Result; + } + + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0; + + /// StartMachineBasicBlock - This should be called by the target when a new + /// basic block is about to be emitted. This way the MCE knows where the + /// start of the block is, and can implement getMachineBasicBlockAddress. + void StartMachineBasicBlock(MachineBasicBlock *MBB) override = 0; + + /// getCurrentPCValue - This returns the address that the next emitted byte + /// will be output to. + /// + uintptr_t getCurrentPCValue() const override { + return (uintptr_t)CurBufferPtr; + } + + /// getCurrentPCOffset - Return the offset from the start of the emitted + /// buffer that we are currently writing to. + uintptr_t getCurrentPCOffset() const override { + return CurBufferPtr-BufferBegin; + } + + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + bool earlyResolveAddresses() const override { return true; } + + /// addRelocation - Whenever a relocatable address is needed, it should be + /// noted with this interface. + void addRelocation(const MachineRelocation &MR) override = 0; + + /// FIXME: These should all be handled with relocations! + + /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in + /// the constant pool that was last emitted with the emitConstantPool method. + /// + uintptr_t getConstantPoolEntryAddress(unsigned Index) const override = 0; + + /// getJumpTableEntryAddress - Return the address of the jump table with index + /// 'Index' in the function that last called initJumpTableInfo. + /// + uintptr_t getJumpTableEntryAddress(unsigned Index) const override = 0; + + /// getMachineBasicBlockAddress - Return the address of the specified + /// MachineBasicBlock, only usable after the label for the MBB has been + /// emitted. + /// + uintptr_t + getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override = 0; + + /// getLabelAddress - Return the address of the specified Label, only usable + /// after the Label has been emitted. + /// + uintptr_t getLabelAddress(MCSymbol *Label) const override = 0; + + /// Specifies the MachineModuleInfo object. This is used for exception handling + /// purposes. + void setModuleInfo(MachineModuleInfo* Info) override = 0; + + /// getLabelLocations - Return the label locations map of the label IDs to + /// their address. + virtual DenseMap *getLabelLocations() { + return nullptr; + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 22ce4491b7e..e3ef104aea8 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -141,6 +141,12 @@ protected: // To avoid having libexecutionengine depend on the JIT and interpreter // libraries, the execution engine implementations set these functions to ctor // pointers at startup time if they are linked in. + static ExecutionEngine *(*JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); static ExecutionEngine *(*MCJITCtor)( Module *M, std::string *ErrorStr, @@ -329,6 +335,13 @@ public: /// getFunctionAddress instead. virtual void *getPointerToFunction(Function *F) = 0; + /// getPointerToBasicBlock - The different EE's represent basic blocks in + /// different ways. Return the representation for a blockaddress of the + /// specified block. + /// + /// This function will not be implemented for the MCJIT execution engine. + virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0; + /// getPointerToFunctionOrStub - If the specified function has been /// code-gen'd, return a pointer to the function. If not, compile it, or use /// a stub to implement lazy compilation if available. See @@ -376,6 +389,18 @@ public: void InitializeMemory(const Constant *Init, void *Addr); + /// recompileAndRelinkFunction - This method is used to force a function which + /// has already been compiled to be compiled again, possibly after it has been + /// modified. Then the entry to the old copy is overwritten with a branch to + /// the new copy. If there was no old copy, this acts just like + /// VM::getPointerToFunction(). + virtual void *recompileAndRelinkFunction(Function *F) = 0; + + /// freeMachineCodeForFunction - Release memory in the ExecutionEngine + /// corresponding to the machine code emitted to execute this function, useful + /// for garbage-collecting generated code. + virtual void freeMachineCodeForFunction(Function *F) = 0; + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. @@ -512,12 +537,14 @@ private: CodeGenOpt::Level OptLevel; RTDyldMemoryManager *MCJMM; JITMemoryManager *JMM; + bool AllocateGVsWithCode; TargetOptions Options; Reloc::Model RelocModel; CodeModel::Model CMModel; std::string MArch; std::string MCPU; SmallVector MAttrs; + bool UseMCJIT; bool VerifyModules; /// InitEngine - Does the common initialization of default options. @@ -599,6 +626,18 @@ public: return *this; } + /// setAllocateGVsWithCode - Sets whether global values should be allocated + /// into the same buffer as code. For most applications this should be set + /// to false. Allocating globals with code breaks freeMachineCodeForFunction + /// and is probably unsafe and bad for performance. However, we have clients + /// who depend on this behavior, so we must support it. This option defaults + /// to false so that users of the new API can safely use the new memory + /// manager and free machine code. + EngineBuilder &setAllocateGVsWithCode(bool a) { + AllocateGVsWithCode = a; + return *this; + } + /// setMArch - Override the architecture set by the Module's triple. EngineBuilder &setMArch(StringRef march) { MArch.assign(march.begin(), march.end()); @@ -611,6 +650,13 @@ public: return *this; } + /// setUseMCJIT - Set whether the MC-JIT implementation should be used + /// (experimental). + EngineBuilder &setUseMCJIT(bool Value) { + UseMCJIT = Value; + return *this; + } + /// setVerifyModules - Set whether the JIT implementation should verify /// IR modules during compilation. EngineBuilder &setVerifyModules(bool Verify) { diff --git a/include/llvm/ExecutionEngine/JIT.h b/include/llvm/ExecutionEngine/JIT.h new file mode 100644 index 00000000000..581d6e6c35e --- /dev/null +++ b/include/llvm/ExecutionEngine/JIT.h @@ -0,0 +1,38 @@ +//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file forces the JIT to link in on certain operating systems. +// (Windows). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JIT_H +#define LLVM_EXECUTIONENGINE_JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include + +extern "C" void LLVMLinkInJIT(); + +namespace { + struct ForceJITLinking { + ForceJITLinking() { + // We must reference JIT in such a way that compilers will not + // delete it all as dead code, even with whole program optimization, + // yet is effectively a NO-OP. As the compiler isn't smart enough + // to know that getenv() never returns -1, this will do the job. + if (std::getenv("bar") != (char*) -1) + return; + + LLVMLinkInJIT(); + } + } ForceJITLinking; +} + +#endif diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h new file mode 100644 index 00000000000..58acf30e034 --- /dev/null +++ b/include/llvm/Target/TargetJITInfo.h @@ -0,0 +1,136 @@ +//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes an abstract interface used by the Just-In-Time code +// generator to perform target-specific activities, such as emitting stubs. If +// a TargetMachine supports JIT code generation, it should provide one of these +// objects through the getJITInfo() method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETJITINFO_H +#define LLVM_TARGET_TARGETJITINFO_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace llvm { + class Function; + class GlobalValue; + class JITCodeEmitter; + class MachineRelocation; + + /// TargetJITInfo - Target specific information required by the Just-In-Time + /// code generator. + class TargetJITInfo { + virtual void anchor(); + public: + virtual ~TargetJITInfo() {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New) = 0; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) { + llvm_unreachable("This target doesn't implement " + "emitGlobalValueIndirectSym!"); + } + + /// Records the required size and alignment for a call stub in bytes. + struct StubLayout { + size_t Size; + size_t Alignment; + }; + /// Returns the maximum size and alignment for a call stub on this target. + virtual StubLayout getStubLayout() { + llvm_unreachable("This target doesn't implement getStubLayout!"); + } + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. The JITCodeEmitter must already have storage allocated for the + /// stub. Return the address of the resultant function, which may have been + /// aligned from the address the JCE was set up to emit at. + virtual void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + llvm_unreachable("This target doesn't implement emitFunctionStub!"); + } + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) { + llvm_unreachable("This target doesn't implement getPICJumpTableEntry!"); + } + + /// LazyResolverFn - This typedef is used to represent the function that + /// unresolved call points should invoke. This is a target specific + /// function that knows how to walk the stack and find out which stub the + /// call is coming from. + typedef void (*LazyResolverFn)(); + + /// JITCompilerFn - This typedef is used to represent the JIT function that + /// lazily compiles the function corresponding to a stub. The JIT keeps + /// track of the mapping between stubs and LLVM Functions, the target + /// provides the ability to figure out the address of a stub that is called + /// by the LazyResolverFn. + typedef void* (*JITCompilerFn)(void *); + + /// getLazyResolverFunction - This method is used to initialize the JIT, + /// giving the target the function that should be used to compile a + /// function, and giving the JIT the target function used to do the lazy + /// resolving. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn) { + llvm_unreachable("Not implemented for this target!"); + } + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + assert(NumRelocs == 0 && "This target does not have relocations!"); + } + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. + virtual char* allocateThreadLocalMemory(size_t size) { + llvm_unreachable("This target does not implement thread local storage!"); + } + + /// needsGOT - Allows a target to specify that it would like the + /// JIT to manage a GOT for it. + bool needsGOT() const { return useGOT; } + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + virtual bool hasCustomConstantPool() const { return false; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + virtual bool hasCustomJumpTables() const { return false; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + virtual bool allocateSeparateGVMemory() const { return false; } + protected: + bool useGOT; + }; +} // End llvm namespace + +#endif diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f5c4132b8ff..ac35737b6c5 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -828,6 +828,11 @@ public: return UseUnderscoreLongJmp; } + /// Return whether the target can generate code for jump tables. + bool supportJumpTables() const { + return SupportJumpTables; + } + /// Return integer threshold on number of blocks to use jump tables rather /// than if sequence. int getMinimumJumpTableEntries() const { @@ -996,6 +1001,11 @@ protected: UseUnderscoreLongJmp = Val; } + /// Indicate whether the target can generate code for jump tables. + void setSupportJumpTables(bool Val) { + SupportJumpTables = Val; + } + /// Indicate the number of blocks to generate jump tables rather than if /// sequence. void setMinimumJumpTableEntries(int Val) { @@ -1499,6 +1509,10 @@ private: /// Defaults to false. bool UseUnderscoreLongJmp; + /// Whether the target can generate code for jumptables. If it's not true, + /// then each jumptable must be lowered into if-then-else's. + bool SupportJumpTables; + /// Number of blocks threshold to use jump tables. int MinimumJumpTableEntries; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index d4b93da19be..22d01bfc846 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -24,6 +24,7 @@ namespace llvm { class InstrItineraryData; +class JITCodeEmitter; class GlobalValue; class Mangler; class MCAsmInfo; @@ -35,6 +36,7 @@ class DataLayout; class TargetLibraryInfo; class TargetFrameLowering; class TargetIntrinsicInfo; +class TargetJITInfo; class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; @@ -99,6 +101,10 @@ public: virtual const TargetSubtargetInfo *getSubtargetImpl() const { return nullptr; } + TargetSubtargetInfo *getSubtargetImpl() { + const TargetMachine *TM = this; + return const_cast(TM->getSubtargetImpl()); + } /// getSubtarget - This method returns a pointer to the specified type of /// TargetSubtargetInfo. In debug builds, it verifies that the object being @@ -195,6 +201,18 @@ public: return true; } + /// addPassesToEmitMachineCode - Add passes to the specified pass manager to + /// get machine code emitted. This uses a JITCodeEmitter object to handle + /// actually outputting the machine code and resolving things like the address + /// of functions. This method returns true if machine code emission is + /// not supported. + /// + virtual bool addPassesToEmitMachineCode(PassManagerBase &, + JITCodeEmitter &, + bool /*DisableVerify*/ = true) { + return true; + } + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -241,6 +259,15 @@ public: AnalysisID StartAfter = nullptr, AnalysisID StopAfter = nullptr) override; + /// addPassesToEmitMachineCode - Add passes to the specified pass manager to + /// get machine code emitted. This uses a JITCodeEmitter object to handle + /// actually outputting the machine code and resolving things like the address + /// of functions. This method returns true if machine code emission is + /// not supported. + /// + bool addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &MCE, + bool DisableVerify = true) override; + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be @@ -248,6 +275,14 @@ public: /// bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &OS, bool DisableVerify = true) override; + + /// addCodeEmitter - This pass should be overridden by the target to add a + /// code emitter, if supported. If this is not supported, 'true' should be + /// returned. + virtual bool addCodeEmitter(PassManagerBase &, + JITCodeEmitter &) { + return true; + } }; } // End llvm namespace diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 9791d4051ee..45a93309501 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -26,6 +26,7 @@ class SDep; class SUnit; class TargetFrameLowering; class TargetInstrInfo; +class TargetJITInfo; class TargetLowering; class TargetRegisterClass; class TargetRegisterInfo; @@ -78,6 +79,11 @@ public: /// virtual const TargetRegisterInfo *getRegisterInfo() const { return nullptr; } + /// getJITInfo - If this target supports a JIT, return information for it, + /// otherwise return null. + /// + virtual TargetJITInfo *getJITInfo() { return nullptr; } + /// getInstrItineraryData - Returns instruction itinerary data for the target /// or specific subtarget. /// diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 43a6e0f26dc..24bc570f44a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -188,8 +188,9 @@ unsigned BasicTTI::getJumpBufSize() const { bool BasicTTI::shouldBuildLookupTables() const { const TargetLoweringBase *TLI = getTLI(); - return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } bool BasicTTI::haveFastSqrt(Type *Ty) const { diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index acc8a26936a..2a247c12e64 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMCodeGen InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp + JITCodeEmitter.cpp JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp new file mode 100644 index 00000000000..96a53892f6d --- /dev/null +++ b/lib/CodeGen/JITCodeEmitter.cpp @@ -0,0 +1,14 @@ +//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/JITCodeEmitter.h" + +using namespace llvm; + +void JITCodeEmitter::anchor() { } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 2c77eabb0af..0e0d7ba4c89 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -226,6 +226,26 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return false; } +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a JITCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. This method should return true if machine code emission is +/// not supported. +/// +bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, + JITCodeEmitter &JCE, + bool DisableVerify) { + // Add common CodeGen passes. + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, + nullptr); + if (!Context) + return true; + + addCodeEmitter(PM, JCE); + + return false; // success! +} + /// addPassesToEmitMC - Add passes to the specified pass manager to get /// machine code emitted with the MCJIT. This method returns true if machine /// code is not supported. It fills the MCContext Ctx pointer which can be diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1fa5bf1114e..cc6eac70ba7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2228,8 +2228,9 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); + return TLI.supportJumpTables() && + (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } static APInt ComputeRange(const APInt &First, const APInt &Last) { diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 1a90c6c866f..a8c7a28a4e8 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -719,6 +719,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, PrefLoopAlignment = 0; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; + SupportJumpTables = true; MinimumJumpTableEntries = 4; InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index 208495c8847..3102c7bd582 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMExecutionEngine ) add_subdirectory(Interpreter) +add_subdirectory(JIT) add_subdirectory(MCJIT) add_subdirectory(RuntimeDyld) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 01b9bcc8905..063f3fb05c2 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -48,6 +48,12 @@ void ObjectCache::anchor() {} void ObjectBuffer::anchor() {} void ObjectBufferStream::anchor() {} +ExecutionEngine *(*ExecutionEngine::JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM) = nullptr; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, @@ -411,8 +417,10 @@ void EngineBuilder::InitEngine() { MCJMM = nullptr; JMM = nullptr; Options = TargetOptions(); + AllocateGVsWithCode = false; RelocModel = Reloc::Default; CMModel = CodeModel::JITDefault; + UseMCJIT = false; // IR module verification is enabled by default in debug builds, and disabled // by default in release builds. @@ -445,6 +453,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return nullptr; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return nullptr; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -457,9 +473,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { } ExecutionEngine *EE = nullptr; - if (ExecutionEngine::MCJITCtor) + if (UseMCJIT && ExecutionEngine::MCJITCtor) EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM, TheTM.release()); + else if (ExecutionEngine::JITCtor) + EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM, + AllocateGVsWithCode, TheTM.release()); if (EE) { EE->setVerifyModules(VerifyModules); @@ -477,7 +496,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return nullptr; } - if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::MCJITCtor) { + if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor && + !ExecutionEngine::MCJITCtor) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } @@ -823,6 +843,9 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { Result = PTOGV(getPointerToFunctionOrStub(const_cast(F))); else if (const GlobalVariable *GV = dyn_cast(C)) Result = PTOGV(getOrEmitGlobalVariable(const_cast(GV))); + else if (const BlockAddress *BA = dyn_cast(C)) + Result = PTOGV(getPointerToBasicBlock(const_cast( + BA->getBasicBlock()))); else llvm_unreachable("Unknown constant pointer type!"); break; diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index fa2f23809a8..6ff1e7ac063 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -192,6 +192,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule( EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) + .setUseMCJIT(true) .setOptLevel((CodeGenOpt::Level)options.OptLevel) .setCodeModel(unwrap(options.CodeModel)) .setTargetOptions(targetOptions); @@ -274,6 +275,7 @@ LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, } void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { + unwrap(EE)->freeMachineCodeForFunction(unwrap(F)); } void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ @@ -312,7 +314,7 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { - return nullptr; + return unwrap(EE)->recompileAndRelinkFunction(unwrap(Fn)); } LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index ed6f8f44629..2145cde05fb 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -121,6 +121,17 @@ public: return nullptr; } + /// recompileAndRelinkFunction - For the interpreter, functions are always + /// up-to-date. + /// + void *recompileAndRelinkFunction(Function *F) override { + return getPointerToFunction(F); + } + + /// freeMachineCodeForFunction - The interpreter does not generate any code. + /// + void freeMachineCodeForFunction(Function *F) override { } + // Methods used to execute code: // Place a call on the stack void callFunction(Function *F, const std::vector &ArgVals); @@ -202,6 +213,7 @@ private: // Helper functions void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF); void *getPointerToFunction(Function *F) override { return (void*)F; } + void *getPointerToBasicBlock(BasicBlock *BB) override { return (void*)BB; } void initializeExecutionEngine() { } void initializeExternalFunctions(); diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt new file mode 100644 index 00000000000..e16baede50f --- /dev/null +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -0,0 +1,8 @@ +# TODO: Support other architectures. See Makefile. +add_definitions(-DENABLE_X86_JIT) + +add_llvm_library(LLVMJIT + JIT.cpp + JITEmitter.cpp + JITMemoryManager.cpp + ) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp new file mode 100644 index 00000000000..ab0c1a680bd --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -0,0 +1,696 @@ +//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool implements a just-in-time compiler for LLVM, allowing direct +// execution of LLVM bitcode in an efficient manner. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#ifdef __APPLE__ +// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead +// of atexit). It passes the address of linker generated symbol __dso_handle +// to the function. +// This configuration change happened at version 5330. +# include +# if defined(MAC_OS_X_VERSION_10_4) && \ + ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \ + (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \ + __APPLE_CC__ >= 5330)) +# ifndef HAVE___DSO_HANDLE +# define HAVE___DSO_HANDLE 1 +# endif +# endif +#endif + +#if HAVE___DSO_HANDLE +extern void *__dso_handle __attribute__ ((__visibility__ ("hidden"))); +#endif + +namespace { + +static struct RegisterJIT { + RegisterJIT() { JIT::Register(); } +} JITRegistrator; + +} + +extern "C" void LLVMLinkInJIT() { +} + +/// createJIT - This is the factory method for creating a JIT for the current +/// machine, it does not fall back to the interpreter. This takes ownership +/// of the module. +ExecutionEngine *JIT::createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM) { + // Try to register the program as a source of symbols to resolve against. + // + // FIXME: Don't do this here. + sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); + + // If the target supports JIT code generation, create the JIT. + if (TargetJITInfo *TJ = TM->getSubtargetImpl()->getJITInfo()) { + return new JIT(M, *TM, *TJ, JMM, GVsWithCode); + } else { + if (ErrorStr) + *ErrorStr = "target does not support JIT code generation"; + return nullptr; + } +} + +namespace { +/// This class supports the global getPointerToNamedFunction(), which allows +/// bugpoint or gdb users to search for a function by name without any context. +class JitPool { + SmallPtrSet JITs; // Optimize for process containing just 1 JIT. + mutable sys::Mutex Lock; +public: + void Add(JIT *jit) { + MutexGuard guard(Lock); + JITs.insert(jit); + } + void Remove(JIT *jit) { + MutexGuard guard(Lock); + JITs.erase(jit); + } + void *getPointerToNamedFunction(const char *Name) const { + MutexGuard guard(Lock); + assert(JITs.size() != 0 && "No Jit registered"); + //search function in every instance of JIT + for (SmallPtrSet::const_iterator Jit = JITs.begin(), + end = JITs.end(); + Jit != end; ++Jit) { + if (Function *F = (*Jit)->FindFunctionNamed(Name)) + return (*Jit)->getPointerToFunction(F); + } + // The function is not available : fallback on the first created (will + // search in symbol of the current program/library) + return (*JITs.begin())->getPointerToNamedFunction(Name); + } +}; +ManagedStatic AllJits; +} +extern "C" { + // getPointerToNamedFunction - This function is used as a global wrapper to + // JIT::getPointerToNamedFunction for the purpose of resolving symbols when + // bugpoint is debugging the JIT. In that scenario, we are loading an .so and + // need to resolve function(s) that are being mis-codegenerated, so we need to + // resolve their addresses at runtime, and this is the way to do it. + void *getPointerToNamedFunction(const char *Name) { + return AllJits->getPointerToNamedFunction(Name); + } +} + +JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *jmm, bool GVsWithCode) + : ExecutionEngine(M), TM(tm), TJI(tji), + JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), + AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { + setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + + jitstate = new JITState(M); + + // Initialize JCE + JCE = createEmitter(*this, JMM, TM); + + // Register in global list of all JITs. + AllJits->Add(this); + + // Add target data + MutexGuard locked(lock); + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory that + // may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); +} + +JIT::~JIT() { + // Cleanup. + AllJits->Remove(this); + delete jitstate; + delete JCE; + // JMM is a ownership of JCE, so we no need delete JMM here. + delete &TM; +} + +/// addModule - Add a new Module to the JIT. If we previously removed the last +/// Module, we need re-initialize jitstate with a valid Module. +void JIT::addModule(Module *M) { + MutexGuard locked(lock); + + if (Modules.empty()) { + assert(!jitstate && "jitstate should be NULL if Modules vector is empty!"); + + jitstate = new JITState(M); + + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); + } + + ExecutionEngine::addModule(M); +} + +/// removeModule - If we are removing the last Module, invalidate the jitstate +/// since the PassManager it contains references a released Module. +bool JIT::removeModule(Module *M) { + bool result = ExecutionEngine::removeModule(M); + + MutexGuard locked(lock); + + if (jitstate && jitstate->getModule() == M) { + delete jitstate; + jitstate = nullptr; + } + + if (!jitstate && !Modules.empty()) { + jitstate = new JITState(Modules[0]); + + FunctionPassManager &PM = jitstate->getPM(); + M->setDataLayout(TM.getSubtargetImpl()->getDataLayout()); + PM.add(new DataLayoutPass(M)); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { + report_fatal_error("Target does not support machine code emission!"); + } + + // Initialize passes. + PM.doInitialization(); + } + return result; +} + +/// run - Start execution with the specified function and arguments. +/// +GenericValue JIT::runFunction(Function *F, + const std::vector &ArgValues) { + assert(F && "Function *F was null at entry to run()"); + + void *FPtr = getPointerToFunction(F); + assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); + FunctionType *FTy = F->getFunctionType(); + Type *RetTy = FTy->getReturnType(); + + assert((FTy->getNumParams() == ArgValues.size() || + (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && + "Wrong number of arguments passed into function!"); + assert(FTy->getNumParams() == ArgValues.size() && + "This doesn't support passing arguments through varargs (yet)!"); + + // Handle some common cases first. These cases correspond to common `main' + // prototypes. + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { + switch (ArgValues.size()) { + case 3: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { + int (*PF)(int, char **, const char **) = + (int(*)(int, char **, const char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]), + (const char **)GVTOP(ArgValues[2]))); + return rv; + } + break; + case 2: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { + int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]))); + return rv; + } + break; + case 1: + if (FTy->getParamType(0)->isIntegerTy(32)) { + GenericValue rv; + int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); + return rv; + } + if (FTy->getParamType(0)->isPointerTy()) { + GenericValue rv; + int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0]))); + return rv; + } + break; + } + } + + // Handle cases where no arguments are passed first. + if (ArgValues.empty()) { + GenericValue rv; + switch (RetTy->getTypeID()) { + default: llvm_unreachable("Unknown return type for function call!"); + case Type::IntegerTyID: { + unsigned BitWidth = cast(RetTy)->getBitWidth(); + if (BitWidth == 1) + rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 8) + rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 16) + rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 32) + rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 64) + rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); + else + llvm_unreachable("Integer types > 64 bits not supported"); + return rv; + } + case Type::VoidTyID: + rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); + return rv; + case Type::FloatTyID: + rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); + return rv; + case Type::DoubleTyID: + rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); + return rv; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + llvm_unreachable("long double not supported yet"); + case Type::PointerTyID: + return PTOGV(((void*(*)())(intptr_t)FPtr)()); + } + } + + // Okay, this is not one of our quick and easy cases. Because we don't have a + // full FFI, we have to codegen a nullary stub function that just calls the + // function we are interested in, passing in constants for all of the + // arguments. Make this function and return. + + // First, create the function. + FunctionType *STy=FunctionType::get(RetTy, false); + Function *Stub = Function::Create(STy, Function::InternalLinkage, "", + F->getParent()); + + // Insert a basic block. + BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub); + + // Convert all of the GenericValue arguments over to constants. Note that we + // currently don't support varargs. + SmallVector Args; + for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { + Constant *C = nullptr; + Type *ArgTy = FTy->getParamType(i); + const GenericValue &AV = ArgValues[i]; + switch (ArgTy->getTypeID()) { + default: llvm_unreachable("Unknown argument type for function call!"); + case Type::IntegerTyID: + C = ConstantInt::get(F->getContext(), AV.IntVal); + break; + case Type::FloatTyID: + C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal)); + break; + case Type::DoubleTyID: + C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal)); + break; + case Type::PPC_FP128TyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(), + AV.IntVal)); + break; + case Type::PointerTyID: + void *ArgPtr = GVTOP(AV); + if (sizeof(void*) == 4) + C = ConstantInt::get(Type::getInt32Ty(F->getContext()), + (int)(intptr_t)ArgPtr); + else + C = ConstantInt::get(Type::getInt64Ty(F->getContext()), + (intptr_t)ArgPtr); + // Cast the integer to pointer + C = ConstantExpr::getIntToPtr(C, ArgTy); + break; + } + Args.push_back(C); + } + + CallInst *TheCall = CallInst::Create(F, Args, "", StubBB); + TheCall->setCallingConv(F->getCallingConv()); + TheCall->setTailCall(); + if (!TheCall->getType()->isVoidTy()) + // Return result of the call. + ReturnInst::Create(F->getContext(), TheCall, StubBB); + else + ReturnInst::Create(F->getContext(), StubBB); // Just return void. + + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; +} + +void JIT::RegisterJITEventListener(JITEventListener *L) { + if (!L) + return; + MutexGuard locked(lock); + EventListeners.push_back(L); +} +void JIT::UnregisterJITEventListener(JITEventListener *L) { + if (!L) + return; + MutexGuard locked(lock); + std::vector::reverse_iterator I= + std::find(EventListeners.rbegin(), EventListeners.rend(), L); + if (I != EventListeners.rend()) { + std::swap(*I, EventListeners.back()); + EventListeners.pop_back(); + } +} +void JIT::NotifyFunctionEmitted( + const Function &F, + void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details); + } +} + +void JIT::NotifyFreeingMachineCode(void *OldPtr) { + MutexGuard locked(lock); + for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { + EventListeners[I]->NotifyFreeingMachineCode(OldPtr); + } +} + +/// runJITOnFunction - Run the FunctionPassManager full of +/// just-in-time compilation passes on F, hopefully filling in +/// GlobalAddress[F] with the address of F's machine code. +/// +void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { + MutexGuard locked(lock); + + class MCIListener : public JITEventListener { + MachineCodeInfo *const MCI; + public: + MCIListener(MachineCodeInfo *mci) : MCI(mci) {} + void NotifyFunctionEmitted(const Function &, void *Code, size_t Size, + const EmittedFunctionDetails &) override { + MCI->setAddress(Code); + MCI->setSize(Size); + } + }; + MCIListener MCIL(MCI); + if (MCI) + RegisterJITEventListener(&MCIL); + + runJITOnFunctionUnlocked(F); + + if (MCI) + UnregisterJITEventListener(&MCIL); +} + +void JIT::runJITOnFunctionUnlocked(Function *F) { + assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); + + jitTheFunctionUnlocked(F); + + // If the function referred to another function that had not yet been + // read from bitcode, and we are jitting non-lazily, emit it now. + while (!jitstate->getPendingFunctions().empty()) { + Function *PF = jitstate->getPendingFunctions().back(); + jitstate->getPendingFunctions().pop_back(); + + assert(!PF->hasAvailableExternallyLinkage() && + "Externally-defined function should not be in pending list."); + + jitTheFunctionUnlocked(PF); + + // Now that the function has been jitted, ask the JITEmitter to rewrite + // the stub with real address of the function. + updateFunctionStubUnlocked(PF); + } +} + +void JIT::jitTheFunctionUnlocked(Function *F) { + isAlreadyCodeGenerating = true; + jitstate->getPM().run(*F); + isAlreadyCodeGenerating = false; + + // clear basic block addresses after this function is done + getBasicBlockAddressMap().clear(); +} + +/// getPointerToFunction - This method is used to get the address of the +/// specified function, compiling it if necessary. +/// +void *JIT::getPointerToFunction(Function *F) { + + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; // Check if function already code gen'd + + MutexGuard locked(lock); + + // Now that this thread owns the lock, make sure we read in the function if it + // exists in this Module. + std::string ErrorMsg; + if (F->Materialize(&ErrorMsg)) { + report_fatal_error("Error reading function '" + F->getName()+ + "' from bitcode file: " + ErrorMsg); + } + + // ... and check if another thread has already code gen'd the function. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { + bool AbortOnFailure = !F->hasExternalWeakLinkage(); + void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); + addGlobalMapping(F, Addr); + return Addr; + } + + runJITOnFunctionUnlocked(F); + + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + return Addr; +} + +void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap().find(BB); + if (I == getBasicBlockAddressMap().end()) { + getBasicBlockAddressMap()[BB] = Addr; + } else { + // ignore repeats: some BBs can be split into few MBBs? + } +} + +void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { + MutexGuard locked(lock); + getBasicBlockAddressMap().erase(BB); +} + +void *JIT::getPointerToBasicBlock(BasicBlock *BB) { + // make sure it's function is compiled by JIT + (void)getPointerToFunction(BB->getParent()); + + // resolve basic block address + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap().find(BB); + if (I != getBasicBlockAddressMap().end()) { + return I->second; + } else { + llvm_unreachable("JIT does not have BB address for address-of-label, was" + " it eliminated by optimizer?"); + } +} + +void *JIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled()) { + void *ptr = JMM->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return nullptr; +} + + +/// getOrEmitGlobalVariable - Return the address of the specified global +/// variable, possibly emitting it to memory if needed. This is used by the +/// Emitter. +void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { + MutexGuard locked(lock); + + void *Ptr = getPointerToGlobalIfAvailable(GV); + if (Ptr) return Ptr; + + // If the global is external, just remember the address. + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) { +#if HAVE___DSO_HANDLE + if (GV->getName() == "__dso_handle") + return (void*)&__dso_handle; +#endif + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); + if (!Ptr) { + report_fatal_error("Could not resolve external global address: " + +GV->getName()); + } + addGlobalMapping(GV, Ptr); + } else { + // If the global hasn't been emitted to memory yet, allocate space and + // emit it into memory. + Ptr = getMemoryForGV(GV); + addGlobalMapping(GV, Ptr); + EmitGlobalVariable(GV); // Initialize the variable. + } + return Ptr; +} + +/// recompileAndRelinkFunction - This method is used to force a function +/// which has already been compiled, to be compiled again, possibly +/// after it has been modified. Then the entry to the old copy is overwritten +/// with a branch to the new copy. If there was no old copy, this acts +/// just like JIT::getPointerToFunction(). +/// +void *JIT::recompileAndRelinkFunction(Function *F) { + void *OldAddr = getPointerToGlobalIfAvailable(F); + + // If it's not already compiled there is no reason to patch it up. + if (!OldAddr) return getPointerToFunction(F); + + // Delete the old function mapping. + addGlobalMapping(F, nullptr); + + // Recodegen the function + runJITOnFunction(F); + + // Update state, forward the old function to the new function. + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr && "Code generation didn't add function to GlobalAddress table!"); + TJI.replaceMachineCodeForFunction(OldAddr, Addr); + return Addr; +} + +/// getMemoryForGV - This method abstracts memory allocation of global +/// variable so that the JIT can allocate thread local variables depending +/// on the target. +/// +char* JIT::getMemoryForGV(const GlobalVariable* GV) { + char *Ptr; + + // GlobalVariable's which are not "constant" will cause trouble in a server + // situation. It's returned in the same block of memory as code which may + // not be writable. + if (isGVCompilationDisabled() && !GV->isConstant()) { + report_fatal_error("Compilation of non-internal GlobalValue is disabled!"); + } + + // Some applications require globals and code to live together, so they may + // be allocated into the same buffer, but in general globals are allocated + // through the memory manager which puts them near the code but not in the + // same buffer. + Type *GlobalType = GV->getType()->getElementType(); + size_t S = getDataLayout()->getTypeAllocSize(GlobalType); + size_t A = getDataLayout()->getPreferredAlignment(GV); + if (GV->isThreadLocal()) { + MutexGuard locked(lock); + Ptr = TJI.allocateThreadLocalMemory(S); + } else if (TJI.allocateSeparateGVMemory()) { + if (A <= 8) { + Ptr = (char*)malloc(S); + } else { + // Allocate S+A bytes of memory, then use an aligned pointer within that + // space. + Ptr = (char*)malloc(S+A); + unsigned MisAligned = ((intptr_t)Ptr & (A-1)); + Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0); + } + } else if (AllocateGVsWithCode) { + Ptr = (char*)JCE->allocateSpace(S, A); + } else { + Ptr = (char*)JCE->allocateGlobal(S, A); + } + return Ptr; +} + +void JIT::addPendingFunction(Function *F) { + MutexGuard locked(lock); + jitstate->getPendingFunctions().push_back(F); +} + + +JITEventListener::~JITEventListener() {} diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h new file mode 100644 index 00000000000..a742a61de7f --- /dev/null +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -0,0 +1,214 @@ +//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the top-level JIT data structure. +// +//===----------------------------------------------------------------------===// + +#ifndef JIT_H +#define JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/PassManager.h" + +namespace llvm { + +class Function; +struct JITEvent_EmittedFunctionDetails; +class MachineCodeEmitter; +class MachineCodeInfo; +class TargetJITInfo; +class TargetMachine; + +class JITState { +private: + FunctionPassManager PM; // Passes to compile a function + Module *M; // Module used to create the PM + + /// PendingFunctions - Functions which have not been code generated yet, but + /// were called from a function being code generated. + std::vector > PendingFunctions; + +public: + explicit JITState(Module *M) : PM(M), M(M) {} + + FunctionPassManager &getPM() { + return PM; + } + + Module *getModule() const { return M; } + std::vector > &getPendingFunctions() { + return PendingFunctions; + } +}; + + +class JIT : public ExecutionEngine { + /// types + typedef ValueMap + BasicBlockAddressMapTy; + /// data + TargetMachine &TM; // The current target we are compiling to + TargetJITInfo &TJI; // The JITInfo for the target we are compiling to + JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; + std::vector EventListeners; + + /// AllocateGVsWithCode - Some applications require that global variables and + /// code be allocated into the same region of memory, in which case this flag + /// should be set to true. Doing so breaks freeMachineCodeForFunction. + bool AllocateGVsWithCode; + + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + + JITState *jitstate; + + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized version, only filled for basic blocks that have their address + /// taken. + BasicBlockAddressMapTy BasicBlockAddressMap; + + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, bool AllocateGVsWithCode); +public: + ~JIT(); + + static void Register() { + JITCtor = createJIT; + } + + /// getJITInfo - Return the target JIT information structure. + /// + TargetJITInfo &getJITInfo() const { return TJI; } + + void addModule(Module *M) override; + + /// removeModule - Remove a Module from the list of modules. Returns true if + /// M is found. + bool removeModule(Module *M) override; + + /// runFunction - Start execution with the specified function and arguments. + /// + GenericValue runFunction(Function *F, + const std::vector &ArgValues) override; + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the MemoryManager. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override; + + // CompilationCallback - Invoked the first time that a call site is found, + // which causes lazy compilation of the target function. + // + static void CompilationCallback(); + + /// getPointerToFunction - This returns the address of the specified function, + /// compiling it if necessary. + /// + void *getPointerToFunction(Function *F) override; + + /// addPointerToBasicBlock - Adds address of the specific basic block. + void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes address of specific basic block. + void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. + void *getPointerToBasicBlock(BasicBlock *BB) override; + + /// getOrEmitGlobalVariable - Return the address of the specified global + /// variable, possibly emitting it to memory if needed. This is used by the + /// Emitter. + void *getOrEmitGlobalVariable(const GlobalVariable *GV) override; + + /// getPointerToFunctionOrStub - If the specified function has been + /// code-gen'd, return a pointer to the function. If not, compile it, or use + /// a stub to implement lazy compilation if available. + /// + void *getPointerToFunctionOrStub(Function *F) override; + + /// recompileAndRelinkFunction - This method is used to force a function + /// which has already been compiled, to be compiled again, possibly + /// after it has been modified. Then the entry to the old copy is overwritten + /// with a branch to the new copy. If there was no old copy, this acts + /// just like JIT::getPointerToFunction(). + /// + void *recompileAndRelinkFunction(Function *F) override; + + /// freeMachineCodeForFunction - deallocate memory used to code-generate this + /// Function. + /// + void freeMachineCodeForFunction(Function *F) override; + + /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd + /// function was encountered. Add it to a pending list to be processed after + /// the current function. + /// + void addPendingFunction(Function *F); + + /// getCodeEmitter - Return the code emitter this JIT is emitting into. + /// + JITCodeEmitter *getCodeEmitter() const { return JCE; } + + static ExecutionEngine *createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); + + // Run the JIT on F and return information about the generated code + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; + + void RegisterJITEventListener(JITEventListener *L) override; + void UnregisterJITEventListener(JITEventListener *L) override; + + TargetMachine *getTargetMachine() override { return &TM; } + + /// These functions correspond to the methods on JITEventListener. They + /// iterate over the registered listeners and call the corresponding method on + /// each. + void NotifyFunctionEmitted( + const Function &F, void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details); + void NotifyFreeingMachineCode(void *OldPtr); + + BasicBlockAddressMapTy & + getBasicBlockAddressMap() { + return BasicBlockAddressMap; + } + + +private: + static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, + TargetMachine &tm); + void runJITOnFunctionUnlocked(Function *F); + void updateFunctionStubUnlocked(Function *F); + void jitTheFunctionUnlocked(Function *F); + +protected: + + /// getMemoryforGV - Allocate memory for a global variable. + char* getMemoryForGV(const GlobalVariable* GV) override; + +}; + +} // End llvm namespace + +#endif diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp new file mode 100644 index 00000000000..2ba1f8695d7 --- /dev/null +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -0,0 +1,1249 @@ +//===-- JITEmitter.cpp - Write machine code to executable memory ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineCodeEmitter object that is used by the JIT to +// write machine code to memory and remember where relocatable values are. +// +//===----------------------------------------------------------------------===// + +#include "JIT.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include +#ifndef NDEBUG +#include +#endif +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumBytes, "Number of bytes of machine code compiled"); +STATISTIC(NumRelos, "Number of relocations applied"); +STATISTIC(NumRetries, "Number of retries with more memory"); + + +// A declaration may stop being a declaration once it's fully read from bitcode. +// This function returns true if F is fully read and is still a declaration. +static bool isNonGhostDeclaration(const Function *F) { + return F->isDeclaration() && !F->isMaterializable(); +} + +//===----------------------------------------------------------------------===// +// JIT lazy compilation code. +// +namespace { + class JITEmitter; + class JITResolverState; + + template + struct NoRAUWValueMapConfig : public ValueMapConfig { + typedef JITResolverState *ExtraData; + static void onRAUW(JITResolverState *, Value *Old, Value *New) { + llvm_unreachable("The JIT doesn't know how to handle a" + " RAUW on a value it has emitted."); + } + }; + + struct CallSiteValueMapConfig : public NoRAUWValueMapConfig { + typedef JITResolverState *ExtraData; + static void onDelete(JITResolverState *JRS, Function *F); + }; + + class JITResolverState { + public: + typedef ValueMap > + FunctionToLazyStubMapTy; + typedef std::map > CallSiteToFunctionMapTy; + typedef ValueMap, + CallSiteValueMapConfig> FunctionToCallSitesMapTy; + typedef std::map, void*> GlobalToIndirectSymMapTy; + private: + /// FunctionToLazyStubMap - Keep track of the lazy stub created for a + /// particular function so that we can reuse them if necessary. + FunctionToLazyStubMapTy FunctionToLazyStubMap; + + /// CallSiteToFunctionMap - Keep track of the function that each lazy call + /// site corresponds to, and vice versa. + CallSiteToFunctionMapTy CallSiteToFunctionMap; + FunctionToCallSitesMapTy FunctionToCallSitesMap; + + /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a + /// particular GlobalVariable so that we can reuse them if necessary. + GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + +#ifndef NDEBUG + /// Instance of the JIT this ResolverState serves. + JIT *TheJIT; +#endif + + public: + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this) { +#ifndef NDEBUG + TheJIT = jit; +#endif + } + + FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { + return FunctionToLazyStubMap; + } + + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { + return GlobalToIndirectSymMap; + } + + std::pair LookupFunctionFromCallSite( + void *CallSite) const { + // The address given to us for the stub may not be exactly right, it + // might be a little bit after the stub. As such, use upper_bound to + // find it. + CallSiteToFunctionMapTy::const_iterator I = + CallSiteToFunctionMap.upper_bound(CallSite); + assert(I != CallSiteToFunctionMap.begin() && + "This is not a known call site!"); + --I; + return *I; + } + + void AddCallSite(void *CallSite, Function *F) { + bool Inserted = CallSiteToFunctionMap.insert( + std::make_pair(CallSite, F)).second; + (void)Inserted; + assert(Inserted && "Pair was already in CallSiteToFunctionMap"); + FunctionToCallSitesMap[F].insert(CallSite); + } + + void EraseAllCallSitesForPrelocked(Function *F); + + // Erases _all_ call sites regardless of their function. This is used to + // unregister the stub addresses from the StubToResolverMap in + // ~JITResolver(). + void EraseAllCallSitesPrelocked(); + }; + + /// JITResolver - Keep track of, and resolve, call sites for functions that + /// have not yet been compiled. + class JITResolver { + typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy; + typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy; + typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; + + /// LazyResolverFn - The target lazy resolver function that we actually + /// rewrite instructions to use. + TargetJITInfo::LazyResolverFn LazyResolverFn; + + JITResolverState state; + + /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap + /// for external functions. TODO: Of course, external functions don't need + /// a lazy stub. It's actually here to make it more likely that far calls + /// succeed, but no single stub can guarantee that. I'll remove this in a + /// subsequent checkin when I actually fix far calls. + std::map ExternalFnToStubMap; + + /// revGOTMap - map addresses to indexes in the GOT + std::map revGOTMap; + unsigned nextGOTIndex; + + JITEmitter &JE; + + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { + LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); + } + + ~JITResolver(); + + /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's + /// lazy-compilation stub if it has already been created. + void *getLazyFunctionStubIfAvailable(Function *F); + + /// getLazyFunctionStub - This returns a pointer to a function's + /// lazy-compilation stub, creating one on demand as needed. + void *getLazyFunctionStub(Function *F); + + /// getExternalFunctionStub - Return a stub for the function at the + /// specified address, created lazily on demand. + void *getExternalFunctionStub(void *FnAddr); + + /// getGlobalValueIndirectSym - Return an indirect symbol containing the + /// specified GV address. + void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress); + + /// getGOTIndexForAddress - Return a new or existing index in the GOT for + /// an address. This function only manages slots, it does not manage the + /// contents of the slots or the memory associated with the GOT. + unsigned getGOTIndexForAddr(void *addr); + + /// JITCompilerFn - This function is called to resolve a stub to a compiled + /// address. If the LLVM Function corresponding to the stub has not yet + /// been compiled, this function compiles it first. + static void *JITCompilerFn(void *Stub); + }; + + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + /// True if any stubs refer to the given resolver. Only used in an assert(). + /// O(N) + bool ResolverHasStubs(JITResolver* Resolver) const { + MutexGuard guard(Lock); + for (std::map::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + if (I->second == Resolver) + return true; + } + return false; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic StubToResolverMap; + + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is + /// used to output functions to memory for execution. + class JITEmitter : public JITCodeEmitter { + JITMemoryManager *MemMgr; + + // When outputting a function stub in the context of some other function, we + // save BufferBegin/BufferEnd/CurBufferPtr here. + uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; + + // When reattempting to JIT a function after running out of space, we store + // the estimated size of the function we're trying to JIT here, so we can + // ask the memory manager for at least this much space. When we + // successfully emit the function, we reset this back to zero. + uintptr_t SizeEstimate; + + /// Relocations - These are the relocations that the function needs, as + /// emitted. + std::vector Relocations; + + /// MBBLocations - This vector is a mapping from MBB ID's to their address. + /// It is filled in by the StartMachineBasicBlock callback and queried by + /// the getMachineBasicBlockAddress callback. + std::vector MBBLocations; + + /// ConstantPool - The constant pool for the current function. + /// + MachineConstantPool *ConstantPool; + + /// ConstantPoolBase - A pointer to the first entry in the constant pool. + /// + void *ConstantPoolBase; + + /// ConstPoolAddresses - Addresses of individual constant pool entries. + /// + SmallVector ConstPoolAddresses; + + /// JumpTable - The jump tables for the current function. + /// + MachineJumpTableInfo *JumpTable; + + /// JumpTableBase - A pointer to the first entry in the jump table. + /// + void *JumpTableBase; + + /// Resolver - This contains info about the currently resolved functions. + JITResolver Resolver; + + /// LabelLocations - This vector is a mapping from Label ID's to their + /// address. + DenseMap LabelLocations; + + /// MMI - Machine module info for exception informations + MachineModuleInfo* MMI; + + // CurFn - The llvm function being emitted. Only valid during + // finishFunction(). + const Function *CurFn; + + /// Information about emitted code, which is passed to the + /// JITEventListeners. This is reset in startFunction and used in + /// finishFunction. + JITEvent_EmittedFunctionDetails EmissionDetails; + + struct EmittedCode { + void *FunctionBody; // Beginning of the function's allocation. + void *Code; // The address the function's code actually starts at. + void *ExceptionTable; + EmittedCode() : FunctionBody(nullptr), Code(nullptr), + ExceptionTable(nullptr) {} + }; + struct EmittedFunctionConfig : public ValueMapConfig { + typedef JITEmitter *ExtraData; + static void onDelete(JITEmitter *, const Function*); + static void onRAUW(JITEmitter *, const Function*, const Function*); + }; + ValueMap EmittedFunctions; + + DebugLoc PrevDL; + + /// Instance of the JIT + JIT *TheJIT; + + public: + JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) + : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr), + EmittedFunctions(this), TheJIT(&jit) { + MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); + if (jit.getJITInfo().needsGOT()) { + MemMgr->AllocateGOT(); + DEBUG(dbgs() << "JIT is managing a GOT\n"); + } + + } + ~JITEmitter() { + delete MemMgr; + } + + JITResolver &getJITResolver() { return Resolver; } + + void startFunction(MachineFunction &F) override; + bool finishFunction(MachineFunction &F) override; + + void emitConstantPool(MachineConstantPool *MCP); + void initJumpTableInfo(MachineJumpTableInfo *MJTI); + void emitJumpTableInfo(MachineJumpTableInfo *MJTI); + + void startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1); + void startGVStub(void *Buffer, unsigned StubSize); + void finishGVStub(); + void *allocIndirectGV(const GlobalValue *GV, const uint8_t *Buffer, + size_t Size, unsigned Alignment) override; + + /// allocateSpace - Reserves space in the current block if any, or + /// allocate a new one of the given size. + void *allocateSpace(uintptr_t Size, unsigned Alignment) override; + + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + void *allocateGlobal(uintptr_t Size, unsigned Alignment) override; + + void addRelocation(const MachineRelocation &MR) override { + Relocations.push_back(MR); + } + + void StartMachineBasicBlock(MachineBasicBlock *MBB) override { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + if (MBB->hasAddressTaken()) + TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(), + (void*)getCurrentPCValue()); + DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" + << (void*) getCurrentPCValue() << "]\n"); + } + + uintptr_t getConstantPoolEntryAddress(unsigned Entry) const override; + uintptr_t getJumpTableEntryAddress(unsigned Entry) const override; + + uintptr_t + getMachineBasicBlockAddress(MachineBasicBlock *MBB) const override { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; + } + + /// retryWithMoreMemory - Log a retry and deallocate all memory for the + /// given function. Increase the minimum allocation size so that we get + /// more memory next time. + void retryWithMoreMemory(MachineFunction &F); + + /// deallocateMemForFunction - Deallocate all memory for the specified + /// function body. + void deallocateMemForFunction(const Function *F); + + void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) override; + + void emitLabel(MCSymbol *Label) override { + LabelLocations[Label] = getCurrentPCValue(); + } + + DenseMap *getLabelLocations() override { + return &LabelLocations; + } + + uintptr_t getLabelAddress(MCSymbol *Label) const override { + assert(LabelLocations.count(Label) && "Label not emitted!"); + return LabelLocations.find(Label)->second; + } + + void setModuleInfo(MachineModuleInfo* Info) override { + MMI = Info; + } + + private: + void *getPointerToGlobal(GlobalValue *GV, void *Reference, + bool MayNeedFarStub); + void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); + }; +} + +void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { + JRS->EraseAllCallSitesForPrelocked(F); +} + +void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (SmallPtrSet::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(*I); + bool Erased = CallSiteToFunctionMap.erase(*I); + (void)Erased; + assert(Erased && "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); +} + +void JITResolverState::EraseAllCallSitesPrelocked() { + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (CallSiteToFunctionMapTy::const_iterator + I = CallSiteToFunctionMap.begin(), + E = CallSiteToFunctionMap.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(I->first); + } + CallSiteToFunctionMap.clear(); + FunctionToCallSitesMap.clear(); +} + +JITResolver::~JITResolver() { + // No need to lock because we're in the destructor, and state isn't shared. + state.EraseAllCallSitesPrelocked(); + assert(!StubToResolverMap->ResolverHasStubs(this) && + "Resolver destroyed with stubs still alive."); +} + +/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub +/// if it has already been created. +void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this function, recycle it. + return state.getFunctionToLazyStubMap().lookup(F); +} + +/// getFunctionStub - This returns a pointer to a function stub, creating +/// one on demand as needed. +void *JITResolver::getLazyFunctionStub(Function *F) { + MutexGuard locked(TheJIT->lock); + + // If we already have a lazy stub for this function, recycle it. + void *&Stub = state.getFunctionToLazyStubMap()[F]; + if (Stub) return Stub; + + // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we + // must resolve the symbol now. + void *Actual = TheJIT->isCompilingLazily() + ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr; + + // If this is an external declaration, attempt to resolve the address now + // to place in the stub. + if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) { + Actual = TheJIT->getPointerToFunction(F); + + // If we resolved the symbol to a null address (eg. a weak external) + // don't emit a stub. Return a null pointer to the application. + if (!Actual) return nullptr; + } + + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(F, SL.Size, SL.Alignment); + // Codegen a new stub, calling the lazy resolver or the actual address of the + // external function, if it was resolved. + Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); + JE.finishGVStub(); + + if (Actual != (void*)(intptr_t)LazyResolverFn) { + // If we are getting the stub for an external function, we really want the + // address of the stub in the GlobalAddressMap for the JIT, not the address + // of the external function. + TheJIT->updateGlobalMapping(F, Stub); + } + + DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" + << F->getName() << "'\n"); + + if (TheJIT->isCompilingLazily()) { + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. + StubToResolverMap->RegisterStubResolver(Stub, this); + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.AddCallSite(Stub, F); + } else if (!Actual) { + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the + // stub address later. + assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && + "'Actual' should have been set above."); + TheJIT->addPendingFunction(F); + } + + return Stub; +} + +/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified +/// GV address. +void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { + MutexGuard locked(TheJIT->lock); + + // If we already have a stub for this global variable, recycle it. + void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; + if (IndirectSym) return IndirectSym; + + // Otherwise, codegen a new indirect symbol. + IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress, + JE); + + DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym + << "] for GV '" << GV->getName() << "'\n"); + + return IndirectSym; +} + +/// getExternalFunctionStub - Return a stub for the function at the +/// specified address, created lazily on demand. +void *JITResolver::getExternalFunctionStub(void *FnAddr) { + // If we already have a stub for this function, recycle it. + void *&Stub = ExternalFnToStubMap[FnAddr]; + if (Stub) return Stub; + + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(nullptr, SL.Size, SL.Alignment); + Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE); + JE.finishGVStub(); + + DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub + << "] for external function at '" << FnAddr << "'\n"); + return Stub; +} + +unsigned JITResolver::getGOTIndexForAddr(void* addr) { + unsigned idx = revGOTMap[addr]; + if (!idx) { + idx = ++nextGOTIndex; + revGOTMap[addr] = idx; + DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr [" + << addr << "]\n"); + } + return idx; +} + +/// JITCompilerFn - This function is called when a lazy compilation stub has +/// been entered. It looks up which function this stub corresponds to, compiles +/// it if necessary, then returns the resultant function pointer. +void *JITResolver::JITCompilerFn(void *Stub) { + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); + + Function* F = nullptr; + void* ActualPtr = nullptr; + + { + // Only lock for getting the Function. The call getPointerToFunction made + // in this function might trigger function materializing, which requires + // JIT lock to be unlocked. + MutexGuard locked(JR->TheJIT->lock); + + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + std::pair I = + JR->state.LookupFunctionFromCallSite(Stub); + F = I.second; + ActualPtr = I.first; + } + + // If we have already code generated the function, just return the address. + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); + + if (!Result) { + // Otherwise we don't have it, do lazy compilation now. + + // If lazy compilation is disabled, emit a useful error message and abort. + if (!JR->TheJIT->isCompilingLazily()) { + report_fatal_error("LLVM JIT requested to do lazy compilation of" + " function '" + + F->getName() + "' when lazy compiles are disabled!"); + } + + DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName() + << "' In stub ptr = " << Stub << " actual ptr = " + << ActualPtr << "\n"); + (void)ActualPtr; + + Result = JR->TheJIT->getPointerToFunction(F); + } + + // Reacquire the lock to update the GOT map. + MutexGuard locked(JR->TheJIT->lock); + + // We might like to remove the call site from the CallSiteToFunction map, but + // we can't do that! Multiple threads could be stuck, waiting to acquire the + // lock above. As soon as the 1st function finishes compiling the function, + // the next one will be released, and needs to be able to find the function it + // needs to call. + + // FIXME: We could rewrite all references to this stub if we knew them. + + // What we will do is set the compiled function address to map to the + // same GOT entry as the stub so that later clients may update the GOT + // if they see it still using the stub address. + // Note: this is done so the Resolver doesn't have to manage GOT memory + // Do this without allocating map space if the target isn't using a GOT + if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) + JR->revGOTMap[Result] = JR->revGOTMap[Stub]; + + return Result; +} + +//===----------------------------------------------------------------------===// +// JITEmitter code. +// + +static GlobalObject *getSimpleAliasee(Constant *C) { + C = C->stripPointerCasts(); + return dyn_cast(C); +} + +void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, + bool MayNeedFarStub) { + if (GlobalVariable *GV = dyn_cast(V)) + return TheJIT->getOrEmitGlobalVariable(GV); + + if (GlobalAlias *GA = dyn_cast(V)) { + // We can only handle simple cases. + if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee())) + return TheJIT->getPointerToGlobal(GV); + return nullptr; + } + + // If we have already compiled the function, return a pointer to its body. + Function *F = cast(V); + + void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F); + if (FnStub) { + // Return the function stub if it's already created. We do this first so + // that we're returning the same address for the function as any previous + // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be + // close enough to call. + return FnStub; + } + + // If we know the target can handle arbitrary-distance calls, try to + // return a direct pointer. + if (!MayNeedFarStub) { + // If we have code, go ahead and return that. + void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); + if (ResultPtr) return ResultPtr; + + // If this is an external function pointer, we can force the JIT to + // 'compile' it, which really just adds it to the map. + if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) + return TheJIT->getPointerToFunction(F); + } + + // Otherwise, we may need a to emit a stub, and, conservatively, we always do + // so. Note that it's possible to return null from getLazyFunctionStub in the + // case of a weak extern that fails to resolve. + return Resolver.getLazyFunctionStub(F); +} + +void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { + // Make sure GV is emitted first, and create a stub containing the fully + // resolved address. + void *GVAddress = getPointerToGlobal(V, Reference, false); + void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); + return StubAddr; +} + +void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { + if (DL.isUnknown()) return; + if (!BeforePrintingInsn) return; + + const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); + + if (DL.getScope(Context) != nullptr && PrevDL != DL) { + JITEvent_EmittedFunctionDetails::LineStart NextLine; + NextLine.Address = getCurrentPCValue(); + NextLine.Loc = DL; + EmissionDetails.LineStarts.push_back(NextLine); + } + + PrevDL = DL; +} + +static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, + const DataLayout *TD) { + const std::vector &Constants = MCP->getConstants(); + if (Constants.empty()) return 0; + + unsigned Size = 0; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = Constants[i]; + unsigned AlignMask = CPE.getAlignment() - 1; + Size = (Size + AlignMask) & ~AlignMask; + Type *Ty = CPE.getType(); + Size += TD->getTypeAllocSize(Ty); + } + return Size; +} + +void JITEmitter::startFunction(MachineFunction &F) { + DEBUG(dbgs() << "JIT: Starting CodeGen of Function " + << F.getName() << "\n"); + + uintptr_t ActualSize = 0; + // Set the memory writable, if it's not already + MemMgr->setMemoryWritable(); + + if (SizeEstimate > 0) { + // SizeEstimate will be non-zero on reallocation attempts. + ActualSize = SizeEstimate; + } + + BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin; + + // Ensure the constant pool/jump table info is at least 4-byte aligned. + emitAlignment(16); + + emitConstantPool(F.getConstantPool()); + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + initJumpTableInfo(MJTI); + + // About to start emitting the machine code for the function. + emitAlignment(std::max(F.getFunction()->getAlignment(), 8U)); + TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); + EmittedFunctions[F.getFunction()].Code = CurBufferPtr; + + MBBLocations.clear(); + + EmissionDetails.MF = &F; + EmissionDetails.LineStarts.clear(); +} + +bool JITEmitter::finishFunction(MachineFunction &F) { + if (CurBufferPtr == BufferEnd) { + // We must call endFunctionBody before retrying, because + // deallocateMemForFunction requires it. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + retryWithMoreMemory(F); + return true; + } + + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + emitJumpTableInfo(MJTI); + + // FnStart is the start of the text, not the start of the constant pool and + // other per-function data. + uint8_t *FnStart = + (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); + + // FnEnd is the end of the function's machine code. + uint8_t *FnEnd = CurBufferPtr; + + if (!Relocations.empty()) { + CurFn = F.getFunction(); + NumRelos += Relocations.size(); + + // Resolve the relocations to concrete pointers. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + void *ResultPtr = nullptr; + if (!MR.letTargetResolve()) { + if (MR.isExternalSymbol()) { + ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), + false); + DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + << ResultPtr << "]\n"); + + // If the target REALLY wants a stub for this function, emit it now. + if (MR.mayNeedFarStub()) { + ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); + } + } else if (MR.isGlobalValue()) { + ResultPtr = getPointerToGlobal(MR.getGlobalValue(), + BufferBegin+MR.getMachineCodeOffset(), + MR.mayNeedFarStub()); + } else if (MR.isIndirectSymbol()) { + ResultPtr = getPointerToGVIndirectSym( + MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset()); + } else if (MR.isBasicBlock()) { + ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); + } else if (MR.isConstantPoolIndex()) { + ResultPtr = + (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + } else { + assert(MR.isJumpTableIndex()); + ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); + } + + MR.setResultPointer(ResultPtr); + } + + // if we are managing the GOT and the relocation wants an index, + // give it one + if (MR.isGOTRelative() && MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); + MR.setGOTIndex(idx); + if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; + } + } + } + + CurFn = nullptr; + TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], + Relocations.size(), MemMgr->getGOTBase()); + } + + // Update the GOT entry for F to point to the new code. + if (MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); + if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; + } + } + + // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for + // global variables that were referenced in the relocations. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + + if (CurBufferPtr == BufferEnd) { + retryWithMoreMemory(F); + return true; + } else { + // Now that we've succeeded in emitting the function, reset the + // SizeEstimate back down to zero. + SizeEstimate = 0; + } + + BufferBegin = CurBufferPtr = nullptr; + NumBytes += FnEnd-FnStart; + + // Invalidate the icache if necessary. + sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); + + TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, + EmissionDetails); + + // Reset the previous debug location. + PrevDL = DebugLoc(); + + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart + << "] Function: " << F.getName() + << ": " << (FnEnd-FnStart) << " bytes of text, " + << Relocations.size() << " relocations\n"); + + Relocations.clear(); + ConstPoolAddresses.clear(); + + // Mark code region readable and executable if it's not so already. + MemMgr->setMemoryExecutable(); + + DEBUG({ + dbgs() << "JIT: Binary code:\n"; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; + } + dbgs()<< '\n'; + }); + + if (MMI) + MMI->EndFunction(); + + return false; +} + +void JITEmitter::retryWithMoreMemory(MachineFunction &F) { + DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); + Relocations.clear(); // Clear the old relocations or we'll reapply them. + ConstPoolAddresses.clear(); + ++NumRetries; + deallocateMemForFunction(F.getFunction()); + // Try again with at least twice as much free space. + SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); + + for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){ + if (MBB->hasAddressTaken()) + TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock()); + } +} + +/// deallocateMemForFunction - Deallocate all memory for the specified +/// function body. Also drop any references the function has to stubs. +/// May be called while the Function is being destroyed inside ~Value(). +void JITEmitter::deallocateMemForFunction(const Function *F) { + ValueMap::iterator + Emitted = EmittedFunctions.find(F); + if (Emitted != EmittedFunctions.end()) { + MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); + TheJIT->NotifyFreeingMachineCode(Emitted->second.Code); + + EmittedFunctions.erase(Emitted); + } +} + + +void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) { + if (BufferBegin) + return JITCodeEmitter::allocateSpace(Size, Alignment); + + // create a new memory block if there is no active one. + // care must be taken so that BufferBegin is invalidated when a + // block is trimmed + BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment); + BufferEnd = BufferBegin+Size; + return CurBufferPtr; +} + +void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) { + // Delegate this call through the memory manager. + return MemMgr->allocateGlobal(Size, Alignment); +} + +void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { + if (TheJIT->getJITInfo().hasCustomConstantPool()) + return; + + const std::vector &Constants = MCP->getConstants(); + if (Constants.empty()) return; + + unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getDataLayout()); + unsigned Align = MCP->getConstantPoolAlignment(); + ConstantPoolBase = allocateSpace(Size, Align); + ConstantPool = MCP; + + if (!ConstantPoolBase) return; // Buffer overflow. + + DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase + << "] (size: " << Size << ", alignment: " << Align << ")\n"); + + // Initialize the memory for all of the constant pool entries. + unsigned Offset = 0; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + MachineConstantPoolEntry CPE = Constants[i]; + unsigned AlignMask = CPE.getAlignment() - 1; + Offset = (Offset + AlignMask) & ~AlignMask; + + uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset; + ConstPoolAddresses.push_back(CAddr); + if (CPE.isMachineConstantPoolEntry()) { + // FIXME: add support to lower machine constant pool values into bytes! + report_fatal_error("Initialize memory with machine specific constant pool" + "entry has not been implemented!"); + } + TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr); + DEBUG(dbgs() << "JIT: CP" << i << " at [0x"; + dbgs().write_hex(CAddr) << "]\n"); + + Type *Ty = CPE.Val.ConstVal->getType(); + Offset += TheJIT->getDataLayout()->getTypeAllocSize(Ty); + } +} + +void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) { + if (TheJIT->getJITInfo().hasCustomJumpTables()) + return; + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) + return; + + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + unsigned NumEntries = 0; + for (unsigned i = 0, e = JT.size(); i != e; ++i) + NumEntries += JT[i].MBBs.size(); + + unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getDataLayout()); + + // Just allocate space for all the jump tables now. We will fix up the actual + // MBB entries in the tables after we emit the code for each block, since then + // we will know the final locations of the MBBs in memory. + JumpTable = MJTI; + JumpTableBase = allocateSpace(NumEntries * EntrySize, + MJTI->getEntryAlignment(*TheJIT->getDataLayout())); +} + +void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { + if (TheJIT->getJITInfo().hasCustomJumpTables()) + return; + + const std::vector &JT = MJTI->getJumpTables(); + if (JT.empty() || !JumpTableBase) return; + + + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + return; + case MachineJumpTableInfo::EK_BlockAddress: { + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == sizeof(void*) && + "Cross JIT'ing?"); + + // For each jump table, map each target in the jump table to the address of + // an emitted MachineBasicBlock. + intptr_t *SlotPtr = (intptr_t*)JumpTableBase; + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector &MBBs = JT[i].MBBs; + // Store the address of the basic block for this jump table slot in the + // memory we allocated for the jump table in 'initJumpTableInfo' + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) + *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]); + } + break; + } + + case MachineJumpTableInfo::EK_Custom32: + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: { + assert(MJTI->getEntrySize(*TheJIT->getDataLayout()) == 4&&"Cross JIT'ing?"); + // For each jump table, place the offset from the beginning of the table + // to the target address. + int *SlotPtr = (int*)JumpTableBase; + + for (unsigned i = 0, e = JT.size(); i != e; ++i) { + const std::vector &MBBs = JT[i].MBBs; + // Store the offset of the basic block for this jump table slot in the + // memory we allocated for the jump table in 'initJumpTableInfo' + uintptr_t Base = (uintptr_t)SlotPtr; + for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { + uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]); + /// FIXME: USe EntryKind instead of magic "getPICJumpTableEntry" hook. + *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base); + } + } + break; + } + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + llvm_unreachable( + "JT Info emission not implemented for GPRel64BlockAddress yet."); + } +} + +void JITEmitter::startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment) { + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); + BufferEnd = BufferBegin+StubSize+1; +} + +void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) { + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + BufferBegin = CurBufferPtr = (uint8_t *)Buffer; + BufferEnd = BufferBegin+StubSize+1; +} + +void JITEmitter::finishGVStub() { + assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space."); + NumBytes += getCurrentPCOffset(); + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; +} + +void *JITEmitter::allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) { + uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment); + memcpy(IndGV, Buffer, Size); + return IndGV; +} + +// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry +// in the constant pool that was last emitted with the 'emitConstantPool' +// method. +// +uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const { + assert(ConstantNum < ConstantPool->getConstants().size() && + "Invalid ConstantPoolIndex!"); + return ConstPoolAddresses[ConstantNum]; +} + +// getJumpTableEntryAddress - Return the address of the JumpTable with index +// 'Index' in the jumpp table that was last initialized with 'initJumpTableInfo' +// +uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const { + const std::vector &JT = JumpTable->getJumpTables(); + assert(Index < JT.size() && "Invalid jump table index!"); + + unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getDataLayout()); + + unsigned Offset = 0; + for (unsigned i = 0; i < Index; ++i) + Offset += JT[i].MBBs.size(); + + Offset *= EntrySize; + + return (uintptr_t)((char *)JumpTableBase + Offset); +} + +void JITEmitter::EmittedFunctionConfig::onDelete( + JITEmitter *Emitter, const Function *F) { + Emitter->deallocateMemForFunction(F); +} +void JITEmitter::EmittedFunctionConfig::onRAUW( + JITEmitter *, const Function*, const Function*) { + llvm_unreachable("The JIT doesn't know how to handle a" + " RAUW on a value it has emitted."); +} + + +//===----------------------------------------------------------------------===// +// Public interface to this file +//===----------------------------------------------------------------------===// + +JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, + TargetMachine &tm) { + return new JITEmitter(jit, JMM, tm); +} + +// getPointerToFunctionOrStub - If the specified function has been +// code-gen'd, return a pointer to the function. If not, compile it, or use +// a stub to implement lazy compilation if available. +// +void *JIT::getPointerToFunctionOrStub(Function *F) { + // If we have already code generated the function, just return the address. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; + + // Get a stub if the target supports it. + JITEmitter *JE = static_cast(getCodeEmitter()); + return JE->getJITResolver().getLazyFunctionStub(F); +} + +void JIT::updateFunctionStubUnlocked(Function *F) { + // Get the empty stub we generated earlier. + JITEmitter *JE = static_cast(getCodeEmitter()); + void *Stub = JE->getJITResolver().getLazyFunctionStub(F); + void *Addr = getPointerToGlobalIfAvailable(F); + assert(Addr != Stub && "Function must have non-stub address to be updated."); + + // Tell the target jit info to rewrite the stub at the specified address, + // rather than creating a new one. + TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout(); + JE->startGVStub(Stub, layout.Size); + getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter()); + JE->finishGVStub(); +} + +/// freeMachineCodeForFunction - release machine code memory for given Function. +/// +void JIT::freeMachineCodeForFunction(Function *F) { + // Delete translation for this from the ExecutionEngine, so it will get + // retranslated next time it is used. + updateGlobalMapping(F, nullptr); + + // Free the actual memory for the function body and related stuff. + static_cast(JCE)->deallocateMemForFunction(F); +} diff --git a/lib/ExecutionEngine/MCJIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp similarity index 100% rename from lib/ExecutionEngine/MCJIT/JITMemoryManager.cpp rename to lib/ExecutionEngine/JIT/JITMemoryManager.cpp diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt new file mode 100644 index 00000000000..dd22f1b464a --- /dev/null +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JIT +parent = ExecutionEngine +required_libraries = CodeGen Core ExecutionEngine Support diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile new file mode 100644 index 00000000000..aafa3d9d420 --- /dev/null +++ b/lib/ExecutionEngine/JIT/Makefile @@ -0,0 +1,38 @@ +##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMJIT + +# Get the $(ARCH) setting +include $(LEVEL)/Makefile.config + +# Enable the X86 JIT if compiling on X86 +ifeq ($(ARCH), x86) + ENABLE_X86_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the X86 JIT on non-X86 hosts +ifdef ENABLE_X86_JIT + CPPFLAGS += -DENABLE_X86_JIT +endif + +# Enable the Sparc JIT if compiling on Sparc +ifeq ($(ARCH), Sparc) + ENABLE_SPARC_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the Sparc JIT on non-Sparc hosts +ifdef ENABLE_SPARC_JIT + CPPFLAGS += -DENABLE_SPARC_JIT +endif + +include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index ecae078ec7d..6dc75af2ec9 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT +subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT [component_0] type = Library diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index 0f42c31060b..088635a0e99 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMMCJIT - JITMemoryManager.cpp MCJIT.cpp SectionMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 5f1fac7eff1..53630d5a5e8 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -247,6 +247,10 @@ void MCJIT::finalizeModule(Module *M) { finalizeLoadedModules(); } +void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { + report_fatal_error("not yet implemented"); +} + uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { Mangler Mang(TM->getSubtargetImpl()->getDataLayout()); SmallString<128> FullName; @@ -368,6 +372,14 @@ void *MCJIT::getPointerToFunction(Function *F) { return (void*)Dyld.getSymbolLoadAddress(Name); } +void *MCJIT::recompileAndRelinkFunction(Function *F) { + report_fatal_error("not yet implemented"); +} + +void MCJIT::freeMachineCodeForFunction(Function *F) { + report_fatal_error("not yet implemented"); +} + void MCJIT::runStaticConstructorsDestructorsInModulePtrSet( bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) { for (; I != E; ++I) { @@ -537,7 +549,8 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) { if (!L) return; MutexGuard locked(lock); - auto I = std::find(EventListeners.rbegin(), EventListeners.rend(), L); + SmallVector::reverse_iterator I= + std::find(EventListeners.rbegin(), EventListeners.rend(), L); if (I != EventListeners.rend()) { std::swap(*I, EventListeners.back()); EventListeners.pop_back(); @@ -553,8 +566,7 @@ void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) { void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) { MutexGuard locked(lock); for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) { - JITEventListener *L = EventListeners[I]; - L->NotifyFreeingObject(Obj); + EventListeners[I]->NotifyFreeingObject(Obj); } } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 247de7c90b8..83e3321db92 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -211,7 +211,7 @@ class MCJIT : public ExecutionEngine { MCContext *Ctx; LinkingMemoryManager MemMgr; RuntimeDyld Dyld; - std::vector EventListeners; + SmallVector EventListeners; OwningModuleContainer OwnedModules; @@ -275,8 +275,14 @@ public: /// \param isDtors - Run the destructors instead of constructors. void runStaticConstructorsDestructors(bool isDtors) override; + void *getPointerToBasicBlock(BasicBlock *BB) override; + void *getPointerToFunction(Function *F) override; + void *recompileAndRelinkFunction(Function *F) override; + + void freeMachineCodeForFunction(Function *F) override; + GenericValue runFunction(Function *F, const std::vector &ArgValues) override; diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index cf714324e3b..c26e0ada5bc 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -11,7 +11,7 @@ LIBRARYNAME = LLVMExecutionEngine include $(LEVEL)/Makefile.config -PARALLEL_DIRS = Interpreter MCJIT RuntimeDyld +PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld ifeq ($(USE_INTEL_JITEVENTS), 1) PARALLEL_DIRS += IntelJITEvents diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index e6679cfb7f7..b10d51f6486 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -30,7 +30,7 @@ TargetMachine *EngineBuilder::selectTarget() { // MCJIT can generate code for remote targets, but the old JIT and Interpreter // must use the host architecture. - if (WhichEngine != EngineKind::Interpreter && M) + if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M) TT.setTriple(M->getTargetTriple()); return selectTarget(TT, MArch, MCPU, MAttrs); @@ -89,7 +89,8 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, } // FIXME: non-iOS ARM FastISel is broken with MCJIT. - if (TheTriple.getArch() == Triple::arm && + if (UseMCJIT && + TheTriple.getArch() == Triple::arm && !TheTriple.isiOS() && OptLevel == CodeGenOpt::None) { OptLevel = CodeGenOpt::Less; diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index 8462d3664a0..789d549bb15 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -2,7 +2,7 @@ set(LLVM_TARGET_DEFINITIONS AArch64.td) tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index aec283f8085..55df29c1499 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -23,6 +23,7 @@ class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class ImmutablePass; +class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; @@ -30,6 +31,10 @@ class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); + +FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, + JITCodeEmitter &JCE); + FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp new file mode 100644 index 00000000000..714497c1bd8 --- /dev/null +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -0,0 +1,1910 @@ +//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the ARM machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRelocations.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include +#endif +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + + class ARMCodeEmitter : public MachineFunctionPass { + ARMJITInfo *JTI; + const ARMBaseInstrInfo *II; + const DataLayout *TD; + const ARMSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + MachineModuleInfo *MMI; + const std::vector *MCPEs; + const std::vector *MJTEs; + bool IsPIC; + bool IsThumb; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + public: + ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), + II((const ARMBaseInstrInfo *)tm.getSubtargetImpl()->getInstrInfo()), + TD(tm.getSubtargetImpl()->getDataLayout()), TM(tm), MCE(mce), + MCPEs(nullptr), MJTEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "ARM Machine Code Emitter"; + } + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Binary); + void emitDWordLE(uint64_t Binary); + void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); + void emitMOVi2piecesInstruction(const MachineInstr &MI); + void emitLEApcrelJTInstruction(const MachineInstr &MI); + void emitPseudoMoveInstruction(const MachineInstr &MI); + void addPCLabel(unsigned LabelID); + void emitPseudoInstruction(const MachineInstr &MI); + unsigned getMachineSoRegOpValue(const MachineInstr &MI, + const MCInstrDesc &MCID, + const MachineOperand &MO, + unsigned OpIdx); + + unsigned getMachineSoImmOpValue(unsigned SoImm); + unsigned getAddrModeSBit(const MachineInstr &MI, + const MCInstrDesc &MCID) const; + + void emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn = 0); + + void emitLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitMulFrmInstruction(const MachineInstr &MI); + + void emitExtendInstruction(const MachineInstr &MI); + + void emitMiscArithInstruction(const MachineInstr &MI); + + void emitSaturateInstruction(const MachineInstr &MI); + + void emitBranchInstruction(const MachineInstr &MI); + + void emitInlineJumpTable(unsigned JTIndex); + + void emitMiscBranchInstruction(const MachineInstr &MI); + + void emitVFPArithInstruction(const MachineInstr &MI); + + void emitVFPConversionInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitNEONLaneInstruction(const MachineInstr &MI); + void emitNEONDupInstruction(const MachineInstr &MI); + void emitNEON1RegModImmInstruction(const MachineInstr &MI); + void emitNEON2RegInstruction(const MachineInstr &MI); + void emitNEON3RegInstruction(const MachineInstr &MI); + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the + // TableGen'erated getBinaryCodeForInstr() function to encode any + // operand values, instead querying getMachineOpValue() directly for + // each operand it needs to encode. Thus, any of the new encoder + // helper functions can simply return 0 as the values the return + // are already handled elsewhere. They are placeholders to allow this + // encoder to continue to function until the MC encoder is sufficiently + // far along that this one can be eliminated entirely. + unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) + const { return 0; } + unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } + unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } + unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } + unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) + const { return 0; } + unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI, + unsigned Op) const { return 0; } + unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) + const { return 0; } + unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI, + unsigned Op) + const { return 0; } + unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, + unsigned Op) const { return 0; } + uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0; } + + unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op) + const { + // {17-13} = reg + // {12} = (U)nsigned (add == '1', sub == '0') + // {11-0} = imm12 + const MachineOperand &MO = MI.getOperand(Op); + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + uint32_t Binary; + Binary = Imm12 & 0xfff; + if (Imm12 >= 0) + Binary |= (1 << 12); + Binary |= (Reg << 13); + return Binary; + } + + unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const { + return 0; + } + + uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) + const { return 0;} + uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const { + // {17-13} = reg + // {12} = (U)nsigned (add == '1', sub == '0') + // {11-0} = imm12 + const MachineOperand &MO = MI.getOperand(Op); + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + + // Special value for #-0 + if (Imm12 == INT32_MIN) + Imm12 = 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs + // sub. + bool isAdd = true; + if (Imm12 < 0) { + Imm12 = -Imm12; + isAdd = false; + } + + uint32_t Binary = Imm12 & 0xfff; + if (isAdd) + Binary |= (1 << 12); + Binary |= (Reg << 13); + return Binary; + } + unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + + unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) + const { return 0; } + + unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the + /// machine operand requires relocation, record the relocation and return + /// zero. + unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, + unsigned Reloc); + + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. + /// + unsigned getShiftOp(unsigned Imm) const ; + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV = 0) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, + intptr_t JTBase = 0) const; + unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; + }; +} + +char ARMCodeEmitter::ID = 0; + +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// code to the specified MCE object. +FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, + JITCodeEmitter &JCE) { + return new ARMCodeEmitter(TM, JCE); +} + +bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + TargetMachine &Target = const_cast(MF.getTarget()); + + assert((Target.getRelocationModel() != Reloc::Default || + Target.getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + // Initialize the subtarget first so we can grab all of the + // subtarget dependent variables from there. + Subtarget = &TM.getSubtarget(); + JTI = static_cast(Target.getSubtargetImpl()->getJITInfo()); + II = static_cast(Subtarget->getInstrInfo()); + TD = Target.getSubtargetImpl()->getDataLayout(); + + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = nullptr; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + IsThumb = MF.getInfo()->isThumbFunction(); + JTI->Initialize(MF, IsPIC); + MMI = &getAnalysis(); + MCE.setModuleInfo(MMI); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getName() << "'\n"); + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. +/// +unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { + switch (ARM_AM::getAM2ShiftOpc(Imm)) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::asr: return 2; + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::ror: + case ARM_AM::rrx: return 3; + } +} + +/// getMovi32Value - Return binary encoding of operand for movw/movt. If the +/// machine operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, + const MachineOperand &MO, + unsigned Reloc) { + assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) + && "Relocation to this function should be for movt or movw"); + + if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), Reloc, true, false); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), Reloc); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), Reloc); + else { +#ifndef NDEBUG + errs() << MO; +#endif + llvm_unreachable("Unsupported operand type for movw/movt"); + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return II->getRegisterInfo().getEncodingValue(MO.getReg()); + else if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); + else if (MO.isCPI()) { + const MCInstrDesc &MCID = MI.getDesc(); + // For VFP load, the immediate offset is multiplied by 4. + unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; + emitConstPoolAddress(MO.getIndex(), Reloc); + } else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} + +/// emitGlobalAddress - Emit the specified address to the code stream. +/// +void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV) const { + MachineRelocation MR = Indirect + ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), + ACPV, MayNeedFarStub) + : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), ACPV, + MayNeedFarStub); + MCE.addRelocation(MR); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +void ARMCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES)); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + // Tell JIT emitter we'll resolve the address. + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, true)); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +void ARMCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, true)); +} + +/// emitMachineBasicBlock - Emit the specified address basic block. +void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, + intptr_t JTBase) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB, JTBase)); +} + +void ARMCodeEmitter::emitWordLE(unsigned Binary) { + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); + MCE.emitWordLE(Binary); +} + +void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { + DEBUG(errs() << " 0x"; + errs().write_hex(Binary) << "\n"); + MCE.emitDWordLE(Binary); +} + +void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + ++NumEmitted; // Keep track of the # of mi's emitted + switch (MI.getDesc().TSFlags & ARMII::FormMask) { + default: { + llvm_unreachable("Unhandled instruction encoding format!"); + } + case ARMII::MiscFrm: + if (MI.getOpcode() == ARM::LEApcrelJT) { + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + } + llvm_unreachable("Unhandled instruction encoding!"); + case ARMII::Pseudo: + emitPseudoInstruction(MI); + break; + case ARMII::DPFrm: + case ARMII::DPSoRegFrm: + emitDataProcessingInstruction(MI); + break; + case ARMII::LdFrm: + case ARMII::StFrm: + emitLoadStoreInstruction(MI); + break; + case ARMII::LdMiscFrm: + case ARMII::StMiscFrm: + emitMiscLoadStoreInstruction(MI); + break; + case ARMII::LdStMulFrm: + emitLoadStoreMultipleInstruction(MI); + break; + case ARMII::MulFrm: + emitMulFrmInstruction(MI); + break; + case ARMII::ExtFrm: + emitExtendInstruction(MI); + break; + case ARMII::ArithMiscFrm: + emitMiscArithInstruction(MI); + break; + case ARMII::SatFrm: + emitSaturateInstruction(MI); + break; + case ARMII::BrFrm: + emitBranchInstruction(MI); + break; + case ARMII::BrMiscFrm: + emitMiscBranchInstruction(MI); + break; + // VFP instructions. + case ARMII::VFPUnaryFrm: + case ARMII::VFPBinaryFrm: + emitVFPArithInstruction(MI); + break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + emitVFPConversionInstruction(MI); + break; + case ARMII::VFPLdStFrm: + emitVFPLoadStoreInstruction(MI); + break; + case ARMII::VFPLdStMulFrm: + emitVFPLoadStoreMultipleInstruction(MI); + break; + + // NEON instructions. + case ARMII::NGetLnFrm: + case ARMII::NSetLnFrm: + emitNEONLaneInstruction(MI); + break; + case ARMII::NDupFrm: + emitNEONDupInstruction(MI); + break; + case ARMII::N1RegModImmFrm: + emitNEON1RegModImmInstruction(MI); + break; + case ARMII::N2RegFrm: + emitNEON2RegInstruction(MI); + break; + case ARMII::N3RegFrm: + emitNEON3RegInstruction(MI); + break; + } + MCE.processDebugLoc(MI.getDebugLoc(), false); +} + +void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { + unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. + unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. + const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; + + // Remember the CONSTPOOL_ENTRY address for later relocation. + JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); + + // Emit constpool island entry. In most cases, the actual values will be + // resolved and relocated after code emission. + if (MCPE.isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = + static_cast(MCPE.Val.MachineCPVal); + + DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); + + assert(ACPV->isGlobalValue() && "unsupported constant pool value"); + const GlobalValue *GV = cast(ACPV)->getGV(); + if (GV) { + Reloc::Model RelocM = TM.getRelocationModel(); + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + isa(GV), + Subtarget->GVIsIndirectSymbol(GV, RelocM), + (intptr_t)ACPV); + } else { + const char *Sym = cast(ACPV)->getSymbol(); + emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); + } + emitWordLE(0); + } else { + const Constant *CV = MCPE.Val.ConstVal; + + DEBUG({ + errs() << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast(CV)) + errs() << F->getName(); + else + errs() << *CV; + errs() << '\n'; + }); + + if (const GlobalValue *GV = dyn_cast(CV)) { + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa(GV), false); + emitWordLE(0); + } else if (const ConstantInt *CI = dyn_cast(CV)) { + uint32_t Val = uint32_t(*CI->getValue().getRawData()); + emitWordLE(Val); + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + if (CFP->getType()->isFloatTy()) + emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else if (CFP->getType()->isDoubleTy()) + emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + llvm_unreachable("Unable to handle this constantpool entry!"); + } + } else { + llvm_unreachable("Unable to handle this constantpool entry!"); + } + } +} + +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + // Emit the 'movw' instruction. + unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm12 + Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 + Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 + emitWordLE(Binary); + + unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; + // Emit the 'movt' instruction. + Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm16 as imm4:imm1, same as movw above. + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) && + "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); + + // Emit the 'mov' instruction. + unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(V1); + emitWordLE(Binary); + + // Now the 'orr' instruction. + Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(V2); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { + // It's basically add r, pc, (LJTI - $+8) + + const MCInstrDesc &MCID = MI.getDesc(); + + // Emit the 'add' instruction. + unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // Encode Rd. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode Rn which is PC. + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; + + // Encode the displacement. + Binary |= 1 << ARMII::I_BitShift; + emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) + Binary |= 1 << ARMII::S_BitShift; + + // Encode register def if there is one. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode the shift operation. + switch (Opcode) { + default: break; + case ARM::RRX: + // rrx + Binary |= 0x6 << 4; + break; + case ARM::MOVsrl_flag: + // lsr #1 + Binary |= (0x2 << 4) | (1 << 7); + break; + case ARM::MOVsra_flag: + // asr #1 + Binary |= (0x4 << 4) | (1 << 7); + break; + } + + // Encode register Rm. + Binary |= getMachineOpValue(MI, 1); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::addPCLabel(unsigned LabelID) { + DEBUG(errs() << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'); + JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); +} + +void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + switch (Opcode) { + default: + llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); + case ARM::BX_CALL: + case ARM::BMOVPCRX_CALL: { + // First emit mov lr, pc + unsigned Binary = 0x01a0e00f; + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + emitWordLE(Binary); + + // and then emit the branch. + emitMiscBranchInstruction(MI); + break; + } + case TargetOpcode::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + report_fatal_error("JIT does not support inline asm!"); + } + break; + } + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + // Do nothing. + break; + case ARM::CONSTPOOL_ENTRY: + emitConstPoolInstruction(MI); + break; + case ARM::PICADD: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // PICADD is just an add instruction that implicitly read pc. + emitDataProcessingInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICSTR: + case ARM::PICSTRB: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitLoadStoreInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDRH: + case ARM::PICLDRSH: + case ARM::PICLDRSB: + case ARM::PICSTRH: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitMiscLoadStoreInstruction(MI, ARM::PC); + break; + } + + case ARM::MOVi32imm: + // Two instructions to materialize a constant. + if (Subtarget->hasV6T2Ops()) + emitMOVi32immInstruction(MI); + else + emitMOVi2piecesInstruction(MI); + break; + + case ARM::LEApcrelJT: + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + case ARM::RRX: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: + emitPseudoMoveInstruction(MI); + break; + } +} + +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, + const MCInstrDesc &MCID, + const MachineOperand &MO, + unsigned OpIdx) { + unsigned Binary = getMachineOpValue(MI, MO); + + const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); + const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: llvm_unreachable("Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { + int SoImmVal = ARM_AM::getSOImmVal(SoImm); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); + return Binary; +} + +unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, + const MCInstrDesc &MCID) const { + for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ + const MachineOperand &MO = MI.getOperand(i-1); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) + return 1 << ARMII::S_BitShift; + } + return 0; +} + +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // Encode register def if there is one. + unsigned NumDefs = MCID.getNumDefs(); + unsigned OpIdx = 0; + if (NumDefs) + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + else if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); + + if (MCID.Opcode == ARM::MOVi16) { + // Get immediate from MI. + unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movw); + // Encode imm which is the same as in emitMOVi32immInstruction(). + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if(MCID.Opcode == ARM::MOVTi16) { + unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), + ARM::reloc_arm_movt) >> 16); + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); + return; + } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { + uint32_t v = ~MI.getOperand(2).getImm(); + int32_t lsb = countTrailingZeros(v); + int32_t msb = (32 - countLeadingZeros(v)) - 1; + // Instr{20-16} = msb, Instr{11-7} = lsb + Binary |= (msb & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { + // Encode Rn in Instr{0-3} + Binary |= getMachineOpValue(MI, OpIdx++); + + uint32_t lsb = MI.getOperand(OpIdx++).getImm(); + uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; + + // Instr{20-16} = widthm1, Instr{11-7} = lsb + Binary |= (widthm1 & 0x1F) << 16; + Binary |= (lsb & 0x1F) << 7; + emitWordLE(Binary); + return; + } + + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + // Encode first non-shifter register operand if there is one. + bool isUnary = MCID.TSFlags & ARMII::UnaryDP; + if (!isUnary) { + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else { + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; + ++OpIdx; + } + } + + // Encode shifter operand. + const MachineOperand &MO = MI.getOperand(OpIdx); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + // Encode SoReg. + emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); + return; + } + + if (MO.isReg()) { + // Encode register Rm. + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); + return; + } + + // Encode so_imm. + Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. + if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || + MI.getOpcode() == ARM::STRi12) { + emitWordLE(Binary); + return; + } + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDR_PRE. + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM2Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative). + Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + if (!MO2.getReg()) { // is immediate + if (ARM_AM::getAM2Offset(AM2Opc)) + // Set the value of offset_12 field + Binary |= ARM_AM::getAM2Offset(AM2Opc); + emitWordLE(Binary); + return; + } + + // Set bit I(25), because this is not in immediate encoding. + Binary |= 1 << ARMII::I_BitShift; + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + // Set bit[3:0] to the corresponding Rm register + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); + + // If this instr is in scaled register offset/index instruction, set + // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. + if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { + Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift + Binary |= ShImm << ARMII::ShiftShift; // shift_immed + } + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StMiscFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Skip LDRD and STRD's second operand. + if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) + ++OpIdx; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDRH_POST. + if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM3Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative) + Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + + // If this instr is in register offset/index encoding, set bit[3:0] + // to the corresponding Rm register. + if (MO2.getReg()) { + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); + emitWordLE(Binary); + return; + } + + // This instr is in immediate offset/index encoding, set bit 22 to 1. + Binary |= 1 << ARMII::AM3_I_BitShift; + if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { + // Set operands + Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH + Binary |= (ImmOffs & 0xF); // immedL + } + + emitWordLE(Binary); +} + +static unsigned getAddrModeUPBits(unsigned Mode) { + unsigned Binary = 0; + + // Set addressing mode by modifying bits U(23) and P(24) + // IA - Increment after - bit U = 1 and bit P = 0 + // IB - Increment before - bit U = 1 and bit P = 1 + // DA - Decrement after - bit U = 0 and bit P = 0 + // DB - Decrement before - bit U = 0 and bit P = 1 + switch (Mode) { + default: llvm_unreachable("Unknown addressing sub-mode!"); + case ARM_AM::da: break; + case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; + case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; + case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; + } + + return Binary; +} + +void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Skip operand 0 of an instruction with base register update. + unsigned OpIdx = 0; + if (IsUpdating) + ++OpIdx; + + // Set base address operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); + + // Set bit W(21) + if (IsUpdating) + Binary |= 0x1 << ARMII::W_BitShift; + + // Set registers + for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + RegNum < 16); + Binary |= 0x1 << RegNum; + } + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, MCID); + + // 32x32->64bit operations have two destination registers. The number + // of register definitions will tell us if that's what we're dealing with. + unsigned OpIdx = 0; + if (MCID.getNumDefs() == 2) + Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; + + // Encode Rm + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode Rs + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; + + // Many multiple instructions (e.g. MLA) have three src operands. Encode + // it as Rn (for multiply, that's in the same offset as RdLo. + if (MCID.getNumOperands() > OpIdx && + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO1 = MI.getOperand(OpIdx++); + const MachineOperand &MO2 = MI.getOperand(OpIdx); + if (MO2.isReg()) { + // Two register operand form. + // Encode Rn. + Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, MO2); + ++OpIdx; + } else { + Binary |= getMachineOpValue(MI, MO1); + } + + // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. + if (MI.getOperand(OpIdx).isImm() && + !MCID.OpInfo[OpIdx].isPredicate() && + !MCID.OpInfo[OpIdx].isOptionalDef()) + Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // PKH instructions are finished at this point + if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { + emitWordLE(Binary); + return; + } + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO = MI.getOperand(OpIdx++); + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { + // Encode Rm and it's done. + Binary |= getMachineOpValue(MI, MO); + emitWordLE(Binary); + return; + } + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode shift_imm. + unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + if (MCID.Opcode == ARM::PKHTB) { + assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); + if (ShiftAmt == 32) + ShiftAmt = 0; + } + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGen. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode saturate bit position. + unsigned Pos = MI.getOperand(1).getImm(); + if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) + Pos -= 1; + assert((Pos < 16 || (Pos < 32 && + MCID.Opcode != ARM::SSAT16 && + MCID.Opcode != ARM::USAT16)) && + "saturate bit position out of range"); + Binary |= Pos << 16; + + // Encode Rm + Binary |= getMachineOpValue(MI, 2); + + // Encode shift_imm. + if (MCID.getNumOperands() == 4) { + unsigned ShiftOp = MI.getOperand(3).getImm(); + ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); + if (Opc == ARM_AM::asr) + Binary |= (1 << 6); + unsigned ShiftAmt = MI.getOperand(3).getImm(); + if (ShiftAmt == 32 && Opc == ARM_AM::asr) + ShiftAmt = 0; + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + } + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + if (MCID.Opcode == ARM::TPsoft) { + llvm_unreachable("ARM::TPsoft FIXME"); // FIXME + } + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set signed_immed_24 field + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { + // Remember the base address of the inline jump table. + uintptr_t JTBase = MCE.getCurrentPCValue(); + JTI->addJumpTableBaseAddr(JTIndex, JTBase); + DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase + << '\n'); + + // Now emit the jump table entries. + const std::vector &MBBs = (*MJTEs)[JTIndex].MBBs; + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + if (IsPIC) + // DestBB address - JT base. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); + else + // Absolute DestBB address. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); + emitWordLE(0); + } +} + +void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Handle jump tables. + if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { + // First emit a ldr pc, [] instruction. + emitDataProcessingInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + unsigned JTIndex = + (MCID.Opcode == ARM::BR_JTr) + ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); + emitInlineJumpTable(JTIndex); + return; + } else if (MCID.Opcode == ARM::BR_JTm) { + // First emit a ldr pc, [] instruction. + emitLoadStoreInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + emitInlineJumpTable(MI.getOperand(3).getIndex()); + return; + } + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) + // The return register is LR. + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); + else + // otherwise, set the return register + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); + if (!isSPVFP) + Binary |= RegD << ARMII::RegRdShift; + else { + Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; + Binary |= (RegD & 0x01) << ARMII::D_BitShift; + } + return Binary; +} + +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); + if (!isSPVFP) + Binary |= RegN << ARMII::RegRnShift; + else { + Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; + Binary |= (RegN & 0x01) << ARMII::N_BitShift; + } + return Binary; +} + +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = ARM::SPRRegClass.contains(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); + if (!isSPVFP) + Binary |= RegM; + else { + Binary |= ((RegM & 0x1E) >> 1); + Binary |= (RegM & 0x01) << ARMII::M_BitShift; + } + return Binary; +} + +void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + assert((Binary & ARMII::D_BitShift) == 0 && + (Binary & ARMII::N_BitShift) == 0 && + (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // If this is a two-address operand, skip it, e.g. FMACD. + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + + // Encode Dn / Sn. + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) + Binary |= encodeVFPRn(MI, OpIdx++); + + if (OpIdx == MCID.getNumOperands() || + MCID.OpInfo[OpIdx].isPredicate() || + MCID.OpInfo[OpIdx].isOptionalDef()) { + // FCMPEZD etc. has only one operand. + emitWordLE(Binary); + return; + } + + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, OpIdx); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Form = MCID.TSFlags & ARMII::FormMask; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 0); + break; + case ARMII::VFPConv4Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 0); + break; + case ARMII::VFPConv5Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 0); + break; + } + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 1); + break; + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 1); + break; + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 1); + break; + } + + if (Form == ARMII::VFPConv5Frm) + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 2); + else if (Form == ARMII::VFPConv3Frm) + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 2); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // Encode address base. + const MachineOperand &Base = MI.getOperand(OpIdx++); + Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; + + // If there is a non-zero immediate offset, encode it. + if (Base.isReg()) { + const MachineOperand &Offset = MI.getOperand(OpIdx); + if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { + if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) + Binary |= 1 << ARMII::U_BitShift; + Binary |= ImmOffs; + emitWordLE(Binary); + return; + } + } + + // If immediate offset is omitted, default to +0. + Binary |= 1 << ARMII::U_BitShift; + + emitWordLE(Binary); +} + +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Skip operand 0 of an instruction with base register update. + unsigned OpIdx = 0; + if (IsUpdating) + ++OpIdx; + + // Set base address operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); + + // Set bit W(21) + if (IsUpdating) + Binary |= 0x1 << ARMII::W_BitShift; + + // First register is encoded in Dd. + Binary |= encodeVFPRd(MI, OpIdx+2); + + // Count the number of registers. + unsigned NumRegs = 1; + for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + ++NumRegs; + } + // Bit 8 will be set if is consecutive 64-bit registers (e.g., D0) + // Otherwise, it will be 0, in the case of 32-bit registers. + if(Binary & 0x100) + Binary |= NumRegs * 2; + else + Binary |= NumRegs; + + emitWordLE(Binary); +} + +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegD = II->getRegisterInfo().getEncodingValue(RegD); + Binary |= (RegD & 0xf) << ARMII::RegRdShift; + Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; + return Binary; +} + +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegN = II->getRegisterInfo().getEncodingValue(RegN); + Binary |= (RegN & 0xf) << ARMII::RegRnShift; + Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; + return Binary; +} + +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegM = II->getRegisterInfo().getEncodingValue(RegM); + Binary |= (RegM & 0xf); + Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; + return Binary; +} + +/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON +/// data-processing instruction to the corresponding Thumb encoding. +static unsigned convertNEONDataProcToThumb(unsigned Binary) { + assert((Binary & 0xfe000000) == 0xf2000000 && + "not an ARM NEON data-processing instruction"); + unsigned UBit = (Binary >> 24) & 1; + return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); +} + +void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; + const MCInstrDesc &MCID = MI.getDesc(); + if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + RegTOpIdx = 0; + RegNOpIdx = 1; + LnOpIdx = 2; + } else { // ARMII::NSetLnFrm + RegTOpIdx = 2; + RegNOpIdx = 0; + LnOpIdx = 3; + } + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); + RegT = II->getRegisterInfo().getEncodingValue(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, RegNOpIdx); + + unsigned LaneShift; + if ((Binary & (1 << 22)) != 0) + LaneShift = 0; // 8-bit elements + else if ((Binary & (1 << 5)) != 0) + LaneShift = 1; // 16-bit elements + else + LaneShift = 2; // 32-bit elements + + unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; + unsigned Opc1 = Lane >> 2; + unsigned Opc2 = Lane & 3; + assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); + Binary |= (Opc1 << 21); + Binary |= (Opc2 << 5); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(1).getReg(); + RegT = II->getRegisterInfo().getEncodingValue(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, 0); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd. + Binary |= encodeNEONRd(MI, 0); + // Immediate fields: Op, Cmode, I, Imm3, Imm4 + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Op = (Imm >> 12) & 1; + unsigned Cmode = (Imm >> 8) & 0xf; + unsigned I = (Imm >> 7) & 1; + unsigned Imm3 = (Imm >> 4) & 0x7; + unsigned Imm4 = Imm & 0xf; + Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source register in Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VDUPfdf or VDUPfqf. + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source registers in Dn and Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRn(MI, OpIdx++); + if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VMOVDneon or VMOVQ. + emitWordLE(Binary); +} + +#include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a7cd6ed9ea4..3a4f788c848 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp new file mode 100644 index 00000000000..6d1114d51aa --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -0,0 +1,344 @@ +//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the ARM target. +// +//===----------------------------------------------------------------------===// + +#include "ARMJITInfo.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRelocations.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "jit" + +void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. (We need +// to preserve more context and manipulate the stack directly). Instead, +// write our own wrapper, which does things our way, so we have complete +// control over register saving and restoring. +extern "C" { +#if defined(__arm__) + void ARMCompilationCallback(); + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "ARMCompilationCallback\n" + ASMPREFIX "ARMCompilationCallback:\n" + // Save caller saved registers since they may contain stuff + // for the real target function right now. We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned, so we can't just preserve the callee saved regs. + "stmdb sp!, {r0, r1, r2, r3, lr}\n" +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // The LR contains the address of the stub function on entry. + // pass it as the argument to the C part of the callback + "mov r0, lr\n" + "sub sp, sp, #4\n" + // Call the C portion of the callback + "bl " ASMPREFIX "ARMCompilationCallbackC\n" + "add sp, sp, #4\n" + // Restoring the LR to the return address of the function that invoked + // the stub and de-allocating the stack space for it requires us to + // swap the two saved LR values on the stack, as they're backwards + // for what we need since the pop instruction has a pre-determined + // order for the registers. + // +--------+ + // 0 | LR | Original return address + // +--------+ + // 1 | LR | Stub address (start of stub) + // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) + // 6-20 | D0..D7 | Saved VFP registers + // +--------+ + // +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) + // Restore VFP caller-saved registers. + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // + // We need to exchange the values in slots 0 and 1 so we can + // return to the address in slot 1 with the address in slot 0 + // restored to the LR. + "ldr r0, [sp,#20]\n" + "ldr r1, [sp,#16]\n" + "str r1, [sp,#20]\n" + "str r0, [sp,#16]\n" + // Return to the (newly modified) stub to invoke the real function. + // The above twiddling of the saved return addresses allows us to + // deallocate everything, including the LR the stub saved, with two + // updating load instructions. + "ldmia sp!, {r0, r1, r2, r3, lr}\n" + "ldr pc, [sp], #4\n" + ); +#else // Not an ARM host + void ARMCompilationCallback() { + llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); + } +#endif +} + +/// ARMCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. We're replacing the first two instructions of the + // stub with: + // ldr pc, [pc,#-4] + // + if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] + *(intptr_t *)(StubAddr+4) = NewVal; + if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } +} + +TargetJITInfo::LazyResolverFn +ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return ARMCompilationCallback; +} + +void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, + JITCodeEmitter &JCE) { + uint8_t Buffer[4]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr); + void *PtrAddr = JCE.allocIndirectGV( + GV, Buffer, sizeof(Buffer), /*Alignment=*/4); + addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); + return PtrAddr; +} + +TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { + // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a + // 4-byte address. See emitFunctionStub for details. + StubLayout Result = {16, 4}; + return Result; +} + +void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + void *Addr; + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. + if (Fn != (void*)(intptr_t)ARMCompilationCallback) { + // Branch to the corresponding function addr. + if (IsPIC) { + // The stub is 16-byte size and 4-aligned. + intptr_t LazyPtr = getIndirectSymAddr(Fn); + if (!LazyPtr) { + // In PIC mode, the function stub is loading a lazy-ptr. + LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); + DEBUG(if (F) + errs() << "JIT: Indirect symbol emitted at [" << LazyPtr + << "] for GV '" << F->getName() << "'\n"; + else + errs() << "JIT: Stub emitted at [" << LazyPtr + << "] for external function at '" << Fn << "'\n"); + } + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] + JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip + JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] + JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } else { + // The stub is 8-byte size and 4-aligned. + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + JCE.emitWordLE((intptr_t)Fn); // addr of function + sys::Memory::InvalidateInstructionCache(Addr, 8); + if (!sys::Memory::setRangeExecutable(Addr, 8)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } + } else { + // The compilation callback will overwrite the first two words of this + // stub with indirect branch instructions targeting the compiled code. + // This stub sets the return address to restart the stub, so that + // the new branch will be invoked when we come back. + // + // Branch and link to the compilation callback. + // The stub is 16-byte size and 4-byte aligned. + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub writable"); + } + // Save LR so the callback can determine which stub called it. + // The compilation callback is responsible for popping this prior + // to returning. + JCE.emitWordLE(0xe92d4000); // push {lr} + // Set the return address to go back to the start of this stub. + JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 + // Invoke the compilation callback. + JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] + // The address of the compilation callback. + JCE.emitWordLE((intptr_t)ARMCompilationCallback); + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { + llvm_unreachable("ERROR: Unable to mark stub executable"); + } + } + + return Addr; +} + +intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { + ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); + switch (RT) { + default: + return (intptr_t)(MR->getResultPointer()); + case ARM::reloc_arm_pic_jt: + // Destination address - jump table base. + return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); + case ARM::reloc_arm_jt_base: + // Jump table base address. + return getJumpTableBaseAddr(MR->getJumpTableIndex()); + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + // Constant pool entry address. + return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); + case ARM::reloc_arm_machine_cp_entry: { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); + assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && + "Can't handle this machine constant pool entry yet!"); + intptr_t Addr = (intptr_t)(MR->getResultPointer()); + Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); + return Addr; + } + } +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = resolveRelocDestAddr(MR); + switch ((ARM::RelocationType)MR->getRelocationType()) { + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_relative: { + // It is necessary to calculate the correct PC relative value. We + // subtract the base addr from the target addr to form a byte offset. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + // If the result is positive, set bit U(23) to 1. + if (ResultPtr >= 0) + *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; + else { + // Otherwise, obtain the absolute value and set bit U(23) to 0. + *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); + ResultPtr = - ResultPtr; + } + // Set the immed value calculated. + // VFP immediate offset is multiplied by 4. + if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) + ResultPtr = ResultPtr >> 2; + *((intptr_t*)RelocPos) |= ResultPtr; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. + *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; + break; + } + case ARM::reloc_arm_pic_jt: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_absolute: { + // These addresses have already been resolved. + *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; + break; + } + case ARM::reloc_arm_branch: { + // It is necessary to calculate the correct value of signed_immed_24 + // field. We subtract the base addr from the target addr to form a + // byte offset, which must be inside the range -33554432 and +33554428. + // Then, we set the signed_immed_24 field of the instruction to bits + // [25:2] of the byte offset. More details ARM-ARM p. A4-11. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; + assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_jt_base: { + // JT base - (instruction addr + 8) + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; + break; + } + } + } +} + +void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h new file mode 100644 index 00000000000..27e2a201340 --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.h @@ -0,0 +1,177 @@ +//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMJITINFO_H +#define ARMJITINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class ARMTargetMachine; + + class ARMJITInfo : public TargetJITInfo { + // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding + // CONSTPOOL_ENTRY addresses. + SmallVector ConstPoolId2AddrMap; + + // JumpTableId2AddrMap - A map from inline jumptable ids to the + // corresponding inline jump table bases. + SmallVector JumpTableId2AddrMap; + + // PCLabelMap - A map from PC labels to addresses. + DenseMap PCLabelMap; + + // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) + // addresses to their indirect symbol addresses. + DenseMap Sym2IndirectSymMap; + + // IsPIC - True if the relocation model is PIC. This is used to determine + // how to codegen function stubs. + bool IsPIC; + + public: + explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on ARM. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + bool hasCustomConstantPool() const override { return true; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + bool hasCustomJumpTables() const override { return true; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + bool allocateSeparateGVMemory() const override { +#ifdef __APPLE__ + return true; +#else + return false; +#endif + } + + /// Initialize - Initialize internal stage for the function being JITted. + /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize + /// jump table ids to jump table bases map; remember if codegen relocation + /// model is PIC. + void Initialize(const MachineFunction &MF, bool isPIC); + + /// getConstantPoolEntryAddr - The ARM target puts all constant + /// pool entries into constant islands. This returns the address of the + /// constant pool entry of the specified index. + intptr_t getConstantPoolEntryAddr(unsigned CPI) const { + assert(CPI < ConstPoolId2AddrMap.size()); + return ConstPoolId2AddrMap[CPI]; + } + + /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address + /// where its associated value is stored. When relocations are processed, + /// this value will be used to resolve references to the constant. + void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { + assert(CPI < ConstPoolId2AddrMap.size()); + ConstPoolId2AddrMap[CPI] = Addr; + } + + /// getJumpTableBaseAddr - The ARM target inline all jump tables within + /// text section of the function. This returns the address of the base of + /// the jump table of the specified index. + intptr_t getJumpTableBaseAddr(unsigned JTI) const { + assert(JTI < JumpTableId2AddrMap.size()); + return JumpTableId2AddrMap[JTI]; + } + + /// addJumpTableBaseAddr - Map a jump table index to the address where + /// the corresponding inline jump table is emitted. When relocations are + /// processed, this value will be used to resolve references to the + /// jump table. + void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { + assert(JTI < JumpTableId2AddrMap.size()); + JumpTableId2AddrMap[JTI] = Addr; + } + + /// getPCLabelAddr - Retrieve the address of the PC label of the + /// specified id. + intptr_t getPCLabelAddr(unsigned Id) const { + DenseMap::const_iterator I = PCLabelMap.find(Id); + assert(I != PCLabelMap.end()); + return I->second; + } + + /// addPCLabelAddr - Remember the address of the specified PC label. + void addPCLabelAddr(unsigned Id, intptr_t Addr) { + PCLabelMap.insert(std::make_pair(Id, Addr)); + } + + /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the + /// specified symbol located at address. Returns 0 if the indirect symbol + /// has not been emitted. + intptr_t getIndirectSymAddr(void *Addr) const { + DenseMap::const_iterator I= Sym2IndirectSymMap.find(Addr); + if (I != Sym2IndirectSymMap.end()) + return I->second; + return 0; + } + + /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to + /// its indirect symbol address. + void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { + Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); + } + + private: + /// resolveRelocDestAddr - Resolve the resulting address of the relocation + /// if it's not already solved. Constantpool entries must be resolved by + /// ARM target. + intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; + }; +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2ce083ca2b6..c1b4562f411 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,6 +15,7 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "ARMMachineFunctionInfo.h" @@ -157,7 +158,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), + TSInfo(DL), JITInfo(), InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 10c61ccf02a..f79b69199fb 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -18,11 +18,13 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -259,6 +261,7 @@ protected: const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMBaseInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } @@ -275,6 +278,7 @@ protected: private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; // Either Thumb1InstrInfo or Thumb2InstrInfo. std::unique_ptr InstrInfo; ARMTargetLowering TLInfo; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 20e2624c830..d85194b75ec 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -244,3 +244,10 @@ bool ARMPassConfig::addPreEmitPass() { return true; } + +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for ARM. + PM.add(createARMJITCodeEmitterPass(*this, JCE)); + return false; +} diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index d26d817ba7b..8b559682211 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -39,6 +39,8 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; }; /// ARMTargetMachine - ARM target machine. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 2530640139a..9b5fa75fe2a 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -2,7 +2,8 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) @@ -18,6 +19,7 @@ add_llvm_target(ARMCodeGen ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp + ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp @@ -27,6 +29,7 @@ add_llvm_target(ARMCodeGen ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp + ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index c1601a3f29d..f069535ff3c 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -15,7 +15,7 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ - ARMGenCallingConv.inc \ + ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index c61805b63f5..06a74d72081 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMTarget Target.cpp TargetIntrinsicInfo.cpp + TargetJITInfo.cpp TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 6028db68507..bf67d71b0a0 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -3,7 +3,8 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel) @@ -23,9 +24,11 @@ add_llvm_target(MipsCodeGen Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp + MipsCodeEmitter.cpp MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp MipsFastISel.cpp + MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 56db450f696..41efa470e42 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -13,7 +13,7 @@ TARGET = Mips # Make sure that tblgen is run, first thing. BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ - MipsGenAsmWriter.inc MipsGenFastISel.inc \ + MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ MipsGenDisassemblerTables.inc \ diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 387ef9f5ae4..d512d6589c4 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -26,6 +26,8 @@ namespace llvm { FunctionPass *createMipsOptimizePICCallPass(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); + FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE); FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); } // end namespace llvm; diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 8d9cb024a9e..3ca0ffe23eb 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// #include "Mips16ISelLowering.h" #include "MCTargetDesc/MipsBaseInfo.h" -#include "Mips16HardFloatInfo.h" -#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/ADT/StringRef.h" diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp new file mode 100644 index 00000000000..3885bb96e4f --- /dev/null +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -0,0 +1,483 @@ +//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Mips machine instructions +// into relocatable machine code. +// +//===---------------------------------------------------------------------===// + +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include +#endif + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class MipsCodeEmitter : public MachineFunctionPass { + MipsJITInfo *JTI; + const MipsInstrInfo *II; + const DataLayout *TD; + const MipsSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector *MCPEs; + const std::vector *MJTEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + +public: + MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Mips Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB); + +private: + + void emitWord(unsigned Word); + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getJumpTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const; + + unsigned getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMSAMemEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const; + + /// Expand pseudo instructions with accumulator register operands. + void expandACCInstr(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB, unsigned Opc) const; + + void expandPseudoIndirectBranch(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) const; + + /// \brief Expand pseudo instruction. Return true if MI was expanded. + bool expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const; +}; +} + +char MipsCodeEmitter::ID = 0; + +bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + MipsTargetMachine &Target = static_cast( + const_cast(MF.getTarget())); + // Initialize the subtarget so that we can grab the subtarget dependent + // variables from it. + Subtarget = &TM.getSubtarget(); + JTI = Target.getSubtargetImpl()->getJITInfo(); + II = Subtarget->getInstrInfo(); + TD = Subtarget->getDataLayout(); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = nullptr; + if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + JTI->Initialize(MF, IsPIC, Subtarget->isLittle()); + MCE.setModuleInfo(&getAnalysis ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E;) + emitInstruction(*I++, *MBB); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + // NOTE: This relocations are for static. + uint64_t TSFlags = MI.getDesc().TSFlags; + uint64_t Form = TSFlags & MipsII::FormMask; + if (Form == MipsII::FrmJ) + return Mips::reloc_mips_26; + if ((Form == MipsII::FrmI || Form == MipsII::FrmFI) + && MI.isBranch()) + return Mips::reloc_mips_pc16; + if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi) + return Mips::reloc_mips_hi; + return Mips::reloc_mips_lo; +} + +unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + MachineOperand MO = MI.getOperand(OpNo); + if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unexpected jump target operand kind."); + return 0; +} + +unsigned MipsCodeEmitter::getJumpTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + MachineOperand MO = MI.getOperand(OpNo); + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + return 0; +} + +unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16; + return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; +} + +unsigned MipsCodeEmitter::getMemEncodingMMImm12(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getMSAMemEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // size is encoded as size-1. + return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; +} + +unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // size is encoded as pos+size-1. + return getMachineOpValue(MI, MI.getOperand(OpNo-1)) + + getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; +} + +unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + else if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} + +void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + bool MayNeedFarStub) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), 0, + MayNeedFarStub)); +} + +void MipsCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0)); +} + +void MipsCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void MipsCodeEmitter:: +emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, false)); +} + +void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + +void MipsCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI); + + // Expand pseudo instruction. Skip if MI was not expanded. + if (((MI->getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) && + !expandPseudos(MI, MBB)) + return; + + MCE.processDebugLoc(MI->getDebugLoc(), true); + + emitWord(getBinaryCodeForInstr(*MI)); + ++NumEmitted; // Keep track of the # of mi's emitted + + MCE.processDebugLoc(MI->getDebugLoc(), false); +} + +void MipsCodeEmitter::emitWord(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + if (Subtarget->isLittle()) + MCE.emitWordLE(Word); + else + MCE.emitWordBE(Word); +} + +void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB, + unsigned Opc) const { + // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1". + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opc)) + .addReg(MI->getOperand(1).getReg()).addReg(MI->getOperand(2).getReg()); +} + +void MipsCodeEmitter::expandPseudoIndirectBranch( + MachineBasicBlock::instr_iterator MI, MachineBasicBlock &MBB) const { + // This logic is duplicated from MipsAsmPrinter::emitPseudoIndirectBranch() + bool HasLinkReg = false; + unsigned Opcode = 0; + + if (Subtarget->hasMips64r6()) { + // MIPS64r6 should use (JALR64 ZERO_64, $rs) + Opcode = Mips::JALR64; + HasLinkReg = true; + } else if (Subtarget->hasMips32r6()) { + // MIPS32r6 should use (JALR ZERO, $rs) + Opcode = Mips::JALR; + HasLinkReg = true; + } else if (Subtarget->inMicroMipsMode()) + // microMIPS should use (JR_MM $rs) + Opcode = Mips::JR_MM; + else { + // Everything else should use (JR $rs) + Opcode = Mips::JR; + } + + auto MIB = BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Opcode)); + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO; + MIB.addReg(ZeroReg); + } + + MIB.addReg(MI->getOperand(0).getReg()); +} + +bool MipsCodeEmitter::expandPseudos(MachineBasicBlock::instr_iterator &MI, + MachineBasicBlock &MBB) const { + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unhandled pseudo"); + return false; + case Mips::NOP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::SLL), Mips::ZERO) + .addReg(Mips::ZERO).addImm(0); + break; + case Mips::B: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BEQ)).addReg(Mips::ZERO) + .addReg(Mips::ZERO).addOperand(MI->getOperand(0)); + break; + case Mips::TRAP: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::BREAK)).addImm(0) + .addImm(0); + break; + case Mips::JALRPseudo: + BuildMI(MBB, &*MI, MI->getDebugLoc(), II->get(Mips::JALR), Mips::RA) + .addReg(MI->getOperand(0).getReg()); + break; + case Mips::PseudoMULT: + expandACCInstr(MI, MBB, Mips::MULT); + break; + case Mips::PseudoMULTu: + expandACCInstr(MI, MBB, Mips::MULTu); + break; + case Mips::PseudoSDIV: + expandACCInstr(MI, MBB, Mips::SDIV); + break; + case Mips::PseudoUDIV: + expandACCInstr(MI, MBB, Mips::UDIV); + break; + case Mips::PseudoMADD: + expandACCInstr(MI, MBB, Mips::MADD); + break; + case Mips::PseudoMADDU: + expandACCInstr(MI, MBB, Mips::MADDU); + break; + case Mips::PseudoMSUB: + expandACCInstr(MI, MBB, Mips::MSUB); + break; + case Mips::PseudoMSUBU: + expandACCInstr(MI, MBB, Mips::MSUBU); + break; + case Mips::PseudoReturn: + case Mips::PseudoReturn64: + case Mips::PseudoIndirectBranch: + case Mips::PseudoIndirectBranch64: + expandPseudoIndirectBranch(MI, MBB); + break; + case TargetOpcode::CFI_INSTRUCTION: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + // Do nothing + return false; + } + + (MI--)->eraseFromBundle(); + return true; +} + +/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips +/// code to the specified MCE object. +FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, + JITCodeEmitter &JCE) { + return new MipsCodeEmitter(TM, JCE); +} + +#include "MipsGenCodeEmitter.inc" diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index f40e53a34d8..57c20a6cf48 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -28,7 +28,6 @@ #include "MipsTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ddbb2324e9a..416de552b4a 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp new file mode 100644 index 00000000000..2072488206a --- /dev/null +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -0,0 +1,286 @@ +//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Mips target. +// +//===----------------------------------------------------------------------===// + +#include "MipsJITInfo.h" +#include "MipsInstrInfo.h" +#include "MipsRelocations.h" +#include "MipsSubtarget.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "jit" + + +void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. + OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. + if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. This code saves registers, calls +// MipsCompilationCallbackC and restores registers. +extern "C" { +#if defined (__mips__) +void MipsCompilationCallback(); + + asm( + ".text\n" + ".align 2\n" + ".globl " ASMPREFIX "MipsCompilationCallback\n" + ASMPREFIX "MipsCompilationCallback:\n" + ".ent " ASMPREFIX "MipsCompilationCallback\n" + ".frame $sp, 32, $ra\n" + ".set noreorder\n" + ".cpload $t9\n" + + "addiu $sp, $sp, -64\n" + ".cprestore 16\n" + + // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain + // stuff for the real target function right now. We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned. We also need to save the ra register since it contains the + // original return address, and t8 register since it contains the address + // of the end of function stub. + "sw $a0, 20($sp)\n" + "sw $a1, 24($sp)\n" + "sw $a2, 28($sp)\n" + "sw $a3, 32($sp)\n" + "sw $ra, 36($sp)\n" + "sw $t8, 40($sp)\n" + "sdc1 $f12, 48($sp)\n" + "sdc1 $f14, 56($sp)\n" + + // t8 points at the end of function stub. Pass the beginning of the stub + // to the MipsCompilationCallbackC. + "addiu $a0, $t8, -16\n" + "jal " ASMPREFIX "MipsCompilationCallbackC\n" + "nop\n" + + // Restore registers. + "lw $a0, 20($sp)\n" + "lw $a1, 24($sp)\n" + "lw $a2, 28($sp)\n" + "lw $a3, 32($sp)\n" + "lw $ra, 36($sp)\n" + "lw $t8, 40($sp)\n" + "ldc1 $f12, 48($sp)\n" + "ldc1 $f14, 56($sp)\n" + "addiu $sp, $sp, 64\n" + + // Jump to the (newly modified) stub to invoke the real function. + "addiu $t8, $t8, -16\n" + "jr $t8\n" + "nop\n" + + ".set reorder\n" + ".end " ASMPREFIX "MipsCompilationCallback\n" + ); +#else // host != Mips + void MipsCompilationCallback() { + llvm_unreachable( + "Cannot call MipsCompilationCallback() on a non-Mips arch!"); + } +#endif +} + +/// MipsCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void MipsCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. We're replacing the first four instructions of the + // stub with code that jumps to the compiled function: + // lui $t9, %hi(NewVal) + // addiu $t9, $t9, %lo(NewVal) + // jr $t9 + // nop + + int Hi = ((unsigned)NewVal & 0xffff0000) >> 16; + if ((NewVal & 0x8000) != 0) + Hi++; + int Lo = (int)(NewVal & 0xffff); + + *(intptr_t *)(StubAddr) = 0xf << 26 | 25 << 16 | Hi; + *(intptr_t *)(StubAddr + 4) = 9 << 26 | 25 << 21 | 25 << 16 | Lo; + *(intptr_t *)(StubAddr + 8) = 25 << 21 | 8; + *(intptr_t *)(StubAddr + 12) = 0; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, 16); +} + +TargetJITInfo::LazyResolverFn MipsJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return MipsCompilationCallback; +} + +TargetJITInfo::StubLayout MipsJITInfo::getStubLayout() { + // The stub contains 4 4-byte instructions, aligned at 4 bytes. See + // emitFunctionStub for details. + StubLayout Result = { 4*4, 4 }; + return Result; +} + +void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) { + JCE.emitAlignment(4); + void *Addr = (void*) (JCE.getCurrentPCValue()); + if (!sys::Memory::setRangeWritable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + intptr_t EmittedAddr; + if (Fn != (void*)(intptr_t)MipsCompilationCallback) + EmittedAddr = (intptr_t)Fn; + else + EmittedAddr = (intptr_t)MipsCompilationCallback; + + + int Hi = ((unsigned)EmittedAddr & 0xffff0000) >> 16; + if ((EmittedAddr & 0x8000) != 0) + Hi++; + int Lo = (int)(EmittedAddr & 0xffff); + + // lui $t9, %hi(EmittedAddr) + // addiu $t9, $t9, %lo(EmittedAddr) + // jalr $t8, $t9 + // nop + if (IsLittleEndian) { + JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordLE(25 << 21 | 24 << 11 | 9); + JCE.emitWordLE(0); + } else { + JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi); + JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo); + JCE.emitWordBE(25 << 21 | 24 << 11 | 9); + JCE.emitWordBE(0); + } + + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void MipsJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((Mips::RelocationType) MR->getRelocationType()) { + case Mips::reloc_mips_pc16: + ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_26: + ResultPtr = (ResultPtr & 0x0fffffff) >> 2; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_hi: + ResultPtr = ResultPtr >> 16; + if ((((intptr_t) (MR->getResultPointer()) & 0xffff) >> 15) == 1) { + ResultPtr += 1; + } + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + + case Mips::reloc_mips_lo: { + // Addend is needed for unaligned load/store instructions, where offset + // for the second load/store in the expanded instruction sequence must + // be modified by +1 or +3. Otherwise, Addend is 0. + int Addend = *((unsigned*) RelocPos) & 0xffff; + ResultPtr = (ResultPtr + Addend) & 0xffff; + *((unsigned*) RelocPos) &= 0xffff0000; + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + break; + } + } + } +} diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h new file mode 100644 index 00000000000..c9dfd831d2d --- /dev/null +++ b/lib/Target/Mips/MipsJITInfo.h @@ -0,0 +1,71 @@ +//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MipsJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSJITINFO_H +#define MIPSJITINFO_H + +#include "MipsMachineFunction.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class MipsTargetMachine; + +class MipsJITInfo : public TargetJITInfo { + + bool IsPIC; + bool IsLittleEndian; + + public: + explicit MipsJITInfo() : + IsPIC(false), IsLittleEndian(true) {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on Mips. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; + + /// Initialize - Initialize internal stage for the function being JITted. + void Initialize(const MachineFunction &MF, bool isPIC, + bool isLittleEndian) { + IsPIC = isPIC; + IsLittleEndian = isLittleEndian; + } + +}; +} + +#endif diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 71a6f4d4a11..19dac0c0419 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -16,7 +16,6 @@ #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsMCNaCl.h" -#include "MipsMachineFunction.h" #include "MipsTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index c1bbf61712b..0733a62cc8a 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// #include "MipsSEISelLowering.h" -#include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 1b0f5d7fa91..5bf875daea9 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -115,7 +115,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), TM(_TM), TargetTriple(TT), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))), - TSInfo(DL), InstrInfo(MipsInstrInfo::create(*this)), + TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(*TM, *this)) { diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ab01414b6b8..3f7a6c3ed73 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -17,6 +17,7 @@ #include "MipsFrameLowering.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" +#include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" @@ -144,6 +145,7 @@ protected: const DataLayout DL; // Calculates type size & alignment const MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; std::unique_ptr InstrInfo; std::unique_ptr FrameLowering; std::unique_ptr TLInfo; @@ -270,6 +272,7 @@ public: void setHelperClassesMips16(); void setHelperClassesMipsSE(); + MipsJITInfo *getJITInfo() override { return &JITInfo; } const MipsSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 79d1b4b13e9..ccf420962cd 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -189,3 +189,10 @@ bool MipsPassConfig::addPreEmitPass() { addPass(createMipsConstantIslandPass(TM)); return true; } + +bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Mips. + PM.add(createMipsJITCodeEmitterPass(*this, JCE)); + return false; +} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index e82efe2ecd2..eefd96ab4ae 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -44,12 +44,16 @@ public: return Subtarget; return &DefaultSubtarget; } + MipsSubtarget *getSubtargetImpl() { + return static_cast(TargetMachine::getSubtargetImpl()); + } /// \brief Reset the subtarget for the Mips target. void resetSubtarget(MachineFunction *MF); // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; /// MipsebTargetMachine - Mips32/64 big endian target machine. diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index c0b44598a4b..55bb7293058 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -43,6 +43,12 @@ public: TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + // Emission of machine code through JITCodeEmitter is not supported. + bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, + bool = true) override { + return true; + } + // Emission of machine code through MCJIT is not supported. bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) override { diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 47a9474ae16..ea4de63a244 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -2,8 +2,9 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel) @@ -15,6 +16,7 @@ add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen PPCAsmPrinter.cpp PPCBranchSelector.cpp + PPCCodeEmitter.cpp PPCCTRLoops.cpp PPCHazardRecognizers.cpp PPCInstrInfo.cpp @@ -22,6 +24,7 @@ add_llvm_target(PowerPCCodeGen PPCISelLowering.cpp PPCFastISel.cpp PPCFrameLowering.cpp + PPCJITInfo.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp PPCRegisterInfo.cpp diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile index cf516f4e5ec..c96674809b0 100644 --- a/lib/Target/PowerPC/Makefile +++ b/lib/Target/PowerPC/Makefile @@ -13,7 +13,7 @@ TARGET = PPC # Make sure that tblgen is run, first thing. BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \ - PPCGenAsmWriter.inc \ + PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \ PPCGenInstrInfo.inc PPCGenDAGISel.inc \ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \ PPCGenMCCodeEmitter.inc PPCGenFastISel.inc \ diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 87be6b5382b..ba5fa4f79b4 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -26,6 +26,7 @@ namespace llvm { class PassRegistry; class FunctionPass; class ImmutablePass; + class JITCodeEmitter; class MachineInstr; class AsmPrinter; class MCInst; @@ -40,6 +41,8 @@ namespace llvm { FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); + FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &MCE); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 5f3b1764173..333780f1fcd 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -386,7 +386,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { return true; const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) + if (TLI->supportJumpTables() && + SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) return true; } } diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp new file mode 100644 index 00000000000..cf704fab277 --- /dev/null +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -0,0 +1,295 @@ +//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to +// JIT-compile bitcode to native PowerPC. +// +//===----------------------------------------------------------------------===// + +#include "PPC.h" +#include "PPCRelocations.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +namespace { + class PPCCodeEmitter : public MachineFunctionPass { + TargetMachine &TM; + JITCodeEmitter &MCE; + MachineModuleInfo *MMI; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + + /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record + /// its address in the function into this pointer. + void *MovePCtoLROffset; + public: + + PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), TM(tm), MCE(mce) {} + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + + MachineRelocation GetRelocation(const MachineOperand &MO, + unsigned RelocID) const; + + /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; + + const char *getPassName() const override { + return "PowerPC Machine Code Emitter"; + } + + /// runOnMachineFunction - emits the given MachineFunction to memory + /// + bool runOnMachineFunction(MachineFunction &MF) override; + + /// emitBasicBlock - emits the given MachineBasicBlock to memory + /// + void emitBasicBlock(MachineBasicBlock &MBB); + }; +} + +char PPCCodeEmitter::ID = 0; + +/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code +/// to the specified MCE object. +FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM, + JITCodeEmitter &JCE) { + return new PPCCodeEmitter(TM, JCE); +} + +bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + + MMI = &getAnalysis(); + MCE.setModuleInfo(MMI); + do { + MovePCtoLROffset = nullptr; + MCE.startFunction(MF); + for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) + emitBasicBlock(*BB); + } while (MCE.finishFunction(MF)); + + return false; +} + +void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) { + MCE.StartMachineBasicBlock(&MBB); + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){ + const MachineInstr &MI = *I; + MCE.processDebugLoc(MI.getDebugLoc(), true); + switch (MI.getOpcode()) { + default: + MCE.emitWordBE(getBinaryCodeForInstr(MI)); + break; + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + break; // pseudo opcode, no side effects + case PPC::MovePCtoLR: + case PPC::MovePCtoLR8: + assert(TM.getRelocationModel() == Reloc::PIC_); + MovePCtoLROffset = (void*)MCE.getCurrentPCValue(); + MCE.emitWordBE(0x48000005); // bl 1 + break; + } + MCE.processDebugLoc(MI.getDebugLoc(), false); + } +} + +unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 || + MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) && + (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); + return 0x80 >> TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); +} + +MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, + unsigned RelocID) const { + // If in PIC mode, we need to encode the negated address of the + // 'movepctolr' into the unrelocated field. After relocation, we'll have + // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm + // field, we get &gv. This doesn't happen for branch relocations, which are + // always implicitly pc relative. + intptr_t Cst = 0; + if (TM.getRelocationModel() == Reloc::PIC_) { + assert(MovePCtoLROffset && "MovePCtoLR not seen yet?"); + Cst = -(intptr_t)MovePCtoLROffset - 4; + } + + if (MO.isGlobal()) + return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID, + const_cast(MO.getGlobal()), + Cst, isa(MO.getGlobal())); + if (MO.isSymbol()) + return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + RelocID, MO.getSymbolName(), Cst); + if (MO.isCPI()) + return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); + + if (MO.isMBB()) + return MachineRelocation::getBB(MCE.getCurrentPCOffset(), + RelocID, MO.getMBB()); + + assert(MO.isJTI()); + return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + RelocID, MO.getIndex(), Cst); +} + +unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx)); + return 0; +} + +unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx)); + return 0; +} + +unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); +} + +unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Absolute branch relocations unsupported on the old JIT."); +} + +unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI, + unsigned OpNo) const { + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO); + + unsigned RelocID; + switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) { + default: llvm_unreachable("Unsupported target operand flags!"); + case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break; + case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break; + } + + MCE.addRelocation(GetRelocation(MO, RelocID)); + return 0; +} + +unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memri, which has the low 16-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits; + + // Add a fixup for the displacement field. + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low)); + return RegBits; +} + +unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI, + unsigned OpNo) const { + // Encode (imm, reg) as a memrix, which has the low 14-bits as the + // displacement and the next 5 bits as the register #. + assert(MI.getOperand(OpNo+1).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14; + + const MachineOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) + return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits; + + MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix)); + return RegBits; +} + + +unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + +unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("TLS not supported on the old JIT."); + return 0; +} + +unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + + if (MO.isReg()) { + // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand. + // The GPR operand should come through here though. + assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 && + MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) || + MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + } + + assert(MO.isImm() && + "Relocation required in an instruction that we cannot encode!"); + return MO.getImm(); +} + +#include "PPCGenCodeEmitter.inc" diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 61003cf1b34..36e1e1334e5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -684,6 +684,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); + if (isPPC64 && Subtarget.isJITCodeModel()) + // Temporary workaround for the inability of PPC64 JIT to handle jump + // tables. + setSupportJumpTables(false); + setInsertFencesForAtomic(true); if (Subtarget.enableMachineScheduler()) @@ -3559,27 +3564,33 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, } if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - unsigned OpFlags = 0; - if ((DAG.getTarget().getRelocationModel() != Reloc::Static && - (Subtarget.getTargetTriple().isMacOSX() && - Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || - (Subtarget.isTargetELF() && !isPPC64 && - !G->getGlobal()->hasLocalLinkage() && - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { - // PC-relative references to external symbols should go through $stub, - // unless we're building with the leopard linker or later, which - // automatically synthesizes these stubs. - OpFlags = PPCII::MO_PLT_OR_STUB; - } + // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 + // Use indirect calls for ALL functions calls in JIT mode, since the + // far-call stubs may be outside relocation limits for a BL instruction. + if (!DAG.getTarget().getSubtarget().isJITCodeModel()) { + unsigned OpFlags = 0; + if ((DAG.getTarget().getRelocationModel() != Reloc::Static && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + (G->getGlobal()->isDeclaration() || + G->getGlobal()->isWeakForLinker())) || + (Subtarget.isTargetELF() && !isPPC64 && + !G->getGlobal()->hasLocalLinkage() && + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { + // PC-relative references to external symbols should go through $stub, + // unless we're building with the leopard linker or later, which + // automatically synthesizes these stubs. + OpFlags = PPCII::MO_PLT_OR_STUB; + } - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, - // every direct call is) turn it into a TargetGlobalAddress / - // TargetExternalSymbol node so that legalize doesn't hack it. - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - Callee.getValueType(), 0, OpFlags); - needIndirectCall = false; + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress / + // TargetExternalSymbol node so that legalize doesn't hack it. + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType(), + 0, OpFlags); + needIndirectCall = false; + } } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp new file mode 100644 index 00000000000..e5f113a0c03 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -0,0 +1,482 @@ +//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the 32-bit PowerPC target. +// +//===----------------------------------------------------------------------===// + +#include "PPCJITInfo.h" +#include "PPCRelocations.h" +#include "PPCSubtarget.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "jit" + +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +PPCJITInfo::PPCJITInfo(PPCSubtarget &STI) + : Subtarget(STI), is64Bit(STI.isPPC64()) { + useGOT = 0; +} + +#define BUILD_ADDIS(RD,RS,IMM16) \ + ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) +#define BUILD_ORI(RD,RS,UIMM16) \ + ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_ORIS(RD,RS,UIMM16) \ + ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535)) +#define BUILD_RLDICR(RD,RS,SH,ME) \ + ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \ + (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1)) +#define BUILD_MTSPR(RS,SPR) \ + ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1)) +#define BUILD_BCCTRx(BO,BI,LINK) \ + ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1)) +#define BUILD_B(TARGET, LINK) \ + ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1)) + +// Pseudo-ops +#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16) +#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6) +#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9) +#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK) + +static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){ + intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2; + unsigned *AtI = (unsigned*)(intptr_t)At; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? + AtI[0] = BUILD_B(Offset, isCall); // b/bl target + } else if (!is64Bit) { + AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[2] = BUILD_MTCTR(12); // mtctr r12 + AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl + } else { + AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address) + AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address) + AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32 + AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address) + AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address) + AtI[5] = BUILD_MTCTR(12); // mtctr r12 + AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl + } +} + +extern "C" void PPC32CompilationCallback(); +extern "C" void PPC64CompilationCallback(); + +// The first clause of the preprocessor directive looks wrong, but it is +// necessary when compiling this code on non-PowerPC hosts. +#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__) +void PPC32CompilationCallback() { + llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!"); +} +#elif !defined(__ELF__) +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl _PPC32CompilationCallback\n" +"_PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // PowerPC32 ABI linkage - 24 bytes + // parameters - 32 bytes + // 13 double registers - 104 bytes + // 8 int registers - 32 bytes + "mflr r0\n" + "stw r0, 8(r1)\n" + "stwu r1, -208(r1)\n" + // Save all int arg registers + "stw r10, 204(r1)\n" "stw r9, 200(r1)\n" + "stw r8, 196(r1)\n" "stw r7, 192(r1)\n" + "stw r6, 188(r1)\n" "stw r5, 184(r1)\n" + "stw r4, 180(r1)\n" "stw r3, 176(r1)\n" + // Save all call-clobbered FP regs. + "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n" + "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n" + "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n" + "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n" + "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n" + "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n" + "stfd f1, 72(r1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr r3, r0\n" + "lwz r2, 208(r1)\n" // stub's frame + "lwz r4, 8(r2)\n" // stub's lr + "li r5, 0\n" // 0 == 32 bit + "bl _LLVMPPCCompilationCallback\n" + "mtctr r3\n" + // Restore all int arg registers + "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n" + "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n" + "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n" + "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n" + // Restore all FP arg registers + "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n" + "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n" + "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n" + "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n" + "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n" + "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n" + "lfd f1, 72(r1)\n" + // Pop 3 frames off the stack and branch to target + "lwz r1, 208(r1)\n" + "lwz r2, 8(r1)\n" + "mtlr r2\n" + "bctr\n" + ); + +#else +// ELF PPC 32 support + +// CompilationCallback stub - We can't use a C function with inline assembly in +// it, because we the prolog/epilog inserted by GCC won't work for us. Instead, +// write our own wrapper, which does things our way, so we have complete control +// over register saving and restoring. +asm( + ".text\n" + ".align 2\n" + ".globl PPC32CompilationCallback\n" +"PPC32CompilationCallback:\n" + // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the + // FIXME: need to save v[0-19] for altivec? + // FIXME: could shrink frame + // Set up a proper stack frame + // FIXME Layout + // 8 double registers - 64 bytes + // 8 int registers - 32 bytes + "mflr 0\n" + "stw 0, 4(1)\n" + "stwu 1, -104(1)\n" + // Save all int arg registers + "stw 10, 100(1)\n" "stw 9, 96(1)\n" + "stw 8, 92(1)\n" "stw 7, 88(1)\n" + "stw 6, 84(1)\n" "stw 5, 80(1)\n" + "stw 4, 76(1)\n" "stw 3, 72(1)\n" + // Save all call-clobbered FP regs. + "stfd 8, 64(1)\n" + "stfd 7, 56(1)\n" "stfd 6, 48(1)\n" + "stfd 5, 40(1)\n" "stfd 4, 32(1)\n" + "stfd 3, 24(1)\n" "stfd 2, 16(1)\n" + "stfd 1, 8(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 0. + "mr 3, 0\n" + "lwz 5, 104(1)\n" // stub's frame + "lwz 4, 4(5)\n" // stub's lr + "li 5, 0\n" // 0 == 32 bit + "bl LLVMPPCCompilationCallback\n" + "mtctr 3\n" + // Restore all int arg registers + "lwz 10, 100(1)\n" "lwz 9, 96(1)\n" + "lwz 8, 92(1)\n" "lwz 7, 88(1)\n" + "lwz 6, 84(1)\n" "lwz 5, 80(1)\n" + "lwz 4, 76(1)\n" "lwz 3, 72(1)\n" + // Restore all FP arg registers + "lfd 8, 64(1)\n" + "lfd 7, 56(1)\n" "lfd 6, 48(1)\n" + "lfd 5, 40(1)\n" "lfd 4, 32(1)\n" + "lfd 3, 24(1)\n" "lfd 2, 16(1)\n" + "lfd 1, 8(1)\n" + // Pop 3 frames off the stack and branch to target + "lwz 1, 104(1)\n" + "lwz 0, 4(1)\n" + "mtlr 0\n" + "bctr\n" + ); +#endif + +#if !defined(__powerpc64__) && !defined(__ppc64__) +void PPC64CompilationCallback() { + llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!"); +} +#else +# ifdef __ELF__ +asm( + ".text\n" + ".align 2\n" + ".globl PPC64CompilationCallback\n" +#if _CALL_ELF == 2 + ".type PPC64CompilationCallback,@function\n" +"PPC64CompilationCallback:\n" +#else + ".section \".opd\",\"aw\",@progbits\n" + ".align 3\n" +"PPC64CompilationCallback:\n" + ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n" + ".size PPC64CompilationCallback,24\n" + ".previous\n" + ".align 4\n" + ".type PPC64CompilationCallback,@function\n" +".L.PPC64CompilationCallback:\n" +#endif +# else +asm( + ".text\n" + ".align 2\n" + ".globl _PPC64CompilationCallback\n" +"_PPC64CompilationCallback:\n" +# endif + // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the + // FIXME: need to save v[0-19] for altivec? + // Set up a proper stack frame + // Layout + // PowerPC64 ABI linkage - 48 bytes + // parameters - 64 bytes + // 13 double registers - 104 bytes + // 8 int registers - 64 bytes + "mflr 0\n" + "std 0, 16(1)\n" + "stdu 1, -280(1)\n" + // Save all int arg registers + "std 10, 272(1)\n" "std 9, 264(1)\n" + "std 8, 256(1)\n" "std 7, 248(1)\n" + "std 6, 240(1)\n" "std 5, 232(1)\n" + "std 4, 224(1)\n" "std 3, 216(1)\n" + // Save all call-clobbered FP regs. + "stfd 13, 208(1)\n" "stfd 12, 200(1)\n" + "stfd 11, 192(1)\n" "stfd 10, 184(1)\n" + "stfd 9, 176(1)\n" "stfd 8, 168(1)\n" + "stfd 7, 160(1)\n" "stfd 6, 152(1)\n" + "stfd 5, 144(1)\n" "stfd 4, 136(1)\n" + "stfd 3, 128(1)\n" "stfd 2, 120(1)\n" + "stfd 1, 112(1)\n" + // Arguments to Compilation Callback: + // r3 - our lr (address of the call instruction in stub plus 4) + // r4 - stub's lr (address of instruction that called the stub plus 4) + // r5 - is64Bit - always 1. + "mr 3, 0\n" // return address (still in r0) + "ld 5, 280(1)\n" // stub's frame + "ld 4, 16(5)\n" // stub's lr + "li 5, 1\n" // 1 == 64 bit +# ifdef __ELF__ + "bl LLVMPPCCompilationCallback\n" + "nop\n" +# else + "bl _LLVMPPCCompilationCallback\n" +# endif + "mtctr 3\n" + // Restore all int arg registers + "ld 10, 272(1)\n" "ld 9, 264(1)\n" + "ld 8, 256(1)\n" "ld 7, 248(1)\n" + "ld 6, 240(1)\n" "ld 5, 232(1)\n" + "ld 4, 224(1)\n" "ld 3, 216(1)\n" + // Restore all FP arg registers + "lfd 13, 208(1)\n" "lfd 12, 200(1)\n" + "lfd 11, 192(1)\n" "lfd 10, 184(1)\n" + "lfd 9, 176(1)\n" "lfd 8, 168(1)\n" + "lfd 7, 160(1)\n" "lfd 6, 152(1)\n" + "lfd 5, 144(1)\n" "lfd 4, 136(1)\n" + "lfd 3, 128(1)\n" "lfd 2, 120(1)\n" + "lfd 1, 112(1)\n" + // Pop 3 frames off the stack and branch to target + "ld 1, 280(1)\n" + "ld 0, 16(1)\n" + "mtlr 0\n" + // XXX: any special TOC handling in the ELF case for JIT? + "bctr\n" + ); +#endif + +extern "C" { +LLVM_LIBRARY_VISIBILITY void * +LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { + // Adjust the pointer to the address of the call instruction in the stub + // emitted by emitFunctionStub, rather than the instruction after it. + unsigned *StubCallAddr = StubCallAddrPlus4 - 1; + unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1; + + void *Target = JITCompilerFunction(StubCallAddr); + + // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite + // it to branch directly to the destination. If so, rewrite it so it does not + // need to go through the stub anymore. + unsigned OrigCallInst = *OrigCallAddr; + if ((OrigCallInst >> 26) == 18) { // Direct call. + intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2; + + if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range? + // Clear the original target out. + OrigCallInst &= (63 << 26) | 3; + // Fill in the new target. + OrigCallInst |= (Offset & ((1 << 24)-1)) << 2; + // Replace the call. + *OrigCallAddr = OrigCallInst; + } + } + + // Assert that we are coming from a stub that was created with our + // emitFunctionStub. + if ((*StubCallAddr >> 26) == 18) + StubCallAddr -= 3; + else { + assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!"); + StubCallAddr -= is64Bit ? 9 : 6; + } + + // Rewrite the stub with an unconditional branch to the target, for any users + // who took the address of the stub. + EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit); + sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4); + + // Put the address of the target function to call and the address to return to + // after calling the target function in a place that is easy to get on the + // stack after we restore all regs. + return Target; +} +} + + + +TargetJITInfo::LazyResolverFn +PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { + JITCompilerFunction = Fn; + return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; +} + +TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { + // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 + // instructions to save the caller's address if this is a lazy-compilation + // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address + // into a register and jump through it. + StubLayout Result = {10*4, 4}; + return Result; +} + +#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ +defined(__APPLE__) +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#endif + +void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. The code is the same except for one bit of the last instruction. + if (Fn != (void*)(intptr_t)PPC32CompilationCallback && + Fn != (void*)(intptr_t)PPC64CompilationCallback) { + void *Addr = (void*)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 7*4); + return Addr; + } + + void *Addr = (void*)JCE.getCurrentPCValue(); + if (is64Bit) { + JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0xf9610060); // std r11, 96(r1) + } else if (Subtarget.isDarwinABI()){ + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610028); // stw r11, 40(r1) + } else { + JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) + JCE.emitWordBE(0x7d6802a6); // mflr r11 + JCE.emitWordBE(0x91610024); // stw r11, 36(r1) + } + intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue(); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + JCE.emitWordBE(0); + EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 10*4); + return Addr; +} + + +void PPCJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4; + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((PPC::RelocationType)MR->getRelocationType()) { + default: llvm_unreachable("Unknown relocation type!"); + case PPC::reloc_pcrel_bx: + // PC-relative relocation for b and bl instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2; + break; + case PPC::reloc_pcrel_bcx: + // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other + // bcx instructions. + ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2; + assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) && + "Relocation out of range!"); + *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2; + break; + case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr + case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr + ResultPtr += MR->getConstantVal(); + + // If this is a high-part access, get the high-part. + if (MR->getRelocationType() == PPC::reloc_absolute_high) { + // If the low part will have a carry (really a borrow) from the low + // 16-bits into the high 16, add a bit to borrow from. + if (((int)ResultPtr << 16) < 0) + ResultPtr += 1 << 16; + ResultPtr >>= 16; + } + + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 65535; + unsigned HighBits = *RelocPos & ~65535; + *RelocPos = LowBits | HighBits; // Slam into low 16-bits + break; + } + case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr + ResultPtr += MR->getConstantVal(); + // Do the addition then mask, so the addition does not overflow the 16-bit + // immediate section of the instruction. + unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC; + unsigned HighBits = *RelocPos & 0xFFFF0003; + *RelocPos = LowBits | HighBits; // Slam into low 14-bits. + break; + } + } + } +} + +void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Old, 7*4); +} diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h new file mode 100644 index 00000000000..b6b37ffb852 --- /dev/null +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -0,0 +1,46 @@ +//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PowerPC implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPC_JITINFO_H +#define POWERPC_JITINFO_H + +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class PPCSubtarget; +class PPCJITInfo : public TargetJITInfo { +protected: + PPCSubtarget &Subtarget; + bool is64Bit; + +public: + PPCJITInfo(PPCSubtarget &STI); + + StubLayout getStubLayout() override; + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, + unsigned char *GOTBase) override; + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; +}; +} + +#endif diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index fd7e0c761a2..85b77354de3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -80,9 +80,21 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), OptLevel(OptLevel), TargetABI(PPC_ABI_UNKNOWN), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - DL(getDataLayoutString(*this)), InstrInfo(*this), + DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this), TLInfo(TM), TSInfo(&DL) {} +/// SetJITMode - This is called to inform the subtarget info that we are +/// producing code for the JIT. +void PPCSubtarget::SetJITMode() { + // JIT mode doesn't want lazy resolver stubs, it knows exactly where + // everything is. This matters for PPC64, which codegens in PIC mode without + // stubs. + HasLazyResolverStubs = false; + + // Calls to external functions need to use indirect calls + IsJITCodeModel = true; +} + void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet FnAttrs = MF->getFunction()->getAttributes(); Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, @@ -131,6 +143,7 @@ void PPCSubtarget::initializeEnvironment() { DeprecatedMFTB = false; DeprecatedDST = false; HasLazyResolverStubs = false; + IsJITCodeModel = false; } void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index f74a2a76d41..374962de427 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -17,6 +17,7 @@ #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCJITInfo.h" #include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -103,6 +104,7 @@ protected: bool DeprecatedMFTB; bool DeprecatedDST; bool HasLazyResolverStubs; + bool IsJITCodeModel; bool IsLittleEndian; /// TargetTriple - What processor and OS we're targeting. @@ -120,6 +122,7 @@ protected: PPCFrameLowering FrameLowering; const DataLayout DL; PPCInstrInfo InstrInfo; + PPCJITInfo JITInfo; PPCTargetLowering TLInfo; PPCSelectionDAGInfo TSInfo; @@ -135,6 +138,10 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + /// SetJITMode - This is called to inform the subtarget info that we are + /// producing code for the JIT. + void SetJITMode(); + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. @@ -155,6 +162,7 @@ public: } const DataLayout *getDataLayout() const override { return &DL; } const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } + PPCJITInfo *getJITInfo() override { return &JITInfo; } const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } @@ -199,6 +207,9 @@ public: bool hasLazyResolverStub(const GlobalValue *GV, const TargetMachine &TM) const; + // isJITCodeModel - True if we're generating code for the JIT + bool isJITCodeModel() const { return IsJITCodeModel; } + // isLittleEndian - True if generating little-endian code bool isLittleEndian() const { return IsLittleEndian; } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index e7f961c4324..9563b9045c3 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -148,6 +148,18 @@ bool PPCPassConfig::addPreEmitPass() { return false; } +bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Inform the subtarget that we are in JIT mode. FIXME: does this break macho + // writing? + Subtarget.SetJITMode(); + + // Machine code emitter pass for PowerPC. + PM.add(createPPCJITCodeEmitterPass(*this, JCE)); + + return false; +} + void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our PPC pass. This // allows the PPC pass to delegate to the target independent layer when diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index b8078bdb5b6..9bda22a354d 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -36,6 +36,8 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) override; /// \brief Register PPC analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index c5f4680d49c..49a7f8aa18c 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -6,7 +6,7 @@ tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) -tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer) add_public_tablegen_target(AMDGPUCommonTableGen) diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index c486411f9a1..cebda920e74 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -2,8 +2,9 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM SparcGenCodeEmitter.inc -gen-emitter) tablegen(LLVM SparcGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM SparcGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SparcGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel) @@ -23,6 +24,8 @@ add_llvm_target(SparcCodeGen SparcSubtarget.cpp SparcTargetMachine.cpp SparcSelectionDAGInfo.cpp + SparcJITInfo.cpp + SparcCodeEmitter.cpp SparcMCInstLower.cpp SparcTargetObjectFile.cpp ) diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile index c2a95b47151..bcc02918cdb 100644 --- a/lib/Target/Sparc/Makefile +++ b/lib/Target/Sparc/Makefile @@ -16,7 +16,7 @@ BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \ SparcGenAsmWriter.inc SparcGenAsmMatcher.inc \ SparcGenDAGISel.inc SparcGenDisassemblerTables.inc \ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc \ - SparcGenMCCodeEmitter.inc + SparcGenCodeEmitter.inc SparcGenMCCodeEmitter.inc DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h index 75572f2ea75..de20aaa5db5 100644 --- a/lib/Target/Sparc/Sparc.h +++ b/lib/Target/Sparc/Sparc.h @@ -29,6 +29,8 @@ namespace llvm { FunctionPass *createSparcISelDag(SparcTargetMachine &TM); FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM); + FunctionPass *createSparcJITCodeEmitterPass(SparcTargetMachine &TM, + JITCodeEmitter &JCE); void LowerSparcMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp new file mode 100644 index 00000000000..98239bfc400 --- /dev/null +++ b/lib/Target/Sparc/SparcCodeEmitter.cpp @@ -0,0 +1,281 @@ +//===-- Sparc/SparcCodeEmitter.cpp - Convert Sparc Code to Machine Code ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This file contains the pass that transforms the Sparc machine instructions +// into relocatable machine code. +// +//===---------------------------------------------------------------------===// + +#include "Sparc.h" +#include "MCTargetDesc/SparcMCExpr.h" +#include "SparcRelocations.h" +#include "SparcTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + +class SparcCodeEmitter : public MachineFunctionPass { + SparcJITInfo *JTI; + const SparcInstrInfo *II; + const DataLayout *TD; + const SparcSubtarget *Subtarget; + TargetMachine &TM; + JITCodeEmitter &MCE; + const std::vector *MCPEs; + bool IsPIC; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired (); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static char ID; + +public: + SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Sparc Machine Code Emitter"; + } + + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + + void emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB); + +private: + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const; + + unsigned getCallTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchPredTargetOpValue(const MachineInstr &MI, + unsigned) const; + unsigned getBranchOnRegTargetOpValue(const MachineInstr &MI, + unsigned) const; + + void emitWord(unsigned Word); + + unsigned getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const; + + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc) const; + void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; + void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc) const; +}; +} // end anonymous namespace. + +char SparcCodeEmitter::ID = 0; + +bool SparcCodeEmitter::runOnMachineFunction(MachineFunction &MF) { + SparcTargetMachine &Target = static_cast( + const_cast(MF.getTarget())); + + JTI = Target.getSubtargetImpl()->getJITInfo(); + II = Target.getSubtargetImpl()->getInstrInfo(); + TD = Target.getSubtargetImpl()->getDataLayout(); + Subtarget = &TM.getSubtarget(); + MCPEs = &MF.getConstantPool()->getConstants(); + JTI->Initialize(MF, IsPIC); + MCE.setModuleInfo(&getAnalysis ()); + + do { + DEBUG(errs() << "JITTing function '" + << MF.getName() << "'\n"); + MCE.startFunction(MF); + + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB){ + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), + E = MBB->instr_end(); I != E;) + emitInstruction(*I++, *MBB); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +void SparcCodeEmitter::emitInstruction(MachineBasicBlock::instr_iterator MI, + MachineBasicBlock &MBB) { + DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << *MI); + + MCE.processDebugLoc(MI->getDebugLoc(), true); + + ++NumEmitted; + + switch (MI->getOpcode()) { + default: { + emitWord(getBinaryCodeForInstr(*MI)); + break; + } + case TargetOpcode::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI->getOperand(0).getSymbolName()[0]) { + report_fatal_error("JIT does not support inline asm!"); + } + break; + } + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::EH_LABEL: { + MCE.emitLabel(MI->getOperand(0).getMCSymbol()); + break; + } + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: { + // Do nothing. + break; + } + case SP::GETPCX: { + report_fatal_error("JIT does not support pseudo instruction GETPCX yet!"); + break; + } + } + + MCE.processDebugLoc(MI->getDebugLoc(), false); +} + +void SparcCodeEmitter::emitWord(unsigned Word) { + DEBUG(errs() << " 0x"; + errs().write_hex(Word) << "\n"); + MCE.emitWordBE(Word); +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned SparcCodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { + if (MO.isReg()) + return TM.getSubtargetImpl()->getRegisterInfo()->getEncodingValue( + MO.getReg()); + else if (MO.isImm()) + return static_cast(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO)); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO)); + else + llvm_unreachable("Unable to encode MachineOperand!"); + return 0; +} +unsigned SparcCodeEmitter::getCallTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchPredTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getBranchOnRegTargetOpValue(const MachineInstr &MI, + unsigned opIdx) const { + const MachineOperand MO = MI.getOperand(opIdx); + return getMachineOpValue(MI, MO); +} + +unsigned SparcCodeEmitter::getRelocation(const MachineInstr &MI, + const MachineOperand &MO) const { + + unsigned TF = MO.getTargetFlags(); + switch (TF) { + default: + case SparcMCExpr::VK_Sparc_None: break; + case SparcMCExpr::VK_Sparc_LO: return SP::reloc_sparc_lo; + case SparcMCExpr::VK_Sparc_HI: return SP::reloc_sparc_hi; + case SparcMCExpr::VK_Sparc_H44: return SP::reloc_sparc_h44; + case SparcMCExpr::VK_Sparc_M44: return SP::reloc_sparc_m44; + case SparcMCExpr::VK_Sparc_L44: return SP::reloc_sparc_l44; + case SparcMCExpr::VK_Sparc_HH: return SP::reloc_sparc_hh; + case SparcMCExpr::VK_Sparc_HM: return SP::reloc_sparc_hm; + } + + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: break; + case SP::CALL: return SP::reloc_sparc_pc30; + case SP::BA: + case SP::BCOND: + case SP::FBCOND: return SP::reloc_sparc_pc22; + case SP::BPXCC: return SP::reloc_sparc_pc19; + } + llvm_unreachable("unknown reloc!"); +} + +void SparcCodeEmitter::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), 0, + true)); +} + +void SparcCodeEmitter:: +emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, 0, 0)); +} + +void SparcCodeEmitter:: +emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, false)); +} + +void SparcCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc) const { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB)); +} + + +/// createSparcJITCodeEmitterPass - Return a pass that emits the collected Sparc +/// code to the specified MCE object. +FunctionPass *llvm::createSparcJITCodeEmitterPass(SparcTargetMachine &TM, + JITCodeEmitter &JCE) { + return new SparcCodeEmitter(TM, JCE); +} + +#include "SparcGenCodeEmitter.inc" diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp new file mode 100644 index 00000000000..d0eec98b5e9 --- /dev/null +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -0,0 +1,326 @@ +//===-- SparcJITInfo.cpp - Implement the Sparc JIT Interface --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the Sparc target. +// +//===----------------------------------------------------------------------===// +#include "SparcJITInfo.h" +#include "Sparc.h" +#include "SparcRelocations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Support/Memory.h" + +using namespace llvm; + +#define DEBUG_TYPE "jit" + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +extern "C" void SparcCompilationCallback(); + +extern "C" { +#if defined (__sparc__) + +#if defined(__arch64__) +#define FRAME_PTR(X) #X "+2047" +#else +#define FRAME_PTR(X) #X +#endif + + asm( + ".text\n" + "\t.align 4\n" + "\t.global SparcCompilationCallback\n" + "\t.type SparcCompilationCallback, #function\n" + "SparcCompilationCallback:\n" + // Save current register window and create stack. + // 128 (save area) + 6*8 (for arguments) + 16*8 (for float regfile) = 304 + "\tsave %sp, -304, %sp\n" + // save float regfile to the stack. + "\tstd %f0, [" FRAME_PTR(%fp) "-0]\n" + "\tstd %f2, [" FRAME_PTR(%fp) "-8]\n" + "\tstd %f4, [" FRAME_PTR(%fp) "-16]\n" + "\tstd %f6, [" FRAME_PTR(%fp) "-24]\n" + "\tstd %f8, [" FRAME_PTR(%fp) "-32]\n" + "\tstd %f10, [" FRAME_PTR(%fp) "-40]\n" + "\tstd %f12, [" FRAME_PTR(%fp) "-48]\n" + "\tstd %f14, [" FRAME_PTR(%fp) "-56]\n" + "\tstd %f16, [" FRAME_PTR(%fp) "-64]\n" + "\tstd %f18, [" FRAME_PTR(%fp) "-72]\n" + "\tstd %f20, [" FRAME_PTR(%fp) "-80]\n" + "\tstd %f22, [" FRAME_PTR(%fp) "-88]\n" + "\tstd %f24, [" FRAME_PTR(%fp) "-96]\n" + "\tstd %f26, [" FRAME_PTR(%fp) "-104]\n" + "\tstd %f28, [" FRAME_PTR(%fp) "-112]\n" + "\tstd %f30, [" FRAME_PTR(%fp) "-120]\n" + // stubaddr is in %g1. + "\tcall SparcCompilationCallbackC\n" + "\t mov %g1, %o0\n" + // restore float regfile from the stack. + "\tldd [" FRAME_PTR(%fp) "-0], %f0\n" + "\tldd [" FRAME_PTR(%fp) "-8], %f2\n" + "\tldd [" FRAME_PTR(%fp) "-16], %f4\n" + "\tldd [" FRAME_PTR(%fp) "-24], %f6\n" + "\tldd [" FRAME_PTR(%fp) "-32], %f8\n" + "\tldd [" FRAME_PTR(%fp) "-40], %f10\n" + "\tldd [" FRAME_PTR(%fp) "-48], %f12\n" + "\tldd [" FRAME_PTR(%fp) "-56], %f14\n" + "\tldd [" FRAME_PTR(%fp) "-64], %f16\n" + "\tldd [" FRAME_PTR(%fp) "-72], %f18\n" + "\tldd [" FRAME_PTR(%fp) "-80], %f20\n" + "\tldd [" FRAME_PTR(%fp) "-88], %f22\n" + "\tldd [" FRAME_PTR(%fp) "-96], %f24\n" + "\tldd [" FRAME_PTR(%fp) "-104], %f26\n" + "\tldd [" FRAME_PTR(%fp) "-112], %f28\n" + "\tldd [" FRAME_PTR(%fp) "-120], %f30\n" + // restore original register window and + // copy %o0 to %g1 + "\trestore %o0, 0, %g1\n" + // call the new stub + "\tjmp %g1\n" + "\t nop\n" + "\t.size SparcCompilationCallback, .-SparcCompilationCallback" + ); +#else + void SparcCompilationCallback() { + llvm_unreachable( + "Cannot call SparcCompilationCallback() on a non-sparc arch!"); + } +#endif +} + + +#define SETHI_INST(imm, rd) (0x01000000 | ((rd) << 25) | ((imm) & 0x3FFFFF)) +#define JMP_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x38 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define NOP_INST SETHI_INST(0, 0) +#define OR_INST_I(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define OR_INST_R(rs1, rs2, rd) (0x80000000 | ((rd) << 25) | (0x02 << 19) \ + | ((rs1) << 14) | (0 << 13) | ((rs2) & 0x1F)) +#define RDPC_INST(rd) (0x80000000 | ((rd) << 25) | (0x28 << 19) \ + | (5 << 14)) +#define LDX_INST(rs1, imm, rd) (0xC0000000 | ((rd) << 25) | (0x0B << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define SLLX_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x25 << 19) \ + | ((rs1) << 14) | (3 << 12) | ((imm) & 0x3F)) +#define SUB_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x04 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define XOR_INST(rs1, imm, rd) (0x80000000 | ((rd) << 25) | (0x03 << 19) \ + | ((rs1) << 14) | (1 << 13) | ((imm) & 0x1FFF)) +#define BA_INST(tgt) (0x10800000 | ((tgt) & 0x3FFFFF)) + +// Emit instructions to jump to Addr and store the starting address of +// the instructions emitted in the scratch register. +static void emitInstrForIndirectJump(intptr_t Addr, + unsigned scratch, + SmallVectorImpl &Insts) { + + if (isInt<13>(Addr)) { + // Emit: jmpl %g0+Addr, + // nop + Insts.push_back(JMP_INST(0, LO10(Addr), scratch)); + Insts.push_back(NOP_INST); + return; + } + + if (isUInt<32>(Addr)) { + // Emit: sethi %hi(Addr), scratch + // jmpl scratch+%lo(Addr), scratch + // sub scratch, 4, scratch + Insts.push_back(SETHI_INST(HI22(Addr), scratch)); + Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch)); + Insts.push_back(SUB_INST(scratch, 4, scratch)); + return; + } + + if (Addr < 0 && isInt<33>(Addr)) { + // Emit: sethi %hix(Addr), scratch) + // xor scratch, %lox(Addr), scratch + // jmpl scratch+0, scratch + // sub scratch, 8, scratch + Insts.push_back(SETHI_INST(HIX22(Addr), scratch)); + Insts.push_back(XOR_INST(scratch, LOX10(Addr), scratch)); + Insts.push_back(JMP_INST(scratch, 0, scratch)); + Insts.push_back(SUB_INST(scratch, 8, scratch)); + return; + } + + // Emit: rd %pc, scratch + // ldx [scratch+16], scratch + // jmpl scratch+0, scratch + // sub scratch, 8, scratch + // + Insts.push_back(RDPC_INST(scratch)); + Insts.push_back(LDX_INST(scratch, 16, scratch)); + Insts.push_back(JMP_INST(scratch, 0, scratch)); + Insts.push_back(SUB_INST(scratch, 8, scratch)); + Insts.push_back((uint32_t)(((int64_t)Addr) >> 32) & 0xffffffff); + Insts.push_back((uint32_t)(Addr & 0xffffffff)); + + // Instruction sequence without rdpc instruction + // 7 instruction and 2 scratch register + // Emit: sethi %hh(Addr), scratch + // or scratch, %hm(Addr), scratch + // sllx scratch, 32, scratch + // sethi %hi(Addr), scratch2 + // or scratch, scratch2, scratch + // jmpl scratch+%lo(Addr), scratch + // sub scratch, 20, scratch + // Insts.push_back(SETHI_INST(HH22(Addr), scratch)); + // Insts.push_back(OR_INST_I(scratch, HM10(Addr), scratch)); + // Insts.push_back(SLLX_INST(scratch, 32, scratch)); + // Insts.push_back(SETHI_INST(HI22(Addr), scratch2)); + // Insts.push_back(OR_INST_R(scratch, scratch2, scratch)); + // Insts.push_back(JMP_INST(scratch, LO10(Addr), scratch)); + // Insts.push_back(SUB_INST(scratch, 20, scratch)); +} + +extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t) JITCompilerFunction((void*) StubAddr); + + // Rewrite the function stub so that we don't end up here every time we + // execute the call. We're replacing the stub instructions with code + // that jumps to the compiled function: + + SmallVector Insts; + intptr_t diff = (NewVal - StubAddr) >> 2; + if (isInt<22>(diff)) { + // Use branch instruction to jump + Insts.push_back(BA_INST(diff)); + Insts.push_back(NOP_INST); + } else { + // Otherwise, use indirect jump to the compiled function + emitInstrForIndirectJump(NewVal, 1, Insts); + } + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + *(uint32_t *)(StubAddr + i*4) = Insts[i]; + + sys::Memory::InvalidateInstructionCache((void*) StubAddr, Insts.size() * 4); + return (void*)StubAddr; +} + + +void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + llvm_unreachable("FIXME: Implement SparcJITInfo::" + "replaceMachineCodeForFunction"); +} + + +TargetJITInfo::StubLayout SparcJITInfo::getStubLayout() { + // The stub contains maximum of 4 4-byte instructions and 8 bytes for address, + // aligned at 32 bytes. + // See emitFunctionStub and emitInstrForIndirectJump for details. + StubLayout Result = { 4*4 + 8, 32 }; + return Result; +} + +void *SparcJITInfo::emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) +{ + JCE.emitAlignment(32); + void *Addr = (void*) (JCE.getCurrentPCValue()); + + intptr_t CurrentAddr = (intptr_t)Addr; + intptr_t EmittedAddr; + SmallVector Insts; + if (Fn != (void*)(intptr_t)SparcCompilationCallback) { + EmittedAddr = (intptr_t)Fn; + intptr_t diff = (EmittedAddr - CurrentAddr) >> 2; + if (isInt<22>(diff)) { + Insts.push_back(BA_INST(diff)); + Insts.push_back(NOP_INST); + } + } else { + EmittedAddr = (intptr_t)SparcCompilationCallback; + } + + if (Insts.size() == 0) + emitInstrForIndirectJump(EmittedAddr, 1, Insts); + + + if (!sys::Memory::setRangeWritable(Addr, 4 * Insts.size())) + llvm_unreachable("ERROR: Unable to mark stub writable."); + + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + JCE.emitWordBE(Insts[i]); + + sys::Memory::InvalidateInstructionCache(Addr, 4 * Insts.size()); + if (!sys::Memory::setRangeExecutable(Addr, 4 * Insts.size())) + llvm_unreachable("ERROR: Unable to mark stub executable."); + + return Addr; +} + + +TargetJITInfo::LazyResolverFn +SparcJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return SparcCompilationCallback; +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void SparcJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*) Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t) MR->getResultPointer(); + + switch ((SP::RelocationType) MR->getRelocationType()) { + case SP::reloc_sparc_hi: + ResultPtr = (ResultPtr >> 10) & 0x3fffff; + break; + + case SP::reloc_sparc_lo: + ResultPtr = (ResultPtr & 0x3ff); + break; + + case SP::reloc_sparc_pc30: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffffff; + break; + + case SP::reloc_sparc_pc22: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x3fffff; + break; + + case SP::reloc_sparc_pc19: + ResultPtr = ((ResultPtr - (intptr_t)RelocPos) >> 2) & 0x7ffff; + break; + + case SP::reloc_sparc_h44: + ResultPtr = (ResultPtr >> 22) & 0x3fffff; + break; + + case SP::reloc_sparc_m44: + ResultPtr = (ResultPtr >> 12) & 0x3ff; + break; + + case SP::reloc_sparc_l44: + ResultPtr = (ResultPtr & 0xfff); + break; + + case SP::reloc_sparc_hh: + ResultPtr = (((int64_t)ResultPtr) >> 42) & 0x3fffff; + break; + + case SP::reloc_sparc_hm: + ResultPtr = (((int64_t)ResultPtr) >> 32) & 0x3ff; + break; + + } + *((unsigned*) RelocPos) |= (unsigned) ResultPtr; + } +} diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h new file mode 100644 index 00000000000..ff1b43a7f6a --- /dev/null +++ b/lib/Target/Sparc/SparcJITInfo.h @@ -0,0 +1,67 @@ +//==- SparcJITInfo.h - Sparc Implementation of the JIT Interface -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SparcJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCJITINFO_H +#define SPARCJITINFO_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { +class SparcTargetMachine; + +class SparcJITInfo : public TargetJITInfo { + + bool IsPIC; + + public: + explicit SparcJITInfo() + : IsPIC(false) {} + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on Sparc. + StubLayout getStubLayout() override; + + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; + + /// Initialize - Initialize internal stage for the function being JITted. + void Initialize(const MachineFunction &MF, bool isPIC) { + IsPIC = isPIC; + } + +}; +} + +#endif diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 6cb5e20e3b9..0f4a1626f04 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -17,6 +17,7 @@ #include "SparcFrameLowering.h" #include "SparcInstrInfo.h" #include "SparcISelLowering.h" +#include "SparcJITInfo.h" #include "SparcSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" @@ -42,6 +43,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { SparcTargetLowering TLInfo; SparcSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; + SparcJITInfo JITInfo; public: SparcSubtarget(const std::string &TT, const std::string &CPU, @@ -60,6 +62,7 @@ public: const SparcSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } + SparcJITInfo *getJITInfo() override { return &JITInfo; } const DataLayout *getDataLayout() const override { return &DL; } bool isV9() const { return IsV9; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 80c71448963..0130face3ff 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -61,6 +61,13 @@ bool SparcPassConfig::addInstSelector() { return false; } +bool SparcTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + // Machine code emitter pass for Sparc. + PM.add(createSparcJITCodeEmitterPass(*this, JCE)); + return false; +} + /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index a8f5b85480f..62f088b1481 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -30,8 +30,13 @@ public: const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; } + SparcSubtarget *getSubtargetImpl() { + return static_cast(TargetMachine::getSubtargetImpl()); + } + // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; /// SparcV8TargetMachine - Sparc 32-bit target machine diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 41a614d9d15..4da2d0f2dd5 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -5,7 +5,7 @@ tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv) tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel) tablegen(LLVM SystemZGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info) tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info) tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget) diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile index 732c3172553..445725bd1e1 100644 --- a/lib/Target/SystemZ/Makefile +++ b/lib/Target/SystemZ/Makefile @@ -15,6 +15,7 @@ TARGET = SystemZ BUILT_SOURCES = SystemZGenRegisterInfo.inc \ SystemZGenAsmWriter.inc \ SystemZGenAsmMatcher.inc \ + SystemZGenCodeEmitter.inc \ SystemZGenDisassemblerTables.inc \ SystemZGenInstrInfo.inc \ SystemZGenDAGISel.inc \ diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp new file mode 100644 index 00000000000..aafedf8749b --- /dev/null +++ b/lib/Target/TargetJITInfo.cpp @@ -0,0 +1,14 @@ +//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetJITInfo.h" + +using namespace llvm; + +void TargetJITInfo::anchor() { } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b6fff7460e0..a09767e1eaf 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,12 +15,14 @@ add_public_tablegen_target(X86CommonTableGen) set(sources X86AsmPrinter.cpp X86AtomicExpandPass.cpp + X86CodeEmitter.cpp X86FastISel.cpp X86FloatingPoint.cpp X86FrameLowering.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp X86InstrInfo.cpp + X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp X86PadShortFunction.cpp diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 20258197252..d5522ed95eb 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -21,6 +21,7 @@ namespace llvm { class FunctionPass; class ImmutablePass; +class JITCodeEmitter; class X86TargetMachine; /// createX86AtomicExpandPass - This pass expands atomic operations that cannot @@ -53,6 +54,11 @@ FunctionPass *createX86FloatingPointStackifierPass(); /// AVX and SSE. FunctionPass *createX86IssueVZeroUpperPass(); +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified MCE object. +FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE); + /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically /// allocated chunk of memory. diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp new file mode 100644 index 00000000000..9c68a9ce9ca --- /dev/null +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -0,0 +1,1502 @@ +//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the X86 machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86JITInfo.h" +#include "X86Relocations.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/PassManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "x86-emitter" + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + template + class Emitter : public MachineFunctionPass { + const X86InstrInfo *II; + const DataLayout *TD; + X86TargetMachine &TM; + CodeEmitter &MCE; + MachineModuleInfo *MMI; + intptr_t PICBaseOffset; + bool Is64BitMode; + bool IsPIC; + public: + static char ID; + explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm), + MCE(mce), PICBaseOffset(0), Is64BitMode(false), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "X86 Machine Code Emitter"; + } + + void emitOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitVEXOpcodePrefix(uint64_t TSFlags, int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const; + + void emitSegmentOverridePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI) const; + + void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, + intptr_t Disp = 0, intptr_t PCAdj = 0, + bool Indirect = false); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, + intptr_t PCAdj = 0); + void emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj = 0); + + void emitDisplacementField(const MachineOperand *RelocOp, int DispVal, + intptr_t Adj = 0, bool IsPCRel = true); + + void emitRegModRMByte(unsigned ModRMReg, unsigned RegOpcodeField); + void emitRegModRMByte(unsigned RegOpcodeField); + void emitSIBByte(unsigned SS, unsigned Index, unsigned Base); + void emitConstant(uint64_t Val, unsigned Size); + + void emitMemModRMByte(const MachineInstr &MI, + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj = 0); + + unsigned getX86RegNum(unsigned RegNo) const { + const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + return TRI->getEncodingValue(RegNo) & 0x7; + } + + unsigned char getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const; + }; + +template + char Emitter::ID = 0; +} // end anonymous namespace. + +/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code +/// to the specified JITCodeEmitter object. +FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter(TM, JCE); +} + +template +bool Emitter::runOnMachineFunction(MachineFunction &MF) { + MMI = &getAnalysis(); + MCE.setModuleInfo(MMI); + + II = TM.getSubtargetImpl()->getInstrInfo(); + TD = TM.getSubtargetImpl()->getDataLayout(); + Is64BitMode = TM.getSubtarget().is64Bit(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + + do { + DEBUG(dbgs() << "JITTing function '" << MF.getName() << "'\n"); + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MCInstrDesc &Desc = I->getDesc(); + emitInstruction(*I, &Desc); + // MOVPC32r is basically a call plus a pop instruction. + if (Desc.getOpcode() == X86::MOVPC32r) + emitInstruction(*I, &II->get(X86::POP32r)); + ++NumEmitted; // Keep track of the # of mi's emitted + } + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 +/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand +/// size, and 3) use of X86-64 extended registers. +static unsigned determineREX(const MachineInstr &MI) { + unsigned REX = 0; + const MCInstrDesc &Desc = MI.getDesc(); + + // Pseudo instructions do not need REX prefix byte. + if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) + return 0; + if (Desc.TSFlags & X86II::REX_W) + REX |= 1 << 3; + + unsigned NumOps = Desc.getNumOperands(); + if (NumOps) { + bool isTwoAddr = NumOps > 1 && + Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1; + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + unsigned i = isTwoAddr ? 1 : 0; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + if (X86II::isX86_64NonExtLowByteReg(Reg)) + REX |= 0x40; + } + } + + switch (Desc.TSFlags & X86II::FormMask) { + case X86II::MRMSrcReg: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 0; + } + break; + } + case X86II::MRMSrcMem: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 2; + unsigned Bit = 0; + i = isTwoAddr ? 2 : 1; + for (; i != NumOps; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + case X86II::MRMXm: + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: { + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); + i = isTwoAddr ? 1 : 0; + if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e))) + REX |= 1 << 2; + unsigned Bit = 0; + for (; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (MO.isReg()) { + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << Bit; + Bit++; + } + } + break; + } + default: { + if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0))) + REX |= 1 << 0; + i = isTwoAddr ? 2 : 1; + for (unsigned e = NumOps; i != e; ++i) { + const MachineOperand& MO = MI.getOperand(i); + if (X86InstrInfo::isX86_64ExtendedReg(MO)) + REX |= 1 << 2; + } + break; + } + } + } + return REX; +} + + +/// emitPCRelativeBlockAddress - This method keeps track of the information +/// necessary to resolve the address of this block later and emits a dummy +/// value. +/// +template +void Emitter::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { + // Remember where this reference was and where it is to so we can + // deal with it later. + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + X86::reloc_pcrel_word, MBB)); + MCE.emitWordLE(0); +} + +/// emitGlobalAddress - Emit the specified address to the code stream assuming +/// this is part of a "take the address of a global" instruction. +/// +template +void Emitter::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */, + bool Indirect /* = false */) { + intptr_t RelocCST = Disp; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MachineRelocation MR = Indirect + ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), + RelocCST, false) + : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), RelocCST, false); + MCE.addRelocation(MR); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + intptr_t RelocCST = (Reloc == X86::reloc_picrel_word) ? PICBaseOffset : 0; + + // X86 never needs stubs because instruction selection will always pick + // an instruction sequence that is large enough to hold any address + // to a symbol. + // (see X86ISelLowering.cpp, near 2039: X86TargetLowering::LowerCall) + bool NeedStub = false; + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES, RelocCST, + 0, NeedStub)); + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc, + intptr_t Disp /* = 0 */, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(Disp); + else + MCE.emitWordLE((int32_t)Disp); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template +void Emitter::emitJumpTableAddress(unsigned JTI, unsigned Reloc, + intptr_t PCAdj /* = 0 */) { + intptr_t RelocCST = 0; + if (Reloc == X86::reloc_picrel_word) + RelocCST = PICBaseOffset; + else if (Reloc == X86::reloc_pcrel_word) + RelocCST = PCAdj; + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTI, RelocCST)); + // The relocated value will be added to the displacement + if (Reloc == X86::reloc_absolute_dword) + MCE.emitDWordLE(0); + else + MCE.emitWordLE(0); +} + +inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, + unsigned RM) { + assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); + return RM | (RegOpcode << 3) | (Mod << 6); +} + +template +void Emitter::emitRegModRMByte(unsigned ModRMReg, + unsigned RegOpcodeFld){ + MCE.emitByte(ModRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg))); +} + +template +void Emitter::emitRegModRMByte(unsigned RegOpcodeFld) { + MCE.emitByte(ModRMByte(3, RegOpcodeFld, 0)); +} + +template +void Emitter::emitSIBByte(unsigned SS, + unsigned Index, + unsigned Base) { + // SIB byte is in the same format as the ModRMByte... + MCE.emitByte(ModRMByte(SS, Index, Base)); +} + +template +void Emitter::emitConstant(uint64_t Val, unsigned Size) { + // Output the constant in little endian byte order... + for (unsigned i = 0; i != Size; ++i) { + MCE.emitByte(Val & 255); + Val >>= 8; + } +} + +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. +static bool isDisp8(int Value) { + return Value == (signed char)Value; +} + +static bool gvNeedsNonLazyPtr(const MachineOperand &GVOp, + const TargetMachine &TM) { + // For Darwin-64, simulate the linktime GOT by using the same non-lazy-pointer + // mechanism as 32-bit mode. + if (TM.getSubtarget().is64Bit() && + !TM.getSubtarget().isTargetDarwin()) + return false; + + // Return true if this is a reference to a stub containing the address of the + // global, not the global itself. + return isGlobalStubReference(GVOp.getTargetFlags()); +} + +template +void Emitter::emitDisplacementField(const MachineOperand *RelocOp, + int DispVal, + intptr_t Adj /* = 0 */, + bool IsPCRel /* = true */) { + // If this is a simple integer displacement that doesn't require a relocation, + // emit it now. + if (!RelocOp) { + emitConstant(DispVal, 4); + return; + } + + // Otherwise, this is something that requires a relocation. Emit it as such + // now. + unsigned RelocType = Is64BitMode ? + (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (RelocOp->isGlobal()) { + // In 64-bit static small code model, we could potentially emit absolute. + // But it's probably not beneficial. If the MCE supports using RIP directly + // do it, otherwise fallback to absolute (this is determined by IsPCRel). + // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative + // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute + bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); + emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), + Adj, Indirect); + } else if (RelocOp->isSymbol()) { + emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); + } else if (RelocOp->isCPI()) { + emitConstPoolAddress(RelocOp->getIndex(), RelocType, + RelocOp->getOffset(), Adj); + } else { + assert(RelocOp->isJTI() && "Unexpected machine operand!"); + emitJumpTableAddress(RelocOp->getIndex(), RelocType, Adj); + } +} + +template +void Emitter::emitMemModRMByte(const MachineInstr &MI, + unsigned Op,unsigned RegOpcodeField, + intptr_t PCAdj) { + const MachineOperand &Op3 = MI.getOperand(Op+3); + int DispVal = 0; + const MachineOperand *DispForReloc = nullptr; + + // Figure out what sort of displacement we have to handle here. + if (Op3.isGlobal()) { + DispForReloc = &Op3; + } else if (Op3.isSymbol()) { + DispForReloc = &Op3; + } else if (Op3.isCPI()) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getConstantPoolEntryAddress(Op3.getIndex()); + DispVal += Op3.getOffset(); + } + } else if (Op3.isJTI()) { + if (!MCE.earlyResolveAddresses() || Is64BitMode || IsPIC) { + DispForReloc = &Op3; + } else { + DispVal += MCE.getJumpTableEntryAddress(Op3.getIndex()); + } + } else { + DispVal = Op3.getImm(); + } + + const MachineOperand &Base = MI.getOperand(Op); + const MachineOperand &Scale = MI.getOperand(Op+1); + const MachineOperand &IndexReg = MI.getOperand(Op+2); + + unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP || + (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } + + // Indicate that the displacement will use an pcrel or absolute reference + // by default. MCEs able to resolve addresses on-the-fly use pcrel by default + // while others, unless explicit asked to use RIP, use absolute references. + bool IsPCRel = MCE.earlyResolveAddresses() ? true : false; + + // Is a SIB byte needed? + // If no BaseReg, issue a RIP relative instruction only if the MCE can + // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table + // 2-7) and absolute references. + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + + if (// The SIB byte must be used if there is an index register. + IndexReg.getReg() == 0 && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && + // If there is no base register and we're in 64-bit mode, we need a SIB + // byte to emit an addr that is just 'disp32' (the non-RIP relative form). + (!Is64BitMode || BaseReg != 0)) { + if (BaseReg == 0 || // [disp32] in X86-32 mode + BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } + + // If the base is not EBP/ESP and there is no displacement, use simple + // indirect register encoding, this handles addresses like [EAX]. The + // encoding for [EBP] with no displacement means [disp32] so we handle it + // by emitting a displacement of 0 below. + if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { + MCE.emitByte(ModRMByte(0, RegOpcodeField, BaseRegNo)); + return; + } + + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. + if (!DispForReloc && isDisp8(DispVal)) { + MCE.emitByte(ModRMByte(1, RegOpcodeField, BaseRegNo)); + emitConstant(DispVal, 1); + return; + } + + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] + MCE.emitByte(ModRMByte(2, RegOpcodeField, BaseRegNo)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + return; + } + + // Otherwise we need a SIB byte, so start by outputting the ModR/M byte first. + assert(IndexReg.getReg() != X86::ESP && + IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); + + bool ForceDisp32 = false; + bool ForceDisp8 = false; + if (BaseReg == 0) { + // If there is no base register, we emit the special case SIB byte with + // MOD=0, BASE=4, to JUST get the index, scale, and displacement. + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispForReloc) { + // Emit the normal disp32 encoding. + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + ForceDisp32 = true; + } else if (DispVal == 0 && BaseRegNo != N86::EBP) { + // Emit no displacement ModR/M byte + MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); + } else if (isDisp8(DispVal)) { + // Emit the disp8 encoding... + MCE.emitByte(ModRMByte(1, RegOpcodeField, 4)); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else { + // Emit the normal disp32 encoding... + MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); + } + + // Calculate what the SS field value should be... + static const unsigned SSTable[] = { ~0U, 0, 1, ~0U, 2, ~0U, ~0U, ~0U, 3 }; + unsigned SS = SSTable[Scale.getImm()]; + + if (BaseReg == 0) { + // Handle the SIB byte for the case where there is no base, see Intel + // Manual 2A, table 2-7. The displacement has already been output. + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else // Examples: [ESP+1*+4] or [scaled idx]+disp32 (MOD=0,BASE=5) + IndexRegNo = 4; + emitSIBByte(SS, IndexRegNo, 5); + } else { + unsigned BaseRegNo = getX86RegNum(BaseReg); + unsigned IndexRegNo; + if (IndexReg.getReg()) + IndexRegNo = getX86RegNum(IndexReg.getReg()); + else + IndexRegNo = 4; // For example [ESP+1*+4] + emitSIBByte(SS, IndexRegNo, BaseRegNo); + } + + // Do we need to output a displacement? + if (ForceDisp8) { + emitConstant(DispVal, 1); + } else if (DispVal != 0 || ForceDisp32) { + emitDisplacementField(DispForReloc, DispVal, PCAdj, IsPCRel); + } +} + +static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II, + unsigned Opcode) { + const MCInstrDesc *Desc = &II->get(Opcode); + MI.setDesc(*Desc); + return Desc; +} + +/// Is16BitMemOperand - Return true if the specified instruction has +/// a 16-bit memory operand. Op specifies the operand # of the memoperand. +static bool Is16BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. Op specifies the operand # of the memoperand. +static bool Is32BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + +/// Is64BitMemOperand - Return true if the specified instruction has +/// a 64-bit memory operand. Op specifies the operand # of the memoperand. +#ifndef NDEBUG +static bool Is64BitMemOperand(const MachineInstr &MI, unsigned Op) { + const MachineOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MachineOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} +#endif + +template +void Emitter::emitOpcodePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const { + // Emit the operand size opcode prefix as needed. + if (((TSFlags & X86II::OpSizeMask) >> X86II::OpSizeShift) == X86II::OpSize16) + MCE.emitByte(0x66); + + switch (Desc->TSFlags & X86II::OpPrefixMask) { + case X86II::PD: // 66 + MCE.emitByte(0x66); + break; + case X86II::XS: // F3 + MCE.emitByte(0xF3); + break; + case X86II::XD: // F2 + MCE.emitByte(0xF2); + break; + } + + // Handle REX prefix. + if (Is64BitMode) { + if (unsigned REX = determineREX(MI)) + MCE.emitByte(0x40 | REX); + } + + // 0x0F escape code must be emitted just before the opcode. + switch (Desc->TSFlags & X86II::OpMapMask) { + case X86II::TB: // Two-byte opcode map + case X86II::T8: // 0F 38 + case X86II::TA: // 0F 3A + MCE.emitByte(0x0F); + break; + } + + switch (Desc->TSFlags & X86II::OpMapMask) { + case X86II::T8: // 0F 38 + MCE.emitByte(0x38); + break; + case X86II::TA: // 0F 3A + MCE.emitByte(0x3A); + break; + } +} + +// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range +// 0-7 and the difference between the 2 groups is given by the REX prefix. +// In the VEX prefix, registers are seen sequencially from 0-15 and encoded +// in 1's complement form, example: +// +// ModRM field => XMM9 => 1 +// VEX.VVVV => XMM9 => ~9 +// +// See table 4-35 of Intel AVX Programming Reference for details. +template +unsigned char +Emitter::getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const { + unsigned SrcReg = MI.getOperand(OpNum).getReg(); + unsigned SrcRegNum = getX86RegNum(MI.getOperand(OpNum).getReg()); + if (X86II::isX86_64ExtendedReg(SrcReg)) + SrcRegNum |= 8; + + // The registers represented through VEX_VVVV should + // be encoded in 1's complement form. + return (~SrcRegNum) & 0xf; +} + +/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed +template +void Emitter::emitSegmentOverridePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI) const { + if (MemOperand < 0) + return; // No memory operand + + // Check for explicit segment override on memory operand. + switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { + default: llvm_unreachable("Unknown segment register!"); + case 0: break; + case X86::CS: MCE.emitByte(0x2E); break; + case X86::SS: MCE.emitByte(0x36); break; + case X86::DS: MCE.emitByte(0x3E); break; + case X86::ES: MCE.emitByte(0x26); break; + case X86::FS: MCE.emitByte(0x64); break; + case X86::GS: MCE.emitByte(0x65); break; + } +} + +template +void Emitter::emitVEXOpcodePrefix(uint64_t TSFlags, + int MemOperand, + const MachineInstr &MI, + const MCInstrDesc *Desc) const { + unsigned char Encoding = (TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift; + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + + // VEX_R: opcode externsion equivalent to REX.R in + // 1's complement (inverted) form + // + // 1: Same as REX_R=0 (must be 1 in 32-bit mode) + // 0: Same as REX_R=1 (64 bit mode only) + // + unsigned char VEX_R = 0x1; + + // VEX_X: equivalent to REX.X, only used when a + // register is used for index in SIB Byte. + // + // 1: Same as REX.X=0 (must be 1 in 32-bit mode) + // 0: Same as REX.X=1 (64-bit mode only) + unsigned char VEX_X = 0x1; + + // VEX_B: + // + // 1: Same as REX_B=0 (ignored in 32-bit mode) + // 0: Same as REX_B=1 (64 bit mode only) + // + unsigned char VEX_B = 0x1; + + // VEX_W: opcode specific (use like REX.W, or used for + // opcode extension, or ignored, depending on the opcode byte) + unsigned char VEX_W = 0; + + // VEX_5M (VEX m-mmmmm field): + // + // 0b00000: Reserved for future use + // 0b00001: implied 0F leading opcode + // 0b00010: implied 0F 38 leading opcode bytes + // 0b00011: implied 0F 3A leading opcode bytes + // 0b00100-0b11111: Reserved for future use + // 0b01000: XOP map select - 08h instructions with imm byte + // 0b01001: XOP map select - 09h instructions with no imm byte + // 0b01010: XOP map select - 0Ah instructions with imm dword + unsigned char VEX_5M = 0; + + // VEX_4V (VEX vvvv field): a register specifier + // (in 1's complement form) or 1111 if unused. + unsigned char VEX_4V = 0xf; + + // VEX_L (Vector Length): + // + // 0: scalar or 128-bit vector + // 1: 256-bit vector + // + unsigned char VEX_L = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix + // + // 0b00: None + // 0b01: 66 + // 0b10: F3 + // 0b11: F2 + // + unsigned char VEX_PP = 0; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) + VEX_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) + VEX_L = 1; + + switch (TSFlags & X86II::OpPrefixMask) { + default: break; // VEX_PP already correct + case X86II::PD: VEX_PP = 0x1; break; // 66 + case X86II::XS: VEX_PP = 0x2; break; // F3 + case X86II::XD: VEX_PP = 0x3; break; // F2 + } + + switch (TSFlags & X86II::OpMapMask) { + default: llvm_unreachable("Invalid prefix!"); + case X86II::TB: VEX_5M = 0x1; break; // 0F + case X86II::T8: VEX_5M = 0x2; break; // 0F 38 + case X86II::TA: VEX_5M = 0x3; break; // 0F 3A + case X86II::XOP8: VEX_5M = 0x8; break; + case X86II::XOP9: VEX_5M = 0x9; break; + case X86II::XOPA: VEX_5M = 0xA; break; + } + + // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!"); + case X86II::RawFrm: + break; + case X86II::MRMDestMem: { + // MRMDestMem instructions forms: + // MemAddr, src1(ModR/M) + // MemAddr, src1(VEX_4V), src2(ModR/M) + // MemAddr, src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + CurOp = X86::AddrNumOperands; + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + const MachineOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + break; + } + case X86II::MRMSrcMem: + // MRMSrcMem instructions forms: + // src1(ModR/M), MemAddr + // src1(ModR/M), src2(VEX_4V), MemAddr + // src1(ModR/M), MemAddr, imm8 + // src1(ModR/M), MemAddr, src2(VEX_I8IMM) + // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + CurOp++; + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + } + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); + break; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + // MRM[0-9]m instructions forms: + // MemAddr + // src1(VEX_4V), MemAddr + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + break; + } + case X86II::MRMSrcReg: + // MRMSrcReg instructions forms: + // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M), src1(ModR/M) + // dst(ModR/M), src1(ModR/M), imm8 + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + CurOp++; + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: + // dst(ModR/M), src(ModR/M) + // dst(ModR/M), src(ModR/M), imm8 + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; + break; + } + + // Emit segment override opcode prefix as needed. + emitSegmentOverridePrefix(TSFlags, MemOperand, MI); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + // XOP uses a similar prefix: + // +-----+ +--------------+ +-------------------+ + // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + // Can this use the 2 byte VEX prefix? + if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { + MCE.emitByte(0xC5); + MCE.emitByte(LastByte | (VEX_R << 7)); + return; + } + + // 3 byte VEX prefix + MCE.emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4); + MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); + MCE.emitByte(LastByte | (VEX_W << 7)); +} + +template +void Emitter::emitInstruction(MachineInstr &MI, + const MCInstrDesc *Desc) { + DEBUG(dbgs() << MI); + + // If this is a pseudo instruction, lower it. + switch (Desc->getOpcode()) { + case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; + case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; + case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; + case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; + case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; + case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; + case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; + case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; + case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; + case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; + case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; + case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; + case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; + case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; + case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; + case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; + case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; + } + + + MCE.processDebugLoc(MI.getDebugLoc(), true); + + unsigned Opcode = Desc->Opcode; + + // If this is a two-address instruction, skip one of the register operands. + unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; + if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) + ++CurOp; + else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { + assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); + // Special case for GATHER with 2 TIED_TO operands + // Skip the first 2 operands: dst, mask_wb + CurOp += 2; + } + + uint64_t TSFlags = Desc->TSFlags; + + // Encoding type for this instruction. + unsigned char Encoding = (TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift; + + // It uses the VEX.VVVV field? + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + const unsigned MemOp4_I8IMMOperand = 2; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + // Emit the lock opcode prefix as needed. + if (Desc->TSFlags & X86II::LOCK) + MCE.emitByte(0xF0); + + // Emit segment override opcode prefix as needed. + emitSegmentOverridePrefix(TSFlags, MemoryOperand, MI); + + // Emit the repeat opcode prefix as needed. + if (Desc->TSFlags & X86II::REP) + MCE.emitByte(0xF3); + + // Emit the address size opcode prefix as needed. + bool need_address_override; + if (TSFlags & X86II::AdSize) { + need_address_override = true; + } else if (MemoryOperand < 0) { + need_address_override = false; + } else if (Is64BitMode) { + assert(!Is16BitMemOperand(MI, MemoryOperand)); + need_address_override = Is32BitMemOperand(MI, MemoryOperand); + } else { + assert(!Is64BitMemOperand(MI, MemoryOperand)); + need_address_override = Is16BitMemOperand(MI, MemoryOperand); + } + + if (need_address_override) + MCE.emitByte(0x67); + + if (Encoding == 0) + emitOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); + else + emitVEXOpcodePrefix(TSFlags, MemoryOperand, MI, Desc); + + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(Desc->TSFlags); + switch (TSFlags & X86II::FormMask) { + default: + llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!"); + case X86II::Pseudo: + // Remember the current PC offset, this is the PIC relocation + // base address. + switch (Opcode) { + default: + llvm_unreachable("pseudo instructions should be removed before code" + " emission"); + // Do nothing for Int_MemBarrier - it's just a comment. Add a debug + // to make it slightly easier to see. + case X86::Int_MemBarrier: + DEBUG(dbgs() << "#MEMBARRIER\n"); + break; + + case TargetOpcode::INLINEASM: + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + DebugLoc DL = MI.getDebugLoc(); + DL.print(MI.getParent()->getParent()->getFunction()->getContext(), + llvm::errs()); + report_fatal_error("JIT does not support inline asm!"); + } + break; + case TargetOpcode::DBG_VALUE: + case TargetOpcode::CFI_INSTRUCTION: + break; + case TargetOpcode::GC_LABEL: + case TargetOpcode::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getMCSymbol()); + break; + + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + break; + + case X86::SEH_PushReg: + case X86::SEH_SaveReg: + case X86::SEH_SaveXMM: + case X86::SEH_StackAlloc: + case X86::SEH_SetFrame: + case X86::SEH_PushFrame: + case X86::SEH_EndPrologue: + case X86::SEH_Epilogue: + break; + + case X86::MOVPC32r: { + // This emits the "call" portion of this pseudo instruction. + MCE.emitByte(BaseOpcode); + emitConstant(0, X86II::getSizeOfImm(Desc->TSFlags)); + // Remember PIC base. + PICBaseOffset = (intptr_t) MCE.getCurrentPCOffset(); + X86JITInfo *JTI = TM.getSubtargetImpl()->getJITInfo(); + JTI->setPICBase(MCE.getCurrentPCValue()); + break; + } + } + CurOp = NumOps; + break; + case X86II::RawFrm: { + MCE.emitByte(BaseOpcode); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + + DEBUG(dbgs() << "RawFrm CurOp " << CurOp << "\n"); + DEBUG(dbgs() << "isMBB " << MO.isMBB() << "\n"); + DEBUG(dbgs() << "isGlobal " << MO.isGlobal() << "\n"); + DEBUG(dbgs() << "isSymbol " << MO.isSymbol() << "\n"); + DEBUG(dbgs() << "isImm " << MO.isImm() << "\n"); + + if (MO.isMBB()) { + emitPCRelativeBlockAddress(MO.getMBB()); + break; + } + + if (MO.isGlobal()) { + emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, + MO.getOffset(), 0); + break; + } + + if (MO.isSymbol()) { + emitExternalSymbolAddress(MO.getSymbolName(), X86::reloc_pcrel_word); + break; + } + + // FIXME: Only used by hackish MCCodeEmitter, remove when dead. + if (MO.isJTI()) { + emitJumpTableAddress(MO.getIndex(), X86::reloc_pcrel_word); + break; + } + + assert(MO.isImm() && "Unknown RawFrm operand!"); + if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) { + // Fix up immediate operand for pc relative calls. + intptr_t Imm = (intptr_t)MO.getImm(); + Imm = Imm - MCE.getCurrentPCValue() - 4; + emitConstant(Imm, X86II::getSizeOfImm(Desc->TSFlags)); + } else + emitConstant(MO.getImm(), X86II::getSizeOfImm(Desc->TSFlags)); + break; + } + + case X86II::AddRegFrm: { + MCE.emitByte(BaseOpcode + + getX86RegNum(MI.getOperand(CurOp++).getReg())); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV32ri64) + rt = X86::reloc_absolute_word; // FIXME: add X86II flag? + // This should not occur on Darwin for relocatable objects. + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? + if (MO1.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + break; + } + + case X86II::MRMDestReg: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + + emitRegModRMByte(MI.getOperand(CurOp).getReg(), + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; + break; + } + case X86II::MRMDestMem: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp + X86::AddrNumOperands; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitMemModRMByte(MI, CurOp, + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; + break; + } + + case X86II::MRMSrcReg: { + MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; + + emitRegModRMByte(MI.getOperand(SrcRegNum).getReg(), + getX86RegNum(MI.getOperand(CurOp).getReg())); + // 2 operands skipped with HasMemOp4, compensate accordingly + CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; + if (HasVEX_4VOp3) + ++CurOp; + break; + } + case X86II::MRMSrcMem: { + int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + ++FirstMemOp; + + MCE.emitByte(BaseOpcode); + + intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? + X86II::getSizeOfImm(Desc->TSFlags) : 0; + emitMemModRMByte(MI, FirstMemOp, + getX86RegNum(MI.getOperand(CurOp).getReg()),PCAdj); + CurOp += AddrOperands + 1; + if (HasVEX_4VOp3) + ++CurOp; + break; + } + + case X86II::MRMXr: + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; + MCE.emitByte(BaseOpcode); + uint64_t Form = (Desc->TSFlags & X86II::FormMask); + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r); + + if (CurOp == NumOps) + break; + + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO1.isImm()) { + emitConstant(MO1.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO1.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO1, TM); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + Indirect); + } else if (MO1.isSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isCPI()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJTI()) + emitJumpTableAddress(MO1.getIndex(), rt); + break; + } + + case X86II::MRMXm: + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: { + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + ++CurOp; + intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? + (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? + X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; + + MCE.emitByte(BaseOpcode); + uint64_t Form = (Desc->TSFlags & X86II::FormMask); + emitMemModRMByte(MI, CurOp, (Form==X86II::MRMXm) ? 0 : Form - X86II::MRM0m, + PCAdj); + CurOp += X86::AddrNumOperands; + + if (CurOp == NumOps) + break; + + const MachineOperand &MO = MI.getOperand(CurOp++); + unsigned Size = X86II::getSizeOfImm(Desc->TSFlags); + if (MO.isImm()) { + emitConstant(MO.getImm(), Size); + break; + } + + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64mi32) + rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? + if (MO.isGlobal()) { + bool Indirect = gvNeedsNonLazyPtr(MO, TM); + emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, + Indirect); + } else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), rt); + else if (MO.isCPI()) + emitConstPoolAddress(MO.getIndex(), rt); + else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), rt); + break; + } + + case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: + case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: + case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB: + case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: + case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2: + case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5: + case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA: + case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED: + case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1: + case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4: + case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7: + case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA: + case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD: + case X86II::MRM_FE: case X86II::MRM_FF: + MCE.emitByte(BaseOpcode); + + unsigned char MRM; + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Invalid Form"); + case X86II::MRM_C0: MRM = 0xC0; break; + case X86II::MRM_C1: MRM = 0xC1; break; + case X86II::MRM_C2: MRM = 0xC2; break; + case X86II::MRM_C3: MRM = 0xC3; break; + case X86II::MRM_C4: MRM = 0xC4; break; + case X86II::MRM_C8: MRM = 0xC8; break; + case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; + case X86II::MRM_CF: MRM = 0xCF; break; + case X86II::MRM_D0: MRM = 0xD0; break; + case X86II::MRM_D1: MRM = 0xD1; break; + case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; + case X86II::MRM_D7: MRM = 0xD7; break; + case X86II::MRM_D8: MRM = 0xD8; break; + case X86II::MRM_D9: MRM = 0xD9; break; + case X86II::MRM_DA: MRM = 0xDA; break; + case X86II::MRM_DB: MRM = 0xDB; break; + case X86II::MRM_DC: MRM = 0xDC; break; + case X86II::MRM_DD: MRM = 0xDD; break; + case X86II::MRM_DE: MRM = 0xDE; break; + case X86II::MRM_DF: MRM = 0xDF; break; + case X86II::MRM_E0: MRM = 0xE0; break; + case X86II::MRM_E1: MRM = 0xE1; break; + case X86II::MRM_E2: MRM = 0xE2; break; + case X86II::MRM_E3: MRM = 0xE3; break; + case X86II::MRM_E4: MRM = 0xE4; break; + case X86II::MRM_E5: MRM = 0xE5; break; + case X86II::MRM_E8: MRM = 0xE8; break; + case X86II::MRM_E9: MRM = 0xE9; break; + case X86II::MRM_EA: MRM = 0xEA; break; + case X86II::MRM_EB: MRM = 0xEB; break; + case X86II::MRM_EC: MRM = 0xEC; break; + case X86II::MRM_ED: MRM = 0xED; break; + case X86II::MRM_EE: MRM = 0xEE; break; + case X86II::MRM_F0: MRM = 0xF0; break; + case X86II::MRM_F1: MRM = 0xF1; break; + case X86II::MRM_F2: MRM = 0xF2; break; + case X86II::MRM_F3: MRM = 0xF3; break; + case X86II::MRM_F4: MRM = 0xF4; break; + case X86II::MRM_F5: MRM = 0xF5; break; + case X86II::MRM_F6: MRM = 0xF6; break; + case X86II::MRM_F7: MRM = 0xF7; break; + case X86II::MRM_F8: MRM = 0xF8; break; + case X86II::MRM_F9: MRM = 0xF9; break; + case X86II::MRM_FA: MRM = 0xFA; break; + case X86II::MRM_FB: MRM = 0xFB; break; + case X86II::MRM_FC: MRM = 0xFC; break; + case X86II::MRM_FD: MRM = 0xFD; break; + case X86II::MRM_FE: MRM = 0xFE; break; + case X86II::MRM_FF: MRM = 0xFF; break; + } + MCE.emitByte(MRM); + break; + } + + while (CurOp != NumOps && NumOps - CurOp <= 2) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte. + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { + const MachineOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand + : CurOp); + ++CurOp; + unsigned RegNum = getX86RegNum(MO.getReg()) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; + // If there is an additional 5th operand it must be an immediate, which + // is encoded in bits[3:0] + if (CurOp != NumOps) { + const MachineOperand &MIMM = MI.getOperand(CurOp++); + if (MIMM.isImm()) { + unsigned Val = MIMM.getImm(); + assert(Val < 16 && "Immediate operand value out of range"); + RegNum |= Val; + } + } + emitConstant(RegNum, 1); + } else { + emitConstant(MI.getOperand(CurOp++).getImm(), + X86II::getSizeOfImm(Desc->TSFlags)); + } + } + + if (!MI.isVariadic() && CurOp != NumOps) { +#ifndef NDEBUG + dbgs() << "Cannot encode all operands of: " << MI << "\n"; +#endif + llvm_unreachable(nullptr); + } + + MCE.processDebugLoc(MI.getDebugLoc(), false); +} diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 7c973c2e55d..1f53b7cd791 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f14179603eb..0d46f706906 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp new file mode 100644 index 00000000000..a082c4f8b0e --- /dev/null +++ b/lib/Target/X86/X86JITInfo.cpp @@ -0,0 +1,588 @@ +//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the X86 target. +// +//===----------------------------------------------------------------------===// + +#include "X86JITInfo.h" +#include "X86Relocations.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Valgrind.h" +#include +#include +using namespace llvm; + +#define DEBUG_TYPE "jit" + +// Determine the platform we're running on +#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) +# define X86_64_JIT +#elif defined(__i386__) || defined(i386) || defined(_M_IX86) +# define X86_32_JIT +#endif + +void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + unsigned char *OldByte = (unsigned char *)Old; + *OldByte++ = 0xE9; // Emit JMP opcode. + unsigned *OldWord = (unsigned *)OldByte; + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)OldWord; + *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code. + + // X86 doesn't need to invalidate the processor cache, so just invalidate + // Valgrind's cache directly. + sys::ValgrindDiscardTranslations(Old, 5); +} + + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +// For ELF targets, use a .size and .type directive, to let tools +// know the extent of functions defined in assembler. +#if defined(__ELF__) +# define SIZE(sym) ".size " #sym ", . - " #sym "\n" +# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n" +#else +# define SIZE(sym) +# define TYPE_FUNCTION(sym) +#endif + +// Provide a convenient way for disabling usage of CFI directives. +// This is needed for old/broken assemblers (for example, gas on +// Darwin is pretty old and doesn't support these directives) +#if defined(__APPLE__) +# define CFI(x) +#else +// FIXME: Disable this until we really want to use it. Also, we will +// need to add some workarounds for compilers, which support +// only subset of these directives. +# define CFI(x) +#endif + +// Provide a wrapper for LLVMX86CompilationCallback2 that saves non-traditional +// callee saved registers, for the fastcc calling convention. +extern "C" { +#if defined(X86_64_JIT) +# ifndef _MSC_VER + // No need to save EAX/EDX for X86-64. + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + // Save RBP + "pushq %rbp\n" + CFI(".cfi_def_cfa_offset 16\n") + CFI(".cfi_offset %rbp, -16\n") + // Save RSP + "movq %rsp, %rbp\n" + CFI(".cfi_def_cfa_register %rbp\n") + // Save all int arg registers + "pushq %rdi\n" + CFI(".cfi_rel_offset %rdi, 0\n") + "pushq %rsi\n" + CFI(".cfi_rel_offset %rsi, 8\n") + "pushq %rdx\n" + CFI(".cfi_rel_offset %rdx, 16\n") + "pushq %rcx\n" + CFI(".cfi_rel_offset %rcx, 24\n") + "pushq %r8\n" + CFI(".cfi_rel_offset %r8, 32\n") + "pushq %r9\n" + CFI(".cfi_rel_offset %r9, 40\n") + // Align stack on 16-byte boundary. ESP might not be properly aligned + // (8 byte) if this is called from an indirect stub. + "andq $-16, %rsp\n" + // Save all XMM arg registers + "subq $128, %rsp\n" + "movaps %xmm0, (%rsp)\n" + "movaps %xmm1, 16(%rsp)\n" + "movaps %xmm2, 32(%rsp)\n" + "movaps %xmm3, 48(%rsp)\n" + "movaps %xmm4, 64(%rsp)\n" + "movaps %xmm5, 80(%rsp)\n" + "movaps %xmm6, 96(%rsp)\n" + "movaps %xmm7, 112(%rsp)\n" + // JIT callee +#if defined(_WIN64) || defined(__CYGWIN__) + "subq $32, %rsp\n" + "movq %rbp, %rcx\n" // Pass prev frame and return address + "movq 8(%rbp), %rdx\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "addq $32, %rsp\n" +#else + "movq %rbp, %rdi\n" // Pass prev frame and return address + "movq 8(%rbp), %rsi\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" +#endif + // Restore all XMM arg registers + "movaps 112(%rsp), %xmm7\n" + "movaps 96(%rsp), %xmm6\n" + "movaps 80(%rsp), %xmm5\n" + "movaps 64(%rsp), %xmm4\n" + "movaps 48(%rsp), %xmm3\n" + "movaps 32(%rsp), %xmm2\n" + "movaps 16(%rsp), %xmm1\n" + "movaps (%rsp), %xmm0\n" + // Restore RSP + "movq %rbp, %rsp\n" + CFI(".cfi_def_cfa_register %rsp\n") + // Restore all int arg registers + "subq $48, %rsp\n" + CFI(".cfi_adjust_cfa_offset 48\n") + "popq %r9\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r9\n") + "popq %r8\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %r8\n") + "popq %rcx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rcx\n") + "popq %rdx\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdx\n") + "popq %rsi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rsi\n") + "popq %rdi\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rdi\n") + // Restore RBP + "popq %rbp\n" + CFI(".cfi_adjust_cfa_offset -8\n") + CFI(".cfi_restore %rbp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); +# else + // No inline assembler support on this platform. The routine is in external + // file. + void X86CompilationCallback(); + +# endif +#elif defined (X86_32_JIT) +# ifndef _MSC_VER + void X86CompilationCallback(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback\n" + TYPE_FUNCTION(X86CompilationCallback) + ASMPREFIX "X86CompilationCallback:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") +# if defined(__APPLE__) + "andl $-16, %esp\n" // Align ESP on 16-byte boundary +# endif + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register %esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl %eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback) + ); + + // Same as X86CompilationCallback but also saves XMM argument registers. + void X86CompilationCallback_SSE(void); + asm( + ".text\n" + ".align 8\n" + ".globl " ASMPREFIX "X86CompilationCallback_SSE\n" + TYPE_FUNCTION(X86CompilationCallback_SSE) + ASMPREFIX "X86CompilationCallback_SSE:\n" + CFI(".cfi_startproc\n") + "pushl %ebp\n" + CFI(".cfi_def_cfa_offset 8\n") + CFI(".cfi_offset %ebp, -8\n") + "movl %esp, %ebp\n" // Standard prologue + CFI(".cfi_def_cfa_register %ebp\n") + "pushl %eax\n" + CFI(".cfi_rel_offset %eax, 0\n") + "pushl %edx\n" // Save EAX/EDX/ECX + CFI(".cfi_rel_offset %edx, 4\n") + "pushl %ecx\n" + CFI(".cfi_rel_offset %ecx, 8\n") + "andl $-16, %esp\n" // Align ESP on 16-byte boundary + // Save all XMM arg registers + "subl $64, %esp\n" + // FIXME: provide frame move information for xmm registers. + // This can be tricky, because CFA register is ebp (unaligned) + // and we need to produce offsets relative to it. + "movaps %xmm0, (%esp)\n" + "movaps %xmm1, 16(%esp)\n" + "movaps %xmm2, 32(%esp)\n" + "movaps %xmm3, 48(%esp)\n" + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "LLVMX86CompilationCallback2\n" + "addl $16, %esp\n" + "movaps 48(%esp), %xmm3\n" + CFI(".cfi_restore %xmm3\n") + "movaps 32(%esp), %xmm2\n" + CFI(".cfi_restore %xmm2\n") + "movaps 16(%esp), %xmm1\n" + CFI(".cfi_restore %xmm1\n") + "movaps (%esp), %xmm0\n" + CFI(".cfi_restore %xmm0\n") + "movl %ebp, %esp\n" // Restore ESP + CFI(".cfi_def_cfa_register esp\n") + "subl $12, %esp\n" + CFI(".cfi_adjust_cfa_offset 12\n") + "popl %ecx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ecx\n") + "popl %edx\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %edx\n") + "popl %eax\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %eax\n") + "popl %ebp\n" + CFI(".cfi_adjust_cfa_offset -4\n") + CFI(".cfi_restore %ebp\n") + "ret\n" + CFI(".cfi_endproc\n") + SIZE(X86CompilationCallback_SSE) + ); +# else + void LLVMX86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr); + + _declspec(naked) void X86CompilationCallback(void) { + __asm { + push ebp + mov ebp, esp + push eax + push edx + push ecx + and esp, -16 + sub esp, 16 + mov eax, dword ptr [ebp+4] + mov dword ptr [esp+4], eax + mov dword ptr [esp], ebp + call LLVMX86CompilationCallback2 + mov esp, ebp + sub esp, 12 + pop ecx + pop edx + pop eax + pop ebp + ret + } + } + +# endif // _MSC_VER + +#else // Not an i386 host + void X86CompilationCallback() { + llvm_unreachable("Cannot call X86CompilationCallback() on a non-x86 arch!"); + } +#endif +} + +/// This is the target-specific function invoked by the +/// function stub when we did not know the real target of a call. This function +/// must locate the start of the stub or call site and pass it into the JIT +/// compiler function. +extern "C" { +LLVM_ATTRIBUTE_USED // Referenced from inline asm. +LLVM_LIBRARY_VISIBILITY void LLVMX86CompilationCallback2(intptr_t *StackPtr, + intptr_t RetAddr) { + intptr_t *RetAddrLoc = &StackPtr[1]; + // We are reading raw stack data here. Tell MemorySanitizer that it is + // sufficiently initialized. + __msan_unpoison(RetAddrLoc, sizeof(*RetAddrLoc)); + assert(*RetAddrLoc == RetAddr && + "Could not find return address on the stack!"); + + // It's a stub if there is an interrupt marker after the call. + bool isStub = ((unsigned char*)RetAddr)[0] == 0xCE; + + // The call instruction should have pushed the return value onto the stack... +#if defined (X86_64_JIT) + RetAddr--; // Backtrack to the reference itself... +#else + RetAddr -= 4; // Backtrack to the reference itself... +#endif + +#if 0 + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr + << ": Resolving call to function: " + << TheVM->getFunctionReferencedName((void*)RetAddr) << "\n"); +#endif + + // Sanity check to make sure this really is a call instruction. +#if defined (X86_64_JIT) + assert(((unsigned char*)RetAddr)[-2] == 0x41 &&"Not a call instr!"); + assert(((unsigned char*)RetAddr)[-1] == 0xFF &&"Not a call instr!"); +#else + assert(((unsigned char*)RetAddr)[-1] == 0xE8 &&"Not a call instr!"); +#endif + + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. +#if defined (X86_64_JIT) + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); +#else + *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); +#endif + + if (isStub) { + // If this is a stub, rewrite the call into an unconditional branch + // instruction so that two return addresses are not pushed onto the stack + // when the requested function finally gets called. This also makes the + // 0xCE byte (interrupt) dead, so the marker doesn't effect anything. +#if defined (X86_64_JIT) + // If the target address is within 32-bit range of the stub, use a + // PC-relative branch instead of loading the actual address. (This is + // considerably shorter than the 64-bit immediate load already there.) + // We assume here intptr_t is 64 bits. + intptr_t diff = NewVal-RetAddr+7; + if (diff >= -2147483648LL && diff <= 2147483647LL) { + *(unsigned char*)(RetAddr-0xc) = 0xE9; + *(intptr_t *)(RetAddr-0xb) = diff & 0xffffffff; + } else { + *(intptr_t *)(RetAddr - 0xa) = NewVal; + ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6)); + } + sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd); +#else + ((unsigned char*)RetAddr)[-1] = 0xE9; + sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5); +#endif + } + + // Change the return address to reexecute the call instruction... +#if defined (X86_64_JIT) + *RetAddrLoc -= 0xd; +#else + *RetAddrLoc -= 5; +#endif +} +} + +TargetJITInfo::LazyResolverFn +X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { + TsanIgnoreWritesBegin(); + JITCompilerFunction = F; + TsanIgnoreWritesEnd(); + +#if defined (X86_32_JIT) && !defined (_MSC_VER) +#if defined(__SSE__) + // SSE Callback should be called for SSE-enabled LLVM. + return X86CompilationCallback_SSE; +#else + if (useSSE) + return X86CompilationCallback_SSE; +#endif +#endif + + return X86CompilationCallback; +} + +X86JITInfo::X86JITInfo(bool UseSSE) { + useSSE = UseSSE; + useGOT = 0; + TLSOffset = nullptr; +} + +void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) { +#if defined (X86_64_JIT) + const unsigned Alignment = 8; + uint8_t Buffer[8]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(intptr_t)ptr); + MachineCodeEmitter::emitWordLEInto(Cur, (unsigned)(((intptr_t)ptr) >> 32)); +#else + const unsigned Alignment = 4; + uint8_t Buffer[4]; + uint8_t *Cur = Buffer; + MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)ptr); +#endif + return JCE.allocIndirectGV(GV, Buffer, sizeof(Buffer), Alignment); +} + +TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { + // The 64-bit stub contains: + // movabs r10 <- 8-byte-target-address # 10 bytes + // call|jmp *r10 # 3 bytes + // The 32-bit stub contains a 5-byte call|jmp. + // If the stub is a call to the compilation callback, an extra byte is added + // to mark it as a stub. + StubLayout Result = {14, 4}; + return Result; +} + +void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + // Note, we cast to intptr_t here to silence a -pedantic warning that + // complains about casting a function pointer to a normal pointer. +#if defined (X86_32_JIT) && !defined (_MSC_VER) + bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && + Target != (void*)(intptr_t)X86CompilationCallback_SSE); +#else + bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback; +#endif + JCE.emitAlignment(4); + void *Result = (void*)JCE.getCurrentPCValue(); + if (NotCC) { +#if defined (X86_64_JIT) + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // jmpq *r10 + JCE.emitByte(2 | (4 << 3) | (3 << 6)); +#else + JCE.emitByte(0xE9); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); +#endif + return Result; + } + +#if defined (X86_64_JIT) + JCE.emitByte(0x49); // REX prefix + JCE.emitByte(0xB8+2); // movabsq r10 + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); + JCE.emitByte(0x41); // REX prefix + JCE.emitByte(0xFF); // callq *r10 + JCE.emitByte(2 | (2 << 3) | (3 << 6)); +#else + JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination... + + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); +#endif + + // This used to use 0xCD, but that value is used by JITMemoryManager to + // initialize the buffer with garbage, which means it may follow a + // noreturn function call, confusing LLVMX86CompilationCallback2. PR 4929. + JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! + return Result; +} + +/// getPICJumpTableEntry - Returns the value of the jumptable entry for the +/// specific basic block. +uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { +#if defined(X86_64_JIT) + return BB - Entry; +#else + return BB - PICBase; +#endif +} + +template static void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast(&Value), reinterpret_cast(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast(Pos), reinterpret_cast(&Value), + sizeof(T)); +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void X86JITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((X86::RelocationType)MR->getRelocationType()) { + case X86::reloc_pcrel_word: { + // PC relative relocation, add the relocated value to the value already in + // memory, after we adjust it for where the PC is. + ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); + addUnaligned(RelocPos, ResultPtr); + break; + } + case X86::reloc_picrel_word: { + // PIC base relative relocation, add the relocated value to the value + // already in memory, after we adjust it for where the PIC base is. + ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); + addUnaligned(RelocPos, ResultPtr); + break; + } + case X86::reloc_absolute_word: + case X86::reloc_absolute_word_sext: + // Absolute relocation, just add the relocated value to the value already + // in memory. + addUnaligned(RelocPos, ResultPtr); + break; + case X86::reloc_absolute_dword: + addUnaligned(RelocPos, ResultPtr); + break; + } + } +} + +char* X86JITInfo::allocateThreadLocalMemory(size_t size) { +#if defined(X86_32_JIT) && !defined(__APPLE__) && !defined(_MSC_VER) + TLSOffset -= size; + return TLSOffset; +#else + llvm_unreachable("Cannot allocate thread local storage on this arch!"); +#endif +} diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h new file mode 100644 index 00000000000..564343ffa3f --- /dev/null +++ b/lib/Target/X86/X86JITInfo.h @@ -0,0 +1,79 @@ +//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetJITInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef X86JITINFO_H +#define X86JITINFO_H + +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class X86Subtarget; + + class X86JITInfo : public TargetJITInfo { + uintptr_t PICBase; + char *TLSOffset; + bool useSSE; + public: + explicit X86JITInfo(bool UseSSE); + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE) override; + + // getStubLayout - Returns the size and alignment of the largest call stub + // on X86. + StubLayout getStubLayout() override; + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) override; + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) override; + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. + char* allocateThreadLocalMemory(size_t size) override; + + /// setPICBase / getPICBase - Getter / setter of PICBase, used to compute + /// PIC jumptable entry. + void setPICBase(uintptr_t Base) { PICBase = Base; } + uintptr_t getPICBase() const { return PICBase; } + }; +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 3d13c4b59c0..c4caf06c936 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -356,7 +356,8 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, DL(computeDataLayout(*this)), TSInfo(DL), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(), - is64Bit() ? -8 : -4) {} + is64Bit() ? -8 : -4), + JITInfo(hasSSE1()) {} bool X86Subtarget::enableEarlyIfConversion() const { return hasCMov() && X86EarlyIfConv; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 45dc0b8ebe2..75e8ae5dc2b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -17,6 +17,7 @@ #include "X86FrameLowering.h" #include "X86ISelLowering.h" #include "X86InstrInfo.h" +#include "X86JITInfo.h" #include "X86SelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/CallingConv.h" @@ -242,6 +243,7 @@ private: X86InstrInfo InstrInfo; X86TargetLowering TLInfo; X86FrameLowering FrameLowering; + X86JITInfo JITInfo; public: /// This constructor initializes the data members to match that @@ -265,6 +267,7 @@ public: const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } + X86JITInfo *getJITInfo() override { return &JITInfo; } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 0b1909f95c2..f12140f1f16 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -177,3 +177,10 @@ bool X86PassConfig::addPreEmitPass() { return ShouldPrint; } + +bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { + PM.add(createX86JITCodeEmitterPass(*this, JCE)); + + return false; +} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 9de118a205e..633c5710315 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -33,11 +33,17 @@ public: CodeGenOpt::Level OL); const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; } + X86Subtarget *getSubtargetImpl() { + return static_cast(TargetMachine::getSubtargetImpl()); + } + /// \brief Register X86 analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; // Set up the pass pipeline. TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; } // End llvm namespace diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll index eb2fe8c0483..4c03519a85a 100644 --- a/test/ExecutionEngine/2002-12-16-ArgTest.ll +++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm @.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1] diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll index 68fdefefa54..3182193453a 100644 --- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll +++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @foo(i32 %X, i32 %Y, double %A) { %cond212 = fcmp une double %A, 1.000000e+00 ; [#uses=1] diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll index 5a0311dd939..3e27e0607ba 100644 --- a/test/ExecutionEngine/2003-01-04-LoopTest.ll +++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @main() { call i32 @mylog( i32 4 ) ; :1 [#uses=0] diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll index 038d7500101..80e19ba1932 100644 --- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll +++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm define i32 @bar(i8* %X) { ; pointer should be 4 byte aligned! diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll index 576ef7cf638..6f61aa68b67 100644 --- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll +++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll @@ -1,6 +1,7 @@ ; This testcase should return with an exit code of 1. ; ; RUN: not %lli %s +; XFAIL: arm @test = global i64 0 ; [#uses=1] diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll index 42db5fe93fc..236be18d96e 100644 --- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll +++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s test +; XFAIL: arm declare i32 @puts(i8*) diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll index bee409c1441..22dd4ccb44c 100644 --- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll +++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; This testcase failed to work because two variable sized allocas confused the ; local register allocator. diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll index 63303fcff7c..60dc3d6b7d4 100644 --- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll +++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; ; Regression Test: EnvironmentTest.ll diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll index 8fb1bbbe9d7..04a5e1741bb 100644 --- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll +++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm ; This testcase exposes a bug in the local register allocator where it runs out ; of registers (due to too many overlapping live ranges), but then attempts to diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll index 6513540903e..6e48c60db26 100644 --- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll +++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll @@ -1,4 +1,5 @@ ; RUN: %lli %s > /dev/null +; XFAIL: arm @A = global i32 0 ; [#uses=1] diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll index 2ac8ad1795d..8523b5e3f5b 100644 --- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll +++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll @@ -1,5 +1,6 @@ ; PR672 ; RUN: %lli %s +; XFAIL: arm define i32 @main() { %f = bitcast i32 (i32, i32*, i32)* @check_tail to i32* ; [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll index eb2fe8c0483..babd8f6a780 100644 --- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll +++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null @.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll index 68fdefefa54..bbb81b88b16 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @foo(i32 %X, i32 %Y, double %A) { %cond212 = fcmp une double %A, 1.000000e+00 ; [#uses=1] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll index 5a0311dd939..7574267bdcd 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @main() { call i32 @mylog( i32 4 ) ; :1 [#uses=0] diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll index 48576e7c83e..261939ad202 100644 --- a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll +++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll @@ -1,4 +1,4 @@ -; RUN: %lli %s > /dev/null +; RUN: %lli_mcjit %s > /dev/null define i32 @main() { ;