mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-01 09:18:30 +00:00
48d1e4517e
have situations where an SSE instruction turns into multiple blocks, with the live range of an x87 register crossing them. To do this correctly make sure we examine all blocks when inserting FP_REG_KILL. PR 1697. (This was exposed by my fix for PR 1681, but the same thing could happen mixing x87 long double with SSE.) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42281 91177308-0d34-0410-b5e6-96231b3b80d8
1449 lines
50 KiB
C++
1449 lines
50 KiB
C++
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file was developed by Evan Cheng and is distributed under
|
|
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines a DAG pattern matching instruction selector for X86,
|
|
// converting from a legalized dag to a X86 dag.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "x86-isel"
|
|
#include "X86.h"
|
|
#include "X86InstrBuilder.h"
|
|
#include "X86ISelLowering.h"
|
|
#include "X86RegisterInfo.h"
|
|
#include "X86Subtarget.h"
|
|
#include "X86TargetMachine.h"
|
|
#include "llvm/GlobalValue.h"
|
|
#include "llvm/Instructions.h"
|
|
#include "llvm/Intrinsics.h"
|
|
#include "llvm/Support/CFG.h"
|
|
#include "llvm/Type.h"
|
|
#include "llvm/CodeGen/MachineConstantPool.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/CodeGen/SSARegMap.h"
|
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#include "llvm/Support/Compiler.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/MathExtras.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include <queue>
|
|
#include <set>
|
|
using namespace llvm;
|
|
|
|
STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added");
|
|
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Pattern Matcher Implementation
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
/// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
|
|
/// SDOperand's instead of register numbers for the leaves of the matched
|
|
/// tree.
|
|
struct X86ISelAddressMode {
|
|
enum {
|
|
RegBase,
|
|
FrameIndexBase
|
|
} BaseType;
|
|
|
|
struct { // This is really a union, discriminated by BaseType!
|
|
SDOperand Reg;
|
|
int FrameIndex;
|
|
} Base;
|
|
|
|
bool isRIPRel; // RIP relative?
|
|
unsigned Scale;
|
|
SDOperand IndexReg;
|
|
unsigned Disp;
|
|
GlobalValue *GV;
|
|
Constant *CP;
|
|
const char *ES;
|
|
int JT;
|
|
unsigned Align; // CP alignment.
|
|
|
|
X86ISelAddressMode()
|
|
: BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
|
|
GV(0), CP(0), ES(0), JT(-1), Align(0) {
|
|
}
|
|
};
|
|
}
|
|
|
|
namespace {
|
|
//===--------------------------------------------------------------------===//
|
|
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
  /// ContainsFPCode - Every instruction we select that uses or defines a FP
  /// register should set this to true.
  bool ContainsFPCode;

  /// FastISel - Enable fast(er) instruction selection.
  ///
  bool FastISel;

  /// TM - Keep a reference to X86TargetMachine.
  ///
  X86TargetMachine &TM;

  /// X86Lowering - This object fully describes how to lower LLVM code to an
  /// X86-specific SelectionDAG.
  X86TargetLowering X86Lowering;

  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// GlobalBaseReg - keeps track of the virtual register mapped onto global
  /// base register.  Zero means "not materialized yet".
  unsigned GlobalBaseReg;

public:
  /// Build the selector for the given target machine.  \p fast selects the
  /// fast(er) instruction selection mode.
  X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
    : SelectionDAGISel(X86Lowering),
      ContainsFPCode(false), FastISel(fast), TM(tm),
      X86Lowering(*TM.getTargetLowering()),
      Subtarget(&TM.getSubtarget<X86Subtarget>()),
      // runOnFunction resets this as well; initializing it here keeps the
      // object fully defined from construction onwards.
      GlobalBaseReg(0) {}

  virtual bool runOnFunction(Function &Fn) {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    return SelectionDAGISel::runOnFunction(Fn);
  }

  virtual const char *getPassName() const {
    return "X86 DAG->DAG Instruction Selection";
  }

  /// InstructionSelectBasicBlock - This callback is invoked by
  /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
  virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

  /// CanBeFoldedBy - Return true if node N may be folded into its user U
  /// while selecting Root.
  virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

private:
  SDNode *Select(SDOperand N);

  bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                    bool isRoot = true, unsigned Depth = 0);
  bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                        bool isRoot, unsigned Depth);
  bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                  SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
  bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                     SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
  bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                           SDOperand N, SDOperand &Base, SDOperand &Scale,
                           SDOperand &Index, SDOperand &Disp,
                           SDOperand &InChain, SDOperand &OutChain);
  bool TryFoldLoad(SDOperand P, SDOperand N,
                   SDOperand &Base, SDOperand &Scale,
                   SDOperand &Index, SDOperand &Disp);
  void InstructionSelectPreprocess(SelectionDAG &DAG);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                            char ConstraintCode,
                                            std::vector<SDOperand> &OutOps,
                                            SelectionDAG &DAG);

  /// getAddressOperands - Convert a matched addressing mode into the four
  /// target operands (base, scale, index, displacement).  The displacement
  /// is the first of GV, CP, ES, JT that is set, otherwise the plain
  /// constant AM.Disp.  Note that the external-symbol and jump-table forms
  /// do not carry AM.Disp.
  inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                 SDOperand &Scale, SDOperand &Index,
                                 SDOperand &Disp) {
    Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
      CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
      AM.Base.Reg;
    Scale = getI8Imm(AM.Scale);
    Index = AM.IndexReg;
    // These are 32-bit even in 64-bit mode since RIP relative offset
    // is 32-bit.
    if (AM.GV)
      Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
    else if (AM.CP)
      Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
    else if (AM.ES)
      Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
    else if (AM.JT != -1)
      Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
    else
      Disp = getI32Imm(AM.Disp);
  }

  /// getI8Imm - Return a target constant with the specified value, of type
  /// i8.
  inline SDOperand getI8Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i8);
  }

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDOperand getI16Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDOperand getI32Imm(unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, MVT::i32);
  }

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register.  Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  /// getTruncate - return an SDNode that implements a subreg based truncate
  /// of the specified operand to the specified value type.
  SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);

#ifndef NDEBUG
  unsigned Indent;
#endif
};
|
|
}
|
|
|
|
/// findFlagUse - Scan the users of N and return the first one that has N's
/// last result value among its operands, or NULL when no user does.
static SDNode *findFlagUse(SDNode *N) {
  const unsigned LastResNo = N->getNumValues() - 1;
  SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
  while (UI != UE) {
    SDNode *Candidate = *UI;
    const unsigned NumOps = Candidate->getNumOperands();
    for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
      SDOperand Operand = Candidate->getOperand(Idx);
      if (Operand.Val != N)
        continue;
      if (Operand.ResNo == LastResNo)
        return Candidate;
    }
    ++UI;
  }
  return NULL;
}
|
|
|
|
/// findNonImmUse - Recursive worker for isNonImmUse.  Walks the operands of
/// Use looking for Def and sets 'found' when Def is reached through an edge
/// that is neither a direct use by ImmedUse nor a direct use by Root.
/// Operands equal to Skip are not followed, nodes whose id is greater than
/// Def's id are not expanded, and Visited prevents expanding a node twice.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SDNode *Skip, bool &found,
                          std::set<SDNode *> &Visited) {
  // Nothing left to do once a hit has been recorded.
  if (found)
    return;
  // Prune by node id (presumably ids follow a topological order, so such a
  // node cannot lead back to Def -- confirm against the id assignment).
  if (Use->getNodeId() > Def->getNodeId())
    return;
  // Expand every node at most once.
  if (!Visited.insert(Use).second)
    return;

  const unsigned NumOps = Use->getNumOperands();
  for (unsigned OpIdx = 0; OpIdx != NumOps && !found; ++OpIdx) {
    SDNode *Operand = Use->getOperand(OpIdx).Val;
    if (Operand == Skip)
      continue;

    if (Operand != Def) {
      findNonImmUse(Operand, Def, ImmedUse, Root, Skip, found, Visited);
      continue;
    }

    // Operand is Def: decide whether this particular edge is tolerated.
    if (Use == ImmedUse)
      continue;  // Immediate use is ok.
    if (Use != Root) {
      found = true;
      return;
    }
    assert(Use->getOpcode() == ISD::STORE ||
           Use->getOpcode() == X86ISD::CMP);
  }
}
|
|
|
|
/// isNonImmUse - Start searching from Root up the DAG to check is Def can
|
|
/// be reached. Return true if that's the case. However, ignore direct uses
|
|
/// by ImmedUse (which would be U in the example illustrated in
|
|
/// CanBeFoldedBy) and by Root (which can happen in the store case).
|
|
/// FIXME: to be really generic, we should allow direct use by any node
|
|
/// that is being folded. But realisticly since we only fold loads which
|
|
/// have one non-chain use, we only need to watch out for load/op/store
|
|
/// and load/op/cmp case where the root (store / cmp) may reach the load via
|
|
/// its chain operand.
|
|
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
|
|
SDNode *Skip = NULL) {
|
|
std::set<SDNode *> Visited;
|
|
bool found = false;
|
|
findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
|
|
return found;
|
|
}
|
|
|
|
|
|
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  // Folding is disabled entirely in fast instruction selection mode.
  if (FastISel) return false;

  // If U can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into U, then X is both a predecessor and
  // a successor of U.
  //
  //         [ N ]
  //         ^   ^
  //         |   |
  //        /     \---
  //      /        [X]
  //      |         ^
  //     [U]--------|
  if (isNonImmUse(Root, N, U))
    return false;

  // If U produces a flag, then it gets (even more) interesting. Since it
  // would have been "glued" together with its flag use, we need to check if
  // it might reach N:
  //
  //       [ N ]
  //        ^ ^
  //        | |
  //       [U] \--
  //        ^   [TF]
  //        |    ^
  //        |    |
  //         \  /
  //          [FU]
  //
  // If FU (flag use) indirectly reaches N (the load), and U folds N (call it
  // NU), then TF is a predecessor of FU and a successor of NU. But since
  // NU and FU are flagged together, this effectively creates a cycle.
  //
  // Follow the chain of flag users starting at Root; Root ends up at the
  // last node of that chain.
  bool FollowedFlag = false;
  for (;;) {
    const bool ProducesFlag =
      Root->getValueType(Root->getNumValues()-1) == MVT::Flag;
    if (!ProducesFlag || Root->use_empty())
      break;
    SDNode *FlagUser = findFlagUse(Root);
    if (FlagUser == NULL)
      break;
    Root = FlagUser;
    FollowedFlag = true;
  }

  // Without a flag user there is nothing more to check.
  if (!FollowedFlag)
    return true;
  return !isNonImmUse(Root, N, Root, U);
}
|
|
|
|
/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
|
|
/// and move load below the TokenFactor. Replace store's chain operand with
|
|
/// load's chain result.
|
|
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
|
|
SDOperand Store, SDOperand TF) {
|
|
std::vector<SDOperand> Ops;
|
|
for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
|
|
if (Load.Val == TF.Val->getOperand(i).Val)
|
|
Ops.push_back(Load.Val->getOperand(0));
|
|
else
|
|
Ops.push_back(TF.Val->getOperand(i));
|
|
DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
|
|
DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
|
|
DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
|
|
Store.getOperand(2), Store.getOperand(3));
|
|
}
|
|
|
|
/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction
|
|
/// selector to pick more load-modify-store instructions. This is a common
|
|
/// case:
|
|
///
|
|
/// [Load chain]
|
|
/// ^
|
|
/// |
|
|
/// [Load]
|
|
/// ^ ^
|
|
/// | |
|
|
/// / \-
|
|
/// / |
|
|
/// [TokenFactor] [Op]
|
|
/// ^ ^
|
|
/// | |
|
|
/// \ /
|
|
/// \ /
|
|
/// [Store]
|
|
///
|
|
/// The fact the store's chain operand != load's chain will prevent the
|
|
/// (store (op (load))) instruction from being selected. We can transform it to:
|
|
///
|
|
/// [Load chain]
|
|
/// ^
|
|
/// |
|
|
/// [TokenFactor]
|
|
/// ^
|
|
/// |
|
|
/// [Load]
|
|
/// ^ ^
|
|
/// | |
|
|
/// | \-
|
|
/// | |
|
|
/// | [Op]
|
|
/// | ^
|
|
/// | |
|
|
/// \ /
|
|
/// \ /
|
|
/// [Store]
|
|
void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) {
|
|
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
|
|
E = DAG.allnodes_end(); I != E; ++I) {
|
|
if (!ISD::isNON_TRUNCStore(I))
|
|
continue;
|
|
SDOperand Chain = I->getOperand(0);
|
|
if (Chain.Val->getOpcode() != ISD::TokenFactor)
|
|
continue;
|
|
|
|
SDOperand N1 = I->getOperand(1);
|
|
SDOperand N2 = I->getOperand(2);
|
|
if (MVT::isFloatingPoint(N1.getValueType()) ||
|
|
MVT::isVector(N1.getValueType()) ||
|
|
!N1.hasOneUse())
|
|
continue;
|
|
|
|
bool RModW = false;
|
|
SDOperand Load;
|
|
unsigned Opcode = N1.Val->getOpcode();
|
|
switch (Opcode) {
|
|
case ISD::ADD:
|
|
case ISD::MUL:
|
|
case ISD::AND:
|
|
case ISD::OR:
|
|
case ISD::XOR:
|
|
case ISD::ADDC:
|
|
case ISD::ADDE: {
|
|
SDOperand N10 = N1.getOperand(0);
|
|
SDOperand N11 = N1.getOperand(1);
|
|
if (ISD::isNON_EXTLoad(N10.Val))
|
|
RModW = true;
|
|
else if (ISD::isNON_EXTLoad(N11.Val)) {
|
|
RModW = true;
|
|
std::swap(N10, N11);
|
|
}
|
|
RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
|
|
(N10.getOperand(1) == N2) &&
|
|
(N10.Val->getValueType(0) == N1.getValueType());
|
|
if (RModW)
|
|
Load = N10;
|
|
break;
|
|
}
|
|
case ISD::SUB:
|
|
case ISD::SHL:
|
|
case ISD::SRA:
|
|
case ISD::SRL:
|
|
case ISD::ROTL:
|
|
case ISD::ROTR:
|
|
case ISD::SUBC:
|
|
case ISD::SUBE:
|
|
case X86ISD::SHLD:
|
|
case X86ISD::SHRD: {
|
|
SDOperand N10 = N1.getOperand(0);
|
|
if (ISD::isNON_EXTLoad(N10.Val))
|
|
RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
|
|
(N10.getOperand(1) == N2) &&
|
|
(N10.Val->getValueType(0) == N1.getValueType());
|
|
if (RModW)
|
|
Load = N10;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (RModW) {
|
|
MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
|
|
++NumLoadMoved;
|
|
}
|
|
}
|
|
}
|
|
|
|
/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
|
|
/// when it has created a SelectionDAG for us to codegen.
|
|
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
|
|
DEBUG(BB->dump());
|
|
MachineFunction::iterator FirstMBB = BB;
|
|
|
|
if (!FastISel)
|
|
InstructionSelectPreprocess(DAG);
|
|
|
|
// Codegen the basic block.
|
|
#ifndef NDEBUG
|
|
DOUT << "===== Instruction selection begins:\n";
|
|
Indent = 0;
|
|
#endif
|
|
DAG.setRoot(SelectRoot(DAG.getRoot()));
|
|
#ifndef NDEBUG
|
|
DOUT << "===== Instruction selection ends:\n";
|
|
#endif
|
|
|
|
DAG.RemoveDeadNodes();
|
|
|
|
// Emit machine code to BB.
|
|
ScheduleAndEmitDAG(DAG);
|
|
|
|
// If we are emitting FP stack code, scan the basic block to determine if this
|
|
// block defines any FP values. If so, put an FP_REG_KILL instruction before
|
|
// the terminator of the block.
|
|
|
|
// Note that FP stack instructions are used in all modes for long double,
|
|
// so we always need to do this check.
|
|
// Also note that it's possible for an FP stack register to be live across
|
|
// an instruction that produces multiple basic blocks (SSE CMOV) so we
|
|
// must check all the generated basic blocks.
|
|
|
|
// Scan all of the machine instructions in these MBBs, checking for FP
|
|
// stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
|
|
MachineFunction::iterator MBBI = FirstMBB;
|
|
do {
|
|
bool ContainsFPCode = false;
|
|
for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
|
|
!ContainsFPCode && I != E; ++I) {
|
|
if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
|
|
const TargetRegisterClass *clas;
|
|
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
|
|
if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
|
|
MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
|
|
((clas = RegMap->getRegClass(I->getOperand(0).getReg())) ==
|
|
X86::RFP32RegisterClass ||
|
|
clas == X86::RFP64RegisterClass ||
|
|
clas == X86::RFP80RegisterClass)) {
|
|
ContainsFPCode = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Check PHI nodes in successor blocks. These PHI's will be lowered to have
|
|
// a copy of the input value in this block. In SSE mode, we only care about
|
|
// 80-bit values.
|
|
if (!ContainsFPCode) {
|
|
// Final check, check LLVM BB's that are successors to the LLVM BB
|
|
// corresponding to BB for FP PHI nodes.
|
|
const BasicBlock *LLVMBB = BB->getBasicBlock();
|
|
const PHINode *PN;
|
|
for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
|
|
!ContainsFPCode && SI != E; ++SI) {
|
|
for (BasicBlock::const_iterator II = SI->begin();
|
|
(PN = dyn_cast<PHINode>(II)); ++II) {
|
|
if (PN->getType()==Type::X86_FP80Ty ||
|
|
(!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
|
|
(!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
|
|
ContainsFPCode = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Finally, if we found any FP code, emit the FP_REG_KILL instruction.
|
|
if (ContainsFPCode) {
|
|
BuildMI(*MBBI, MBBI->getFirstTerminator(),
|
|
TM.getInstrInfo()->get(X86::FP_REG_KILL));
|
|
++NumFPKill;
|
|
}
|
|
} while (&*(MBBI++) != BB);
|
|
}
|
|
|
|
/// MatchAddress - Add the specified node to the specified addressing mode,
|
|
/// returning true if it cannot be done. This just pattern matches for the
|
|
/// addressing mode
|
|
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
|
|
bool isRoot, unsigned Depth) {
|
|
// Limit recursion.
|
|
if (Depth > 5)
|
|
return MatchAddressBase(N, AM, isRoot, Depth);
|
|
|
|
// RIP relative addressing: %rip + 32-bit displacement!
|
|
if (AM.isRIPRel) {
|
|
if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
|
|
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
|
|
if (isInt32(AM.Disp + Val)) {
|
|
AM.Disp += Val;
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int id = N.Val->getNodeId();
|
|
bool Available = isSelected(id);
|
|
|
|
switch (N.getOpcode()) {
|
|
default: break;
|
|
case ISD::Constant: {
|
|
int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
|
|
if (isInt32(AM.Disp + Val)) {
|
|
AM.Disp += Val;
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case X86ISD::Wrapper: {
|
|
bool is64Bit = Subtarget->is64Bit();
|
|
// Under X86-64 non-small code model, GV (and friends) are 64-bits.
|
|
if (is64Bit && TM.getCodeModel() != CodeModel::Small)
|
|
break;
|
|
if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
|
|
break;
|
|
// If value is available in a register both base and index components have
|
|
// been picked, we can't fit the result available in the register in the
|
|
// addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
|
|
if (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
|
|
bool isStatic = TM.getRelocationModel() == Reloc::Static;
|
|
SDOperand N0 = N.getOperand(0);
|
|
// Mac OS X X86-64 lower 4G address is not available.
|
|
bool isAbs32 = !is64Bit ||
|
|
(isStatic && Subtarget->hasLow4GUserSpaceAddress());
|
|
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
|
|
GlobalValue *GV = G->getGlobal();
|
|
if (isAbs32 || isRoot) {
|
|
AM.GV = GV;
|
|
AM.Disp += G->getOffset();
|
|
AM.isRIPRel = !isAbs32;
|
|
return false;
|
|
}
|
|
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
|
|
if (isAbs32 || isRoot) {
|
|
AM.CP = CP->getConstVal();
|
|
AM.Align = CP->getAlignment();
|
|
AM.Disp += CP->getOffset();
|
|
AM.isRIPRel = !isAbs32;
|
|
return false;
|
|
}
|
|
} else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
|
|
if (isAbs32 || isRoot) {
|
|
AM.ES = S->getSymbol();
|
|
AM.isRIPRel = !isAbs32;
|
|
return false;
|
|
}
|
|
} else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
|
|
if (isAbs32 || isRoot) {
|
|
AM.JT = J->getIndex();
|
|
AM.isRIPRel = !isAbs32;
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case ISD::FrameIndex:
|
|
if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
|
|
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
|
|
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
case ISD::SHL:
|
|
if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1)
|
|
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
|
|
unsigned Val = CN->getValue();
|
|
if (Val == 1 || Val == 2 || Val == 3) {
|
|
AM.Scale = 1 << Val;
|
|
SDOperand ShVal = N.Val->getOperand(0);
|
|
|
|
// Okay, we know that we have a scale by now. However, if the scaled
|
|
// value is an add of something and a constant, we can fold the
|
|
// constant into the disp field here.
|
|
if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
|
|
isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
|
|
AM.IndexReg = ShVal.Val->getOperand(0);
|
|
ConstantSDNode *AddVal =
|
|
cast<ConstantSDNode>(ShVal.Val->getOperand(1));
|
|
uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
|
|
if (isInt32(Disp))
|
|
AM.Disp = Disp;
|
|
else
|
|
AM.IndexReg = ShVal;
|
|
} else {
|
|
AM.IndexReg = ShVal;
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ISD::MUL:
|
|
// X*[3,5,9] -> X+X*[2,4,8]
|
|
if (!Available &&
|
|
AM.BaseType == X86ISelAddressMode::RegBase &&
|
|
AM.Base.Reg.Val == 0 &&
|
|
AM.IndexReg.Val == 0) {
|
|
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
|
|
if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
|
|
AM.Scale = unsigned(CN->getValue())-1;
|
|
|
|
SDOperand MulVal = N.Val->getOperand(0);
|
|
SDOperand Reg;
|
|
|
|
// Okay, we know that we have a scale by now. However, if the scaled
|
|
// value is an add of something and a constant, we can fold the
|
|
// constant into the disp field here.
|
|
if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
|
|
isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
|
|
Reg = MulVal.Val->getOperand(0);
|
|
ConstantSDNode *AddVal =
|
|
cast<ConstantSDNode>(MulVal.Val->getOperand(1));
|
|
uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
|
|
if (isInt32(Disp))
|
|
AM.Disp = Disp;
|
|
else
|
|
Reg = N.Val->getOperand(0);
|
|
} else {
|
|
Reg = N.Val->getOperand(0);
|
|
}
|
|
|
|
AM.IndexReg = AM.Base.Reg = Reg;
|
|
return false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case ISD::ADD:
|
|
if (!Available) {
|
|
X86ISelAddressMode Backup = AM;
|
|
if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
|
|
!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
|
|
return false;
|
|
AM = Backup;
|
|
if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
|
|
!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
|
|
return false;
|
|
AM = Backup;
|
|
}
|
|
break;
|
|
|
|
case ISD::OR:
|
|
// Handle "X | C" as "X + C" iff X is known to have C bits clear.
|
|
if (!Available) {
|
|
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
|
|
X86ISelAddressMode Backup = AM;
|
|
// Start with the LHS as an addr mode.
|
|
if (!MatchAddress(N.getOperand(0), AM, false) &&
|
|
// Address could not have picked a GV address for the displacement.
|
|
AM.GV == NULL &&
|
|
// On x86-64, the resultant disp must fit in 32-bits.
|
|
isInt32(AM.Disp + CN->getSignExtended()) &&
|
|
// Check to see if the LHS & C is zero.
|
|
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
|
|
AM.Disp += CN->getValue();
|
|
return false;
|
|
}
|
|
AM = Backup;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
return MatchAddressBase(N, AM, isRoot, Depth);
|
|
}
|
|
|
|
/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
|
|
/// specified addressing mode without any further recursion.
|
|
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
|
|
bool isRoot, unsigned Depth) {
|
|
// Is the base register already occupied?
|
|
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
|
|
// If so, check to see if the scale index register is set.
|
|
if (AM.IndexReg.Val == 0) {
|
|
AM.IndexReg = N;
|
|
AM.Scale = 1;
|
|
return false;
|
|
}
|
|
|
|
// Otherwise, we cannot select it.
|
|
return true;
|
|
}
|
|
|
|
// Default, generate it as a register.
|
|
AM.BaseType = X86ISelAddressMode::RegBase;
|
|
AM.Base.Reg = N;
|
|
return false;
|
|
}
|
|
|
|
/// SelectAddr - returns true if it is able pattern match an addressing mode.
|
|
/// It returns the operands which make up the maximal addressing mode it can
|
|
/// match by reference.
|
|
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
|
|
SDOperand &Scale, SDOperand &Index,
|
|
SDOperand &Disp) {
|
|
X86ISelAddressMode AM;
|
|
if (MatchAddress(N, AM))
|
|
return false;
|
|
|
|
MVT::ValueType VT = N.getValueType();
|
|
if (AM.BaseType == X86ISelAddressMode::RegBase) {
|
|
if (!AM.Base.Reg.Val)
|
|
AM.Base.Reg = CurDAG->getRegister(0, VT);
|
|
}
|
|
|
|
if (!AM.IndexReg.Val)
|
|
AM.IndexReg = CurDAG->getRegister(0, VT);
|
|
|
|
getAddressOperands(AM, Base, Scale, Index, Disp);
|
|
return true;
|
|
}
|
|
|
|
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
|
|
/// constant +0.0.
|
|
static inline bool isZeroNode(SDOperand Elt) {
|
|
return ((isa<ConstantSDNode>(Elt) &&
|
|
cast<ConstantSDNode>(Elt)->getValue() == 0) ||
|
|
(isa<ConstantFPSDNode>(Elt) &&
|
|
cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
|
|
}
|
|
|
|
|
|
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
|
|
/// match a load whose top elements are either undef or zeros. The load flavor
|
|
/// is derived from the type of N, which is either v4f32 or v2f64.
|
|
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
|
|
SDOperand N, SDOperand &Base,
|
|
SDOperand &Scale, SDOperand &Index,
|
|
SDOperand &Disp, SDOperand &InChain,
|
|
SDOperand &OutChain) {
|
|
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
|
InChain = N.getOperand(0).getValue(1);
|
|
if (ISD::isNON_EXTLoad(InChain.Val) &&
|
|
InChain.getValue(0).hasOneUse() &&
|
|
N.hasOneUse() &&
|
|
CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
|
|
LoadSDNode *LD = cast<LoadSDNode>(InChain);
|
|
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
|
|
return false;
|
|
OutChain = LD->getChain();
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Also handle the case where we explicitly require zeros in the top
|
|
// elements. This is a vector shuffle from the zero vector.
|
|
if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
|
|
N.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
|
|
N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
|
|
N.getOperand(1).Val->hasOneUse() &&
|
|
ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
|
|
N.getOperand(1).getOperand(0).hasOneUse()) {
|
|
// Check to see if the BUILD_VECTOR is building a zero vector.
|
|
SDOperand BV = N.getOperand(0);
|
|
for (unsigned i = 0, e = BV.getNumOperands(); i != e; ++i)
|
|
if (!isZeroNode(BV.getOperand(i)) &&
|
|
BV.getOperand(i).getOpcode() != ISD::UNDEF)
|
|
return false; // Not a zero/undef vector.
|
|
// Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
|
|
// from the LHS.
|
|
unsigned VecWidth = BV.getNumOperands();
|
|
SDOperand ShufMask = N.getOperand(2);
|
|
assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
|
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
|
|
if (C->getValue() == VecWidth) {
|
|
for (unsigned i = 1; i != VecWidth; ++i) {
|
|
if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
|
|
// ok.
|
|
} else {
|
|
ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
|
|
if (C->getValue() >= VecWidth) return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Okay, this is a zero extending load. Fold it.
|
|
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
|
|
if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
|
|
return false;
|
|
OutChain = LD->getChain();
|
|
InChain = SDOperand(LD, 1);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
|
|
/// mode it matches can be cost effectively emitted as an LEA instruction.
|
|
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
|
|
SDOperand &Base, SDOperand &Scale,
|
|
SDOperand &Index, SDOperand &Disp) {
|
|
X86ISelAddressMode AM;
|
|
if (MatchAddress(N, AM))
|
|
return false;
|
|
|
|
MVT::ValueType VT = N.getValueType();
|
|
unsigned Complexity = 0;
|
|
if (AM.BaseType == X86ISelAddressMode::RegBase)
|
|
if (AM.Base.Reg.Val)
|
|
Complexity = 1;
|
|
else
|
|
AM.Base.Reg = CurDAG->getRegister(0, VT);
|
|
else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
|
|
Complexity = 4;
|
|
|
|
if (AM.IndexReg.Val)
|
|
Complexity++;
|
|
else
|
|
AM.IndexReg = CurDAG->getRegister(0, VT);
|
|
|
|
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
|
|
// a simple shift.
|
|
if (AM.Scale > 1)
|
|
Complexity++;
|
|
|
|
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
|
|
// to a LEA. This is determined with some expermentation but is by no means
|
|
// optimal (especially for code size consideration). LEA is nice because of
|
|
// its three-address nature. Tweak the cost function again when we can run
|
|
// convertToThreeAddress() at register allocation time.
|
|
if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
|
|
// For X86-64, we should always use lea to materialize RIP relative
|
|
// addresses.
|
|
if (Subtarget->is64Bit())
|
|
Complexity = 4;
|
|
else
|
|
Complexity += 2;
|
|
}
|
|
|
|
if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
|
|
Complexity++;
|
|
|
|
if (Complexity > 2) {
|
|
getAddressOperands(AM, Base, Scale, Index, Disp);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// TryFoldLoad - If N is a non-extending load with a single use that P is
/// allowed to fold, match the load's address and return the result of
/// SelectAddr; otherwise return false.
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (!ISD::isNON_EXTLoad(N.Val))
    return false;
  if (!N.hasOneUse())
    return false;
  if (!CanBeFoldedBy(N.Val, P.Val, P.Val))
    return false;

  // Operand 1 of the load is the address being loaded from.
  return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
}
|
|
|
|
/// getGlobalBaseReg - Output the instructions required to put the
|
|
/// base address to use for accessing globals into a register.
|
|
///
|
|
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
|
|
assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
|
|
if (!GlobalBaseReg) {
|
|
// Insert the set of GlobalBaseReg into the first MBB of the function
|
|
MachineBasicBlock &FirstMBB = BB->getParent()->front();
|
|
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
|
|
SSARegMap *RegMap = BB->getParent()->getSSARegMap();
|
|
unsigned PC = RegMap->createVirtualRegister(X86::GR32RegisterClass);
|
|
|
|
const TargetInstrInfo *TII = TM.getInstrInfo();
|
|
BuildMI(FirstMBB, MBBI, TII->get(X86::MovePCtoStack));
|
|
BuildMI(FirstMBB, MBBI, TII->get(X86::POP32r), PC);
|
|
|
|
// If we're using vanilla 'GOT' PIC style, we should use relative addressing
|
|
// not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
|
|
if (TM.getRelocationModel() == Reloc::PIC_ &&
|
|
Subtarget->isPICStyleGOT()) {
|
|
GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass);
|
|
BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg).
|
|
addReg(PC).
|
|
addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
|
|
} else {
|
|
GlobalBaseReg = PC;
|
|
}
|
|
|
|
}
|
|
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
|
|
}
|
|
|
|
/// FindCallStartFromCall - Follow operand 0 (asserted to be a token chain)
/// upwards from Node until a CALLSEQ_START node is reached, and return it.
/// If Node itself is a CALLSEQ_START it is returned unchanged.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  SDNode *Cur = Node;
  while (Cur->getOpcode() != ISD::CALLSEQ_START) {
    assert(Cur->getOperand(0).getValueType() == MVT::Other &&
           "Node doesn't have a token chain argument!");
    // Step to the node that produces this node's chain input.
    Cur = Cur->getOperand(0).Val;
  }
  return Cur;
}
|
|
|
|
/// getTruncate - Build an EXTRACT_SUBREG node of type VT that takes the
/// sub-register of N0 matching VT (sub-register set 1 for i8, 2 for i16,
/// 3 for i32).
///
/// For an i8 result on a non-64-bit subtarget, N0 is first passed through a
/// MOV16to16_ / MOV32to32_ node (chosen by N0's type) so that the source
/// register is one that has an 8-bit sub-register.
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
  SDOperand SubRegIdx;
  switch (VT) {
  case MVT::i8:
    SubRegIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
    // Ensure that the source register has an 8-bit subreg on 32-bit targets
    if (!Subtarget->is64Bit()) {
      unsigned CopyOpc;
      MVT::ValueType CopyVT;
      switch (N0.getValueType()) {
      default: assert(0 && "Unknown truncate!");
      case MVT::i16:
        CopyOpc = X86::MOV16to16_;
        CopyVT = MVT::i16;
        break;
      case MVT::i32:
        CopyOpc = X86::MOV32to32_;
        CopyVT = MVT::i32;
        break;
      }
      SDNode *CopyNode = CurDAG->getTargetNode(CopyOpc, CopyVT, N0);
      N0 = SDOperand(CopyNode, 0);
    }
    break;
  case MVT::i16:
    SubRegIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
    break;
  case MVT::i32:
    SubRegIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
    break;
  default: assert(0 && "Unknown truncate!");
  }

  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SubRegIdx);
}
|
|
|
|
|
|
/// Select - Instruction selection entry point for a single DAG node.
///
/// A handful of opcodes are selected by hand here (X86ISD::GlobalBaseReg,
/// ADD of a wrapped global/constant-pool address and a constant,
/// MULHU/MULHS, SDIV/UDIV/SREM/UREM, ANY_EXTEND, SIGN_EXTEND_INREG and
/// TRUNCATE); everything else is forwarded to the auto-generated
/// SelectCode().
///
/// Returns NULL when the node is already selected or when the uses of the
/// node were rewritten in place through ReplaceUses (multiply-high and
/// divide/remainder cases); otherwise returns the node produced for N.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
    default: break;
    case X86ISD::GlobalBaseReg:
      return getGlobalBaseReg();

    case ISD::ADD: {
      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
      // code and is matched first so to prevent it from being turned into
      // LEA32r X+c.
      // In 64-bit mode, use LEA to take advantage of RIP-relative addressing.
      MVT::ValueType PtrVT = TLI.getPointerTy();
      SDOperand N0 = N.getOperand(0);
      SDOperand N1 = N.getOperand(1);
      if (N.Val->getValueType(0) == PtrVT &&
          N0.getOpcode() == X86ISD::Wrapper &&
          N1.getOpcode() == ISD::Constant) {
        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
        SDOperand C(0, 0);
        // TODO: handle ExternalSymbolSDNode.
        if (GlobalAddressSDNode *G =
              dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                             G->getOffset() + Offset);
        } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
          C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                            CP->getAlignment(),
                                            CP->getOffset()+Offset);
        }

        if (C.Val) {
          if (Subtarget->is64Bit()) {
            SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                                CurDAG->getRegister(0, PtrVT), C };
            return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
          } else
            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
        }
      }

      // Other cases are handled by auto-generated code.
      break;
    }

    case ISD::MULHU:
    case ISD::MULHS: {
      // Pick the register and memory forms of the one-operand multiply.
      if (Opcode == ISD::MULHU)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
        }

      // Input goes in LoReg; the high half of the product is read from HiReg.
      unsigned LoReg, HiReg;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
      }

      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      // MULHU and MULHS are commutative, so try folding the other operand.
      if (!foldedLoad) {
        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        if (foldedLoad)
          std::swap(N0, N1);
      }

      SDOperand Chain;
      if (foldedLoad) {
        Chain = N1.getOperand(0);
        AddToISelQueue(Chain);
      } else
        Chain = CurDAG->getEntryNode();

      SDOperand InFlag(0, 0);
      AddToISelQueue(N0);
      Chain  = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT),
                                    N0, InFlag);
      InFlag = Chain.getValue(1);

      if (foldedLoad) {
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        Chain  = SDOperand(CNode, 0);
        InFlag = SDOperand(CNode, 1);
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      SDOperand Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
        Chain = Result.getValue(1);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag);
        Chain = Result.getValue(1);
      }
      ReplaceUses(N.getValue(0), Result);
      // Redirect users of the folded load's chain to the chain produced by
      // the CopyFromReg. This must be the recorded Chain, not
      // Result.getValue(1): in the AH path above Result is the
      // EXTRACT_SUBREG node, which is created with a single i8 result and
      // therefore has no value #1.
      if (foldedLoad)
        ReplaceUses(N1.getValue(1), Chain);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return NULL;
    }

    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM: {
      bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM;
      bool isDiv    = Opcode == ISD::SDIV || Opcode == ISD::UDIV;
      // Pick the register and memory forms of the divide.
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
        }

      // LoReg/HiReg hold the dividend; ClrOpcode zeroes the high part for
      // unsigned operations and SExtOpcode sign-extends into it for signed.
      unsigned LoReg, HiReg;
      unsigned ClrOpcode, SExtOpcode;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = 0;
        SExtOpcode = X86::CBW;
        break;
      case MVT::i16:
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        break;
      case MVT::i32:
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        break;
      case MVT::i64:
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;
        break;
      }

      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);
      SDOperand InFlag(0, 0);
      if (NVT == MVT::i8 && !isSigned) {
        // Special case for div8, just use a move with zero extension to AX to
        // clear the upper 8 bits (AH).
        SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
          SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
          AddToISelQueue(N0.getOperand(0));
          AddToISelQueue(Tmp0);
          AddToISelQueue(Tmp1);
          AddToISelQueue(Tmp2);
          AddToISelQueue(Tmp3);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                            Ops, 5), 0);
          Chain = Move.getValue(1);
          ReplaceUses(N0.getValue(1), Chain);
        } else {
          AddToISelQueue(N0);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
          Chain = CurDAG->getEntryNode();
        }
        Chain  = CurDAG->getCopyToReg(Chain, X86::AX, Move, InFlag);
        InFlag = Chain.getValue(1);
      } else {
        AddToISelQueue(N0);
        InFlag =
          CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg, N0,
                               InFlag).getValue(1);
        if (isSigned) {
          // Sign extend the low part into the high part.
          InFlag =
            SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
        } else {
          // Zero out the high part, effectively zero extending the input.
          SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg, ClrNode,
                                        InFlag).getValue(1);
        }
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Chain;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      if (foldedLoad) {
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        Chain  = SDOperand(CNode, 0);
        InFlag = SDOperand(CNode, 1);
      } else {
        AddToISelQueue(N1);
        Chain = CurDAG->getEntryNode();
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Quotient is read from LoReg, remainder from HiReg.
      unsigned Reg = isDiv ? LoReg : HiReg;
      SDOperand Result;
      if (Reg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
        Chain = Result.getValue(1);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(Chain, Reg, NVT, InFlag);
        Chain = Result.getValue(1);
      }
      ReplaceUses(N.getValue(0), Result);
      if (foldedLoad)
        ReplaceUses(N1.getValue(1), Chain);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::ANY_EXTEND: {
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);
      if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
        SDOperand SRIdx;
        switch(N0.getValueType()) {
        case MVT::i32:
          SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
          break;
        case MVT::i16:
          SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
          break;
        case MVT::i8:
          // Only handled here in 64-bit mode; otherwise SRIdx stays null and
          // the generated selector takes over below.
          if (Subtarget->is64Bit())
            SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          break;
        default: assert(0 && "Unknown any_extend!");
        }
        if (SRIdx.Val) {
          SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG, NVT, N0, SRIdx);

#ifndef NDEBUG
          DOUT << std::string(Indent-2, ' ') << "=> ";
          DEBUG(ResNode->dump(CurDAG));
          DOUT << "\n";
          Indent -= 2;
#endif
          return ResNode;
        } // Otherwise let generated ISel handle it.
      }
      break;
    }

    case ISD::SIGN_EXTEND_INREG: {
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);

      // Truncate to the source width first, then sign-extend back to NVT.
      MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
      SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
      unsigned Opc;
      switch (NVT) {
      case MVT::i16:
        if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
        else assert(0 && "Unknown sign_extend_inreg!");
        break;
      case MVT::i32:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
        case MVT::i16: Opc = X86::MOVSX32rr16; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      case MVT::i64:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
        case MVT::i16: Opc = X86::MOVSX64rr16; break;
        case MVT::i32: Opc = X86::MOVSX64rr32; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }

      SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(TruncOp.Val->dump(CurDAG));
      DOUT << "\n";
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
    }

    case ISD::TRUNCATE: {
      SDOperand Input = Node->getOperand(0);
      AddToISelQueue(Node->getOperand(0));
      SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
    }
  }

  // Everything not handled above goes to the auto-generated selector.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}
|
|
|
|
bool X86DAGToDAGISel::
|
|
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
|
|
std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
|
|
SDOperand Op0, Op1, Op2, Op3;
|
|
switch (ConstraintCode) {
|
|
case 'o': // offsetable ??
|
|
case 'v': // not offsetable ??
|
|
default: return true;
|
|
case 'm': // memory
|
|
if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
|
|
return true;
|
|
break;
|
|
}
|
|
|
|
OutOps.push_back(Op0);
|
|
OutOps.push_back(Op1);
|
|
OutOps.push_back(Op2);
|
|
OutOps.push_back(Op3);
|
|
AddToISelQueue(Op0);
|
|
AddToISelQueue(Op1);
|
|
AddToISelQueue(Op2);
|
|
AddToISelQueue(Op3);
|
|
return false;
|
|
}
|
|
|
|
/// createX86ISelDag - This pass converts a legalized DAG into a
|
|
/// X86-specific DAG, ready for instruction scheduling.
|
|
///
|
|
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
|
|
return new X86DAGToDAGISel(TM, Fast);
|
|
}
|