llvm/lib/Target/AArch64/AArch64FastISel.cpp
Eric Christopher 41612a9b85 Remove the target machine from CCState. Previously it was only used
to get the subtarget and that's accessible from the MachineFunction
now. This helps clear the way for smaller changes where we getting
a subtarget will require passing in a MachineFunction/Function as
well.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214988 91177308-0d34-0410-b5e6-96231b3b80d8
2014-08-06 18:45:26 +00:00

2676 lines
85 KiB
C++

//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
namespace {
/// FastISel implementation for AArch64. Declares the target hooks overridden
/// from FastISel together with the selection, address-folding and emission
/// helpers that are defined later in this file.
class AArch64FastISel : public FastISel {

  /// A memory address (or call target) being assembled for an instruction.
  /// The base is either a register or a frame index, discriminated by Kind;
  /// Offset is a constant displacement and GV an optional global value.
  class Address {
  public:
    typedef enum {
      RegBase,
      FrameIndexBase
    } BaseKind;

  private:
    BaseKind Kind;
    // Only the member selected by Kind is meaningful; the accessors below
    // assert that the matching kind is active before touching it.
    union {
      unsigned Reg;
      int FI;
    } Base;
    int64_t Offset;
    const GlobalValue *GV;

  public:
    // A default-constructed address is a register base holding register 0,
    // which isValid() reports as invalid.
    Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }
    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    // The frame index is stored as an int; the unsigned parameter/return
    // types are kept as-is for existing callers.
    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }
    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() const { return Offset; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() const { return GV; }

    /// A frame-index base is always valid; a register base is valid once a
    /// non-zero register has been assigned.
    bool isValid() const {
      return isFIBase() || (isRegBase() && getReg() != 0);
    }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool FastLowerArguments() override;
  bool FastLowerCall(CallLoweringInfo &CLI) override;
  bool FastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines. Each returns true if the instruction was selected.
  bool SelectLoad(const Instruction *I);
  bool SelectStore(const Instruction *I);
  bool SelectBranch(const Instruction *I);
  bool SelectIndirectBr(const Instruction *I);
  bool SelectCmp(const Instruction *I);
  bool SelectSelect(const Instruction *I);
  bool SelectFPExt(const Instruction *I);
  bool SelectFPTrunc(const Instruction *I);
  bool SelectFPToInt(const Instruction *I, bool Signed);
  bool SelectIntToFP(const Instruction *I, bool Signed);
  bool SelectRem(const Instruction *I, unsigned ISDOpcode);
  bool SelectRet(const Instruction *I);
  bool SelectTrunc(const Instruction *I);
  bool SelectIntExt(const Instruction *I);
  bool SelectMul(const Instruction *I);
  bool SelectShift(const Instruction *I, bool IsLeftShift, bool IsArithmetic);
  bool SelectBitCast(const Instruction *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
  bool ComputeAddress(const Value *Obj, Address &Addr);
  bool ComputeCallAddress(const Value *V, Address &Addr);
  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
                       bool UseUnscaled);
  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            unsigned Flags, bool UseUnscaled);
  bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
  bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);

  // Emit functions.
  bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
  bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
                bool UseUnscaled = false);
  bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
                 bool UseUnscaled = false);
  unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                       unsigned Op1, bool Op1IsKill);
  unsigned Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                         unsigned Op1, bool Op1IsKill);
  unsigned Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                         unsigned Op1, bool Op1IsKill);
  unsigned Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
  unsigned Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);
  unsigned Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill, uint64_t Imm);

  unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned AArch64MaterializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool FinishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
  unsigned TargetMaterializeConstant(const Constant *C) override;

  /// Caches the AArch64 subtarget (taken from the target machine) and the
  /// LLVMContext of the function being lowered.
  explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
    Context = &funcInfo.Fn->getContext();
  }

  bool TargetSelectInstruction(const Instruction *I) override;

  // Tablegen-generated FastISel members.
#include "AArch64GenFastISel.inc"
};
} // end anonymous namespace
#include "AArch64GenCallingConv.inc"
/// Returns the argument-assignment function to use for calling convention
/// \p CC on the current subtarget.
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  // WebKit JS has its own convention, independent of the target OS.
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;

  // Every other convention uses the platform's procedure call standard.
  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  return CC_AArch64_AAPCS;
}
unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI))
return 0;
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
.addImm(0)
.addImm(0);
return ResultReg;
}
return 0;
}
/// Materializes the floating-point constant \p CFP of type \p VT (f32 or f64)
/// into a register.
///
/// An FMOV-immediate is used when the value has an 8-bit FMOV encoding;
/// otherwise the constant is placed in the constant pool and loaded with an
/// ADRP + LDR pair.
///
/// \returns the register holding the constant, or 0 for unsupported types.
unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat &Val = CFP->getValueAPF();
  const bool Is64Bit = (VT == MVT::f64);

  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    // getFP32Imm/getFP64Imm return -1 when the value has no 8-bit encoding.
    // Legality alone does not guarantee an encoding exists, so check the
    // result instead of emitting FMOV with a bogus immediate.
    // NOTE(review): +0.0 appears to be reported legal without being
    // encodable -- confirm against AArch64TargetLowering::isFPImmLegal.
    const int Imm =
        Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
    if (Imm != -1) {
      const unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
      unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
              ResultReg)
          .addImm(Imm);
      return ResultReg;
    }
    // Not encodable: fall through to the constant-pool path.
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0)
    Align = DL.getTypeAllocSize(CFP->getType());

  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);

  // ADRP yields the page of the pool entry; the load adds the page offset.
  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg)
      .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet.
if (GV->isThreadLocal())
return 0;
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
return 0;
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(GV->getType(), true);
if (!DestEVT.isSimple())
return 0;
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
unsigned ResultReg;
if (OpFlags & AArch64II::MO_GOT) {
// ADRP + LDRX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC);
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addReg(ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
.addImm(0);
}
return ResultReg;
}
unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
return 0;
MVT VT = CEVT.getSimpleVT();
// FIXME: Handle ConstantInt.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return AArch64MaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return AArch64MaterializeGV(GV);
return 0;
}
/// Computes the address to get to an object.
///
/// Tries to describe the location \p Obj points to as an Address (register or
/// frame-index base plus constant offset), accumulating into \p Addr.
///
/// Forms folded below: bitcasts, no-op inttoptr/ptrtoint, GEPs whose indices
/// reduce to constants, static allocas, and adds with a constant right-hand
/// operand. If none of these produce a base, \p Obj itself is materialized
/// into a register with getRegForValue().
///
/// \returns true if \p Addr is valid on exit (see Address::isValid()).
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
const User *U = nullptr;
// UserOp1 acts as "no foldable opcode": it matches none of the cases below.
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
// another block, otherwise it may not have a virtual register assigned.
// The static_cast only forms the map key for the lookup; Obj is not
// accessed as an AllocaInst here.
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
Opcode = I->getOpcode();
U = I;
}
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
Opcode = C->getOpcode();
U = C;
}
if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
switch (Opcode) {
default:
break;
case Instruction::BitCast: {
// Look through bitcasts.
return ComputeAddress(U->getOperand(0), Addr);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
break;
}
case Instruction::GetElementPtr: {
// Keep a copy so a failed fold can be undone before falling back.
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
// Iterate through the GEP folding the constants into offsets where
// we can.
gep_type_iterator GTI = gep_type_begin(U);
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
++i, ++GTI) {
const Value *Op = *i;
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Struct index: add the field's byte offset from the struct layout.
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
} else {
// Non-struct index: each step is S bytes, the alloc size of the
// indexed type.
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
// Peel constants off the index until a plain ConstantInt remains.
for (;;) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
break;
}
if (canFoldAddIntoGEP(U, Op)) {
// A compatible add with a constant operand. Fold the constant.
ConstantInt *CI =
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
TmpOffset += CI->getSExtValue() * S;
// Iterate on the other operand.
Op = cast<AddOperator>(Op)->getOperand(0);
continue;
}
// Unsupported
goto unsupported_gep;
}
}
}
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (ComputeAddress(U->getOperand(0), Addr))
return true;
// We failed, restore everything and try the other options.
Addr = SavedAddr;
// Reached either after the restore above or directly via goto; in the goto
// case Addr was never modified, so no restore is needed.
unsupported_gep:
break;
}
case Instruction::Alloca: {
const AllocaInst *AI = cast<AllocaInst>(Obj);
DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
// Static alloca: address it directly through its frame index.
Addr.setKind(Address::FrameIndexBase);
Addr.setFI(SI->second);
return true;
}
break;
}
case Instruction::Add:
// Adds of constants are common and easy enough.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
return ComputeAddress(U->getOperand(0), Addr);
}
break;
}
// Try to get this in a register if nothing else has worked.
if (!Addr.isValid())
Addr.setReg(getRegForValue(Obj));
return Addr.isValid();
}
bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
bool InMBB = true;
if (const auto *I = dyn_cast<Instruction>(V)) {
Opcode = I->getOpcode();
U = I;
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
Opcode = C->getOpcode();
U = C;
}
switch (Opcode) {
default: break;
case Instruction::BitCast:
// Look past bitcasts if its operand is in the same BB.
if (InMBB)
return ComputeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return ComputeCallAddress(U->getOperand(0), Addr);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
if (InMBB &&
TLI.getValueType(U->getType()) == TLI.getPointerTy())
return ComputeCallAddress(U->getOperand(0), Addr);
break;
}
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
Addr.setGlobalValue(GV);
return true;
}
// If all else fails, try to materialize the value in a register.
if (!Addr.getGlobalValue()) {
Addr.setReg(getRegForValue(V));
return Addr.getReg() != 0;
}
return false;
}
/// Checks whether \p Ty maps to a simple value type that fast-isel handles
/// and the target considers legal.
///
/// \p VT receives the simple value type whenever one exists and is not
/// MVT::Other; otherwise it is left untouched.
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  const EVT TyEVT = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (!TyEVT.isSimple())
    return false;
  const MVT Simple = TyEVT.getSimpleVT();
  if (Simple.SimpleTy == MVT::Other)
    return false;
  VT = Simple;

  // f128 is a legal type, but it's not something we handle in fast-isel.
  // Everything else is accepted if a register can directly hold the value.
  return VT.SimpleTy != MVT::f128 && TLI.isTypeLegal(VT);
}
/// Like isTypeLegal(), but additionally accepts i1, i8 and i16, which loads
/// and stores can handle even though they are not legal register types.
bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now. For stores, this reflects truncation.
  switch (VT.SimpleTy) {
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    return true;
  default:
    return false;
  }
}
/// Rewrites \p Addr so that its offset fits the immediate field of the
/// load/store that will use it.
///
/// \param Addr        Address to simplify; updated in place. Its offset is
///                    expected to be already divided by \p ScaleFactor when
///                    the scaled form is used.
/// \param VT          Access type; only i1/i8/i16/i32/i64/f32/f64 are handled.
/// \param ScaleFactor Multiplier that turns the stored offset back into the
///                    value added to the base when lowering is required.
/// \param UseUnscaled True for the signed 9-bit immediate form, false for the
///                    unsigned 12-bit immediate form.
/// \returns false if the type is unsupported or the add could not be emitted.
bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
                                      int64_t ScaleFactor, bool UseUnscaled) {
  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
    break;
  }

  const int64_t Offset = Addr.getOffset();
  bool NeedsLowering;
  if (!UseUnscaled)
    // Using scaled, 12-bit, unsigned immediate offsets.
    NeedsLowering = ((Offset & 0xfff) != Offset);
  else
    // Using unscaled, 9-bit, signed immediate offsets: the encodable range is
    // [-256, 255], so 256 itself must be lowered as well.
    NeedsLowering = (Offset > 255 || Offset < -256);

  if (!NeedsLowering)
    return true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if (Addr.getKind() == Address::FrameIndexBase) {
    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
  unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
                                    UnscaledOffset, MVT::i64);
  if (ResultReg == 0)
    return false;
  Addr.setReg(ResultReg);
  Addr.setOffset(0);
  return true;
}
/// Appends the base and immediate-offset operands for \p Addr to the
/// load/store being built in \p MIB. For a frame-index base a memory operand
/// with \p Flags is attached too.
///
/// \p UseUnscaled is currently not consulted by this function.
void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           unsigned Flags, bool UseUnscaled) {
  const int64_t Imm = Addr.getOffset();

  // Register base: just the base register and the immediate.
  if (Addr.getKind() != Address::FrameIndexBase) {
    MIB.addReg(Addr.getReg());
    MIB.addImm(Imm);
    return;
  }

  // Frame base works a bit differently: it also gets a memory operand.
  const int FrameIdx = Addr.getFI();
  // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
  // and alignment should be based on the VT.
  // NOTE(review): the offset handed to getFixedStack is whatever the caller
  // left in Addr; confirm whether a scaled offset is intended here.
  MachineMemOperand *MemOp = FuncInfo.MF->getMachineMemOperand(
      MachinePointerInfo::getFixedStack(FrameIdx, Imm), Flags,
      MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx));

  MIB.addFrameIndex(FrameIdx);
  MIB.addImm(Imm);
  MIB.addMemOperand(MemOp);
}
/// Emits a load of type \p VT from \p Addr.
///
/// \param VT          Type to load; i1/i8/i16/i32/i64/f32/f64 are supported.
/// \param ResultReg   Receives the register holding the loaded value.
/// \param Addr        Address to load from (taken by value; adjusted locally).
/// \param UseUnscaled Selects the unscaled (LDUR*) opcodes instead of the
///                    scaled (LDR*ui) ones.
/// \returns false if the type is unsupported or the address cannot be
///          simplified.
bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
unsigned Opc;
const TargetRegisterClass *RC;
bool VTIsi1 = false;
// Set per type below; the scaled forms take the offset divided by it.
int64_t ScaleFactor = 0;
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
// Intentional fall-through.
case MVT::i8:
Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 1;
break;
case MVT::i16:
Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 2;
break;
case MVT::i32:
Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 4;
break;
case MVT::i64:
Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
RC = &AArch64::GPR64RegClass;
ScaleFactor = 8;
break;
case MVT::f32:
Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 4;
break;
case MVT::f64:
Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 8;
break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
// An offset that is not a multiple of ScaleFactor cannot be expressed in
// the scaled form.
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
// Create the base instruction, then add the operands.
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
// Loading an i1 requires special handling.
if (VTIsi1) {
// Mask the loaded byte with 1 so only the low bit remains in the result.
MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(ResultReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
ResultReg = ANDReg;
}
return true;
}
bool AArch64FastISel::SelectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
return false;
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(0), Addr))
return false;
unsigned ResultReg;
if (!EmitLoad(VT, ResultReg, Addr))
return false;
UpdateValueMap(I, ResultReg);
return true;
}
/// Emits a store of \p SrcReg, of type \p VT, to \p Addr.
///
/// \param VT          Type to store; i1/i8/i16/i32/i64/f32/f64 are supported.
/// \param SrcReg      Register holding the value to store.
/// \param Addr        Destination address (taken by value; adjusted locally).
/// \param UseUnscaled Selects the unscaled (STUR*) opcodes instead of the
///                    scaled (STR*ui) ones.
/// \returns false if the type is unsupported or the address cannot be
///          simplified.
bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
unsigned StrOpc;
bool VTIsi1 = false;
// Set per type below; the scaled forms take the offset divided by it.
int64_t ScaleFactor = 0;
// Using scaled, 12-bit, unsigned immediate offsets.
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
// Intentional fall-through.
case MVT::i8:
StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
ScaleFactor = 1;
break;
case MVT::i16:
StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
ScaleFactor = 2;
break;
case MVT::i32:
StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
ScaleFactor = 4;
break;
case MVT::i64:
StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
ScaleFactor = 8;
break;
case MVT::f32:
StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
ScaleFactor = 4;
break;
case MVT::f64:
StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
ScaleFactor = 8;
break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
// An offset that is not a multiple of ScaleFactor cannot be expressed in
// the scaled form.
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
// Storing an i1 requires special handling.
if (VTIsi1) {
// Mask the source with 1 so only the low bit is written as the byte.
MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(SrcReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(StrOpc)).addReg(SrcReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
return true;
}
bool AArch64FastISel::SelectStore(const Instruction *I) {
MVT VT;
Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
cast<StoreInst>(I)->isAtomic())
return false;
// Get the value to be stored into a register.
unsigned SrcReg = getRegForValue(Op0);
if (SrcReg == 0)
return false;
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(1), Addr))
return false;
if (!EmitStore(VT, SrcReg, Addr))
return false;
return true;
}
/// Maps an IR compare predicate to the AArch64 condition code to test after
/// the compare. Predicates that cannot be expressed with a single condition
/// code yield AArch64CC::AL, which callers treat as "not handled".
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
  switch (Pred) {
  // Integer predicates.
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;

  // Ordered floating-point predicates.
  case CmpInst::FCMP_OEQ:
    return AArch64CC::EQ;
  case CmpInst::FCMP_OGT:
    return AArch64CC::GT;
  case CmpInst::FCMP_OGE:
    return AArch64CC::GE;
  case CmpInst::FCMP_OLT:
    return AArch64CC::MI;
  case CmpInst::FCMP_OLE:
    return AArch64CC::LS;
  case CmpInst::FCMP_ORD:
    return AArch64CC::VC;

  // Unordered floating-point predicates.
  case CmpInst::FCMP_UNO:
    return AArch64CC::VS;
  case CmpInst::FCMP_UGT:
    return AArch64CC::HI;
  case CmpInst::FCMP_UGE:
    return AArch64CC::PL;
  case CmpInst::FCMP_ULT:
    return AArch64CC::LT;
  case CmpInst::FCMP_ULE:
    return AArch64CC::LE;
  case CmpInst::FCMP_UNE:
    return AArch64CC::NE;

  // AL is our "false" for now. FCMP_ONE and FCMP_UEQ need more compares;
  // everything else not listed above is unhandled as well.
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UEQ:
  default:
    return AArch64CC::AL;
  }
}
/// Selects a two-successor branch instruction \p I.
///
/// Depending on the condition, one of these strategies is used:
///  - a single-use compare in the same block: emit the compare and branch on
///    its condition code;
///  - a single-use truncate in the same block: test bit 0 of the source;
///  - a constant: emit an unconditional branch to the chosen successor;
///  - a condition foldXALUIntrinsic() accepts: branch on the condition code
///    it provides;
///  - otherwise: compare the condition's register against zero and branch.
///
/// \returns true if the branch was emitted; false to fall back.
bool AArch64FastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  AArch64CC::CondCode CC = AArch64CC::NE;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      // We may not handle every CC for now.
      CC = getCompareCC(CI->getPredicate());
      if (CC == AArch64CC::AL)
        return false;

      // Emit the cmp.
      if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
        return false;

      // Emit the branch.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SrcVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
      unsigned CondReg = getRegForValue(TI->getOperand(0));
      if (CondReg == 0)
        return false;

      // Issue an extract_subreg to get the lower 32-bits.
      if (SrcVT == MVT::i64)
        CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
                                             AArch64::sub_32);

      // Isolate bit 0, which is all the truncate to i1 keeps.
      MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
      unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::ANDWri), ANDReg)
          .addReg(CondReg)
          .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));

      // Compare against zero. Only the flags are wanted, so the result goes
      // to WZR as a proper def (same form as the fallback path below) rather
      // than naming ANDReg in the destination slot.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(ANDReg)
          .addImm(0)
          .addImm(0);

      // If the true block is next in layout, branch on the inverted
      // condition to the false block instead.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CC = AArch64CC::EQ;
      }
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
          .addImm(CC)
          .addMBB(TBB);

      // Obtain the branch weight and add the TrueBB to the successor list.
      uint32_t BranchWeight = 0;
      if (FuncInfo.BPI)
        BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                   TBB->getBasicBlock());
      FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

      FastEmitBranch(FBB, DbgLoc);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    // Constant condition: the destination is known, branch unconditionally.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
        .addMBB(Target);

    // Obtain the branch weight and add the target to the successor list.
    uint32_t BranchWeight = 0;
    if (FuncInfo.BPI)
      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                 Target->getBasicBlock());
    FuncInfo.MBB->addSuccessor(Target, BranchWeight);
    return true;
  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned CondReg = getRegForValue(BI->getCondition());
    if (!CondReg)
      return false;

    // Emit the branch.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
        .addImm(CC)
        .addMBB(TBB);

    // Obtain the branch weight and add the TrueBB to the successor list.
    uint32_t BranchWeight = 0;
    if (FuncInfo.BPI)
      BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                                 TBB->getBasicBlock());
    FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

    FastEmitBranch(FBB, DbgLoc);
    return true;
  }

  unsigned CondReg = getRegForValue(BI->getCondition());
  if (CondReg == 0)
    return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
          AArch64::WZR)
      .addReg(CondReg)
      .addImm(0)
      .addImm(0);

  // If the true block is next in layout, branch on the inverted condition to
  // the false block instead.
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CC = AArch64CC::EQ;
  }

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
      .addImm(CC)
      .addMBB(TBB);

  // Obtain the branch weight and add the TrueBB to the successor list.
  uint32_t BranchWeight = 0;
  if (FuncInfo.BPI)
    BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
                                               TBB->getBasicBlock());
  FuncInfo.MBB->addSuccessor(TBB, BranchWeight);

  FastEmitBranch(FBB, DbgLoc);
  return true;
}
bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
unsigned AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
// Emit the indirect branch.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
.addReg(AddrReg);
// Make sure the CFG is up-to-date.
for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
return true;
}
/// Emits a flag-setting compare of \p Src1Value against \p Src2Value.
///
/// \param Src1Value Left-hand operand; its type selects the compare opcode.
/// \param Src2Value Right-hand operand. An integer constant whose magnitude
///                  fits in 12 bits, or a positive floating-point zero, is
///                  encoded directly in the instruction.
/// \param isZExt    For integer operands: read a constant as zero-extended
///                  and zero-extend i1/i8/i16 operands to i32 (otherwise
///                  sign-extend).
/// \returns false if the type is unsupported or an operand could not be
///          placed in a register.
bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  uint64_t Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();

      Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
      if (CIVal.isNegative()) {
        // Compare against the negated value with ADDS instead of SUBS.
        isNegativeImm = true;
        Imm = -Imm;
      }
      // FIXME: We can handle more immediates using shifts.
      UseImm = ((Imm & 0xfff) == Imm);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
    // Only +0.0 is used as an immediate operand for the FP compare.
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned ZReg = 0;
  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    needsExt = true;
  // Intentional fall-through.
  case MVT::i32:
    ZReg = AArch64::WZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
    else
      CmpOpc = AArch64::SUBSWrr;
    break;
  case MVT::i64:
    ZReg = AArch64::XZR;
    if (UseImm)
      CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
    else
      CmpOpc = AArch64::SUBSXrr;
    break;
  case MVT::f32:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
    break;
  case MVT::f64:
    isICmp = false;
    CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
    break;
  }

  unsigned SrcReg1 = getRegForValue(Src1Value);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(Src2Value);
    if (SrcReg2 == 0)
      return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
    if (SrcReg1 == 0)
      return false;
    if (!UseImm) {
      SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
      if (SrcReg2 == 0)
        return false;
    }
  }

  if (isICmp) {
    // The arithmetic result is discarded: the zero register is passed as the
    // instruction's destination so that it is recorded as a def, not a use.
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg)
          .addReg(SrcReg1)
          .addImm(Imm)
          .addImm(0);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), ZReg)
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  } else {
    // FP compares have no destination register operand.
    if (UseImm)
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1);
    else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
          .addReg(SrcReg1)
          .addReg(SrcReg2);
  }
  return true;
}
bool AArch64FastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
// We may not handle every CC for now.
AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
if (CC == AArch64CC::AL)
return false;
// Emit the cmp.
if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
return false;
// Now set a register based on the comparison.
AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
ResultReg)
.addReg(AArch64::WZR)
.addReg(AArch64::WZR)
.addImm(invertedCC);
UpdateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectSelect(const Instruction *I) {
const SelectInst *SI = cast<SelectInst>(I);
EVT DestEVT = TLI.getValueType(SI->getType(), true);
if (!DestEVT.isSimple())
return false;
MVT DestVT = DestEVT.getSimpleVT();
if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
DestVT != MVT::f64)
return false;
unsigned SelectOpc;
switch (DestVT.SimpleTy) {
default: return false;
case MVT::i32: SelectOpc = AArch64::CSELWr; break;
case MVT::i64: SelectOpc = AArch64::CSELXr; break;
case MVT::f32: SelectOpc = AArch64::FCSELSrrr; break;
case MVT::f64: SelectOpc = AArch64::FCSELDrrr; break;
}
const Value *Cond = SI->getCondition();
bool NeedTest = true;
AArch64CC::CondCode CC = AArch64CC::NE;
if (foldXALUIntrinsic(CC, I, Cond))
NeedTest = false;
unsigned CondReg = getRegForValue(Cond);
if (!CondReg)
return false;
bool CondIsKill = hasTrivialKill(Cond);
if (NeedTest) {
MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(CondReg, getKillRegState(CondIsKill))
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
.addReg(ANDReg)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
}
unsigned TrueReg = getRegForValue(SI->getTrueValue());
bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
unsigned FalseReg = getRegForValue(SI->getFalseValue());
bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
if (!TrueReg || !FalseReg)
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
ResultReg)
.addReg(TrueReg, getKillRegState(TrueIsKill))
.addReg(FalseReg, getKillRegState(FalseIsKill))
.addImm(CC);
UpdateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
unsigned Op = getRegForValue(V);
if (Op == 0)
return false;
unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
unsigned Op = getRegForValue(V);
if (Op == 0)
return false;
unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
/// Select FPToUI / FPToSI.
///
/// \param I      The fptoui/fptosi instruction.
/// \param Signed True for fptosi, false for fptoui.
/// \returns true if an FCVTZS/FCVTZU instruction was emitted, false to defer
/// to another selector.
///
/// Only f32 and f64 sources are handled. The opcode table below has entries
/// for single and double precision only, so any other source type (f128, f16,
/// ...) must be rejected rather than being treated as f32.
bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
  MVT DestVT;
  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
    return false;

  // Check the source type before materializing the operand so that no
  // instructions are emitted for conversions we cannot select.
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (SrcReg == 0)
    return false;

  unsigned Opc;
  if (SrcVT == MVT::f64) {
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
  } else {
    // SrcVT is f32 here.
    if (Signed)
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
    else
      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
  }

  unsigned ResultReg = createResultReg(
      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(SrcReg);
  UpdateValueMap(I, ResultReg);
  return true;
}
bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
"Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (SrcReg == 0)
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
// Handle sign-extension.
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
SrcReg =
EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
if (SrcReg == 0)
return false;
}
MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
: &AArch64::GPR32RegClass);
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
else
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
} else {
if (Signed)
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
else
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
UpdateValueMap(I, ResultReg);
return true;
}
/// Lower the formal arguments of the current function without SelectionDAG.
///
/// Only non-variadic C-calling-convention functions are handled, and only
/// when every argument is a plain i1/i8/i16/i32/i64/f16/f32/f64 without
/// byval/inreg/sret/nest attributes and at most eight integer and eight FP
/// arguments are present. Each used argument is copied out of its incoming
/// physical register into a fresh virtual register.
bool AArch64FastISel::FastLowerArguments() {
  if (!FuncInfo.CanLowerReturn)
    return false;

  const Function *F = FuncInfo.Fn;
  if (F->isVarArg())
    return false;

  if (F->getCallingConv() != CallingConv::C)
    return false;

  // First pass: make sure every argument is simple enough and that no more
  // than 8 GPRs and 8 FPRs are needed.
  unsigned NumGPRArgs = 0;
  unsigned NumFPRArgs = 0;
  unsigned AttrIdx = 0;
  for (auto const &Arg : F->args()) {
    // Attribute indices for arguments start at 1.
    ++AttrIdx;
    const bool HasUnsupportedAttr =
        F->getAttributes().hasAttribute(AttrIdx, Attribute::ByVal) ||
        F->getAttributes().hasAttribute(AttrIdx, Attribute::InReg) ||
        F->getAttributes().hasAttribute(AttrIdx, Attribute::StructRet) ||
        F->getAttributes().hasAttribute(AttrIdx, Attribute::Nest);
    if (HasUnsupportedAttr)
      return false;

    Type *ArgTy = Arg.getType();
    const bool IsAggregateOrVector =
        ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy();
    if (IsAggregateOrVector)
      return false;

    EVT ArgVT = TLI.getValueType(ArgTy);
    if (!ArgVT.isSimple())
      return false;

    switch (ArgVT.getSimpleVT().SimpleTy) {
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      ++NumGPRArgs;
      break;
    case MVT::f16:
    case MVT::f32:
    case MVT::f64:
      ++NumFPRArgs;
      break;
    default:
      return false;
    }

    if (NumGPRArgs > 8 || NumFPRArgs > 8)
      return false;
  }

  // Incoming argument registers, one row per register width/class.
  static const MCPhysReg Registers[5][8] = {
    { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
      AArch64::W5, AArch64::W6, AArch64::W7 },
    { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
      AArch64::X5, AArch64::X6, AArch64::X7 },
    { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
      AArch64::H5, AArch64::H6, AArch64::H7 },
    { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
      AArch64::S5, AArch64::S6, AArch64::S7 },
    { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
      AArch64::D5, AArch64::D6, AArch64::D7 }
  };

  // Second pass: assign registers in order and copy the live-ins.
  unsigned NextGPR = 0;
  unsigned NextFPR = 0;
  for (auto const &Arg : F->args()) {
    MVT VT = TLI.getSimpleValueType(Arg.getType());

    unsigned Row;
    bool UsesFPR;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected value type.");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word integers arrive in a W register and are treated as i32.
      VT = MVT::i32;
      // fall-through
    case MVT::i32:
      Row = 0;
      UsesFPR = false;
      break;
    case MVT::i64:
      Row = 1;
      UsesFPR = false;
      break;
    case MVT::f16:
      Row = 2;
      UsesFPR = true;
      break;
    case MVT::f32:
      Row = 3;
      UsesFPR = true;
      break;
    case MVT::f64:
      Row = 4;
      UsesFPR = true;
      break;
    }
    // The register slot is consumed even if the argument turns out unused.
    unsigned &NextSlot = UsesFPR ? NextFPR : NextGPR;
    unsigned IncomingReg = Registers[Row][NextSlot];
    ++NextSlot;

    // Unused arguments get no register at all.
    if (Arg.use_empty()) {
      UpdateValueMap(&Arg, 0);
      continue;
    }

    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
    unsigned LiveInReg = FuncInfo.MF->addLiveIn(IncomingReg, RC);

    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
    // Without this, EmitLiveInCopies may eliminate the livein if its only
    // use is a bitcast (which isn't turned into an instruction).
    unsigned ArgReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ArgReg)
        .addReg(LiveInReg, getKillRegState(true));
    UpdateValueMap(&Arg, ArgReg);
  }

  return true;
}
/// Assign locations to the outgoing call arguments and emit the code that
/// moves them there.
///
/// \param CLI      Call description; OutVals/OutFlags are read and OutRegs is
///                 appended with every physical register an argument was
///                 copied into.
/// \param OutVTs   Value type of each outgoing argument, indexed by value
///                 number.
/// \param NumBytes [out] Stack bytes required by the call as computed by the
///                 calling-convention analysis; also used as the
///                 CALLSEQ_START operand.
/// \returns false if any argument cannot be handled (no register for the
/// value, failed extension, custom location, or failed store). Note that
/// CALLSEQ_START and earlier argument copies have already been emitted at
/// that point.
bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI,
SmallVectorImpl<MVT> &OutVTs,
unsigned &NumBytes) {
CallingConv::ID CC = CLI.CallConv;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
.addImm(NumBytes);
// Process the args.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
// Each location refers back to its source value through its value number.
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
MVT ArgVT = OutVTs[VA.getValNo()];
unsigned ArgReg = getRegForValue(ArgVal);
if (!ArgReg)
return false;
// Handle arg promotion: SExt, ZExt, AExt.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
// The value already has the location's type; nothing to do.
break;
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
if (!ArgReg)
return false;
break;
}
case CCValAssign::AExt:
// Any-extend is implemented as a zero-extend.
// Intentional fall-through.
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
MVT SrcVT = ArgVT;
ArgReg = EmitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
if (!ArgReg)
return false;
break;
}
default:
llvm_unreachable("Unknown arg promotion!");
}
// Now copy/store arg to correct locations.
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
// Remember the register so the caller can add it as an implicit use.
CLI.OutRegs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// FIXME: Handle custom args.
return false;
} else {
assert(VA.isMemLoc() && "Assuming store on stack.");
// Don't emit stores for undef values.
if (isa<UndefValue>(ArgVal))
continue;
// Need to store on the stack.
// Size of the argument in bytes, rounded up.
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
// On big-endian subtargets an argument smaller than 8 bytes is stored at
// the high end of its 8-byte slot, hence the extra offset.
unsigned BEAlign = 0;
if (ArgSize < 8 && !Subtarget->isLittleEndian())
BEAlign = 8 - ArgSize;
// Address the slot as SP + location offset (+ big-endian adjustment).
Address Addr;
Addr.setKind(Address::RegBase);
Addr.setReg(AArch64::SP);
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
if (!EmitStore(ArgVT, ArgReg, Addr))
return false;
}
}
return true;
}
/// Close the call sequence and, for non-void calls, copy the single returned
/// value out of its physical register.
///
/// \param CLI      Call description; InRegs, ResultReg and NumResultRegs are
///                 updated when a value is returned.
/// \param RetVT    Return value type, or MVT::isVoid.
/// \param NumBytes Stack bytes used by the call, passed to CALLSEQ_END.
/// \returns false if the return value is split across several locations.
bool AArch64FastISel::FinishCall(CallLoweringInfo &CLI, MVT RetVT,
                                 unsigned NumBytes) {
  const CallingConv::ID CallConv = CLI.CallConv;

  // Issue CALLSEQ_END.
  const unsigned FrameDestroyOpc = TII.getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(FrameDestroyOpc))
      .addImm(NumBytes)
      .addImm(0);

  // Nothing more to do for calls that return no value.
  if (RetVT == MVT::isVoid)
    return true;

  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetInfo(CallConv, false, *FuncInfo.MF, RetLocs, *Context);
  RetInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CallConv));

  // Only a single return value is supported.
  if (RetLocs.size() != 1)
    return false;

  // Copy the result out of the physical register it was returned in.
  CCValAssign &RetLoc = RetLocs[0];
  MVT ValueVT = RetLoc.getValVT();
  unsigned ValueReg = createResultReg(TLI.getRegClassFor(ValueVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), ValueReg)
      .addReg(RetLoc.getLocReg());
  CLI.InRegs.push_back(RetLoc.getLocReg());

  CLI.ResultReg = ValueReg;
  CLI.NumResultRegs = 1;
  return true;
}
/// Lower a call described by \p CLI.
///
/// Returns false (leaving the call to another selector) for code models other
/// than small/large, for the large code model on non-MachO targets, for
/// vararg calls, for illegal return types, for inreg/sret/nest/byval
/// arguments, for vector or >64-bit arguments, and whenever the callee
/// address or the arguments cannot be materialized. On success the emitted
/// call instruction is stored in CLI.Call and FinishCall() handles the result.
bool AArch64FastISel::FastLowerCall(CallLoweringInfo &CLI) {
CallingConv::ID CC = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
const char *SymName = CLI.SymName;
CodeModel::Model CM = TM.getCodeModel();
// Only support the small and large code model.
if (CM != CodeModel::Small && CM != CodeModel::Large)
return false;
// FIXME: Add large code model support for ELF.
if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
return false;
// Let SDISel handle vararg functions.
if (IsVarArg)
return false;
// FIXME: Only handle *simple* calls for now.
MVT RetVT;
if (CLI.RetTy->isVoidTy())
RetVT = MVT::isVoid;
else if (!isTypeLegal(CLI.RetTy, RetVT))
return false;
// Arguments carrying any of these flags are not supported.
for (auto Flag : CLI.OutFlags)
if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal())
return false;
// Set up the argument vectors.
SmallVector<MVT, 16> OutVTs;
OutVTs.reserve(CLI.OutVals.size());
for (auto *Val : CLI.OutVals) {
MVT VT;
// Accept legal types, plus i1/i8/i16 even though isTypeLegal() rejects them.
if (!isTypeLegal(Val->getType(), VT) &&
!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
return false;
// We don't handle vector parameters yet.
if (VT.isVector() || VT.getSizeInBits() > 64)
return false;
OutVTs.push_back(VT);
}
Address Addr;
if (!ComputeCallAddress(Callee, Addr))
return false;
// Handle the arguments now that we've gotten them.
unsigned NumBytes;
if (!ProcessCallArgs(CLI, OutVTs, NumBytes))
return false;
// Issue the call.
MachineInstrBuilder MIB;
if (CM == CodeModel::Small) {
// Small code model: BLR when the callee address is in a register, otherwise
// BL with the target (external symbol or global) as its operand.
unsigned CallOpc = Addr.getReg() ? AArch64::BLR : AArch64::BL;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
if (SymName)
MIB.addExternalSymbol(SymName, 0);
else if (Addr.getGlobalValue())
MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
else if (Addr.getReg())
MIB.addReg(Addr.getReg());
else
return false;
} else {
// Large code model: the callee address is always brought into a register
// first and then called through BLR.
unsigned CallReg = 0;
if (SymName) {
// Load the external symbol's address through the GOT: ADRP for the page,
// then LDR with the page offset.
unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
.addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGE);
CallReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
CallReg)
.addReg(ADRPReg)
.addExternalSymbol(SymName, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
AArch64II::MO_NC);
} else if (Addr.getGlobalValue()) {
CallReg = AArch64MaterializeGV(Addr.getGlobalValue());
} else if (Addr.getReg())
CallReg = Addr.getReg();
if (!CallReg)
return false;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::BLR)).addReg(CallReg);
}
// Add implicit physical register uses to the call.
for (auto Reg : CLI.OutRegs)
MIB.addReg(Reg, RegState::Implicit);
// Add a register mask with the call-preserved registers.
// Proper defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));
CLI.Call = MIB;
// Finish off the call including any return values.
return FinishCall(CLI, RetVT, NumBytes);
}
/// Decide whether a memcpy of \p Len bytes is small enough to expand inline.
/// With a known (non-zero) \p Alignment the copy may take at most four
/// alignment-sized units; with unknown alignment it must be under 32 bytes.
bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
  return Alignment ? (Len / Alignment <= 4) : (Len < 32);
}
/// Expand a small memcpy into a sequence of load/store pairs.
///
/// Each step copies the widest integer allowed by the remaining length and by
/// \p Alignment, then advances both addresses. Returns false if the copy is
/// not small (per IsMemCpySmall) or if a load or store cannot be emitted.
bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
                                         uint64_t Len, unsigned Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!IsMemCpySmall(Len, Alignment))
    return false;

  // Offsets are always recomputed relative to the original addresses.
  Address DestStart = Dest;
  Address SrcStart = Src;
  int64_t BytesCopied = 0;

  for (; Len != 0;) {
    MVT ChunkVT;
    const bool AlignmentIsNoLimit = !Alignment || Alignment >= 8;
    if (AlignmentIsNoLimit) {
      // Limited only by the number of bytes left.
      ChunkVT = Len >= 8   ? MVT::i64
                : Len >= 4 ? MVT::i32
                : Len >= 2 ? MVT::i16
                           : MVT::i8;
    } else if (Len >= 4 && Alignment == 4) {
      ChunkVT = MVT::i32;
    } else if (Len >= 2 && Alignment == 2) {
      ChunkVT = MVT::i16;
    } else {
      ChunkVT = MVT::i8;
    }

    unsigned TmpReg;
    if (!EmitLoad(ChunkVT, TmpReg, Src))
      return false;
    if (!EmitStore(ChunkVT, TmpReg, Dest))
      return false;

    const int64_t ChunkBytes = ChunkVT.getSizeInBits() / 8;
    Len -= ChunkBytes;
    BytesCopied += ChunkBytes;

    // We need to recompute the unscaled offset for each iteration.
    Dest.setOffset(DestStart.getOffset() + BytesCopied);
    Src.setOffset(SrcStart.getOffset() + BytesCopied);
  }

  return true;
}
/// \brief Check whether the overflow bit of an arithmetic-with-overflow
/// intrinsic, used as \p Cond by instruction \p I, can be consumed directly
/// from the flags.
///
/// This requires \p Cond to be an extractvalue of such an intrinsic with a
/// legal i32/i64 result, the intrinsic to live in the same basic block as
/// \p I, and nothing but extractvalues of that intrinsic to sit between the
/// two. \p CC is written only when the fold is possible.
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
                                        const Instruction *I,
                                        const Value *Cond) {
  const auto *EV = dyn_cast<ExtractValueInst>(Cond);
  if (!EV)
    return false;

  const auto *II = dyn_cast<IntrinsicInst>(EV->getAggregateOperand());
  if (!II)
    return false;

  // The arithmetic result (struct element 0) must be a legal i32 or i64.
  const Function *Callee = II->getCalledFunction();
  Type *ArithTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  MVT ArithVT;
  if (!isTypeLegal(ArithTy, ArithVT))
    return false;
  const bool IsI32OrI64 = ArithVT == MVT::i32 || ArithVT == MVT::i64;
  if (!IsI32OrI64)
    return false;

  // Condition code that signals overflow for each intrinsic.
  AArch64CC::CondCode OverflowCC;
  switch (II->getIntrinsicID()) {
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
    OverflowCC = AArch64CC::VS;
    break;
  case Intrinsic::uadd_with_overflow:
    OverflowCC = AArch64CC::HS;
    break;
  case Intrinsic::usub_with_overflow:
    OverflowCC = AArch64CC::LO;
    break;
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    OverflowCC = AArch64CC::NE;
    break;
  default:
    return false;
  }

  // Both instructions have to be in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Walk backwards from the instruction before I up to (excluding) the
  // intrinsic; only extractvalues of the intrinsic may be in between.
  BasicBlock::const_iterator Start = I;
  BasicBlock::const_iterator End = II;
  auto Itr = std::prev(Start);
  while (Itr != End) {
    if (!isa<ExtractValueInst>(Itr) ||
        cast<ExtractValueInst>(Itr)->getAggregateOperand() != II)
      return false;
    --Itr;
  }

  CC = OverflowCC;
  return true;
}
/// Lower the intrinsic call \p II if it is one of the handled intrinsics:
/// frameaddress, memcpy/memmove, memset, trap, sqrt and the six
/// {s,u}{add,sub,mul}.with.overflow intrinsics. Returns false for everything
/// else and whenever a handled intrinsic hits an unsupported case, leaving
/// the call to another selector.
bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {
default: return false;
case Intrinsic::frameaddress: {
MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
MFI->setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
static_cast<const AArch64RegisterInfo *>(
TM.getSubtargetImpl()->getRegisterInfo());
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = FramePtr;
// Recursively load frame address
// ldr x0, [fp]
// ldr x0, [x0]
// ldr x0, [x0]
// ...
// One load is emitted per requested level; for depth 0 the frame register
// itself is the result.
unsigned DestReg;
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
while (Depth--) {
DestReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::LDRXui), DestReg)
.addReg(SrcReg).addImm(0);
SrcReg = DestReg;
}
UpdateValueMap(II, SrcReg);
return true;
}
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);
// Don't handle volatile.
if (MTI->isVolatile())
return false;
// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
// Small memcpy's are common enough that we want to do them without a call
// if possible.
uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
unsigned Alignment = MTI->getAlignment();
if (IsMemCpySmall(Len, Alignment)) {
Address Dest, Src;
if (!ComputeAddress(MTI->getRawDest(), Dest) ||
!ComputeAddress(MTI->getRawSource(), Src))
return false;
if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment))
return true;
}
}
// Otherwise fall back to a library call; only i64 lengths are supported.
if (!MTI->getLength()->getType()->isIntegerTy(64))
return false;
if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
// The last two intrinsic arguments are not passed on to the library call.
return LowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
// Don't handle volatile.
if (MSI->isVolatile())
return false;
if (!MSI->getLength()->getType()->isIntegerTy(64))
return false;
if (MSI->getDestAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
return false;
// The last two intrinsic arguments are not passed on to the library call.
return LowerCallTo(II, "memset", II->getNumArgOperands() - 2);
}
case Intrinsic::trap: {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
}
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
unsigned Op0Reg = getRegForValue(II->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(II->getOperand(0));
// Defer to the generic FSQRT emitter for the legal type.
unsigned ResultReg = FastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
if (!ResultReg)
return false;
UpdateValueMap(II, ResultReg);
return true;
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow: {
// This implements the basic lowering of the xalu with overflow intrinsics.
const Function *Callee = II->getCalledFunction();
auto *Ty = cast<StructType>(Callee->getReturnType());
// Element 0 of the returned struct is the arithmetic result, element 1 the
// overflow condition.
Type *RetTy = Ty->getTypeAtIndex(0U);
Type *CondTy = Ty->getTypeAtIndex(1);
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
if (VT != MVT::i32 && VT != MVT::i64)
return false;
const Value *LHS = II->getArgOperand(0);
const Value *RHS = II->getArgOperand(1);
// Canonicalize immediate to the RHS.
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
unsigned LHSReg = getRegForValue(LHS);
if (!LHSReg)
return false;
bool LHSIsKill = hasTrivialKill(LHS);
// Check if the immediate can be encoded in the instruction and if we should
// invert the instruction (adds -> subs) to handle negative immediates.
bool UseImm = false;
bool UseInverse = false;
uint64_t Imm = 0;
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
if (C->isNegative()) {
UseInverse = true;
Imm = -(C->getSExtValue());
} else
Imm = C->getZExtValue();
if (isUInt<12>(Imm))
UseImm = true;
// Inversion only matters when the immediate form is actually used.
UseInverse = UseImm && UseInverse;
}
// Indexed as [is-subtract][uses-immediate][is-64-bit].
static const unsigned OpcTable[2][2][2] = {
{ {AArch64::ADDSWrr, AArch64::ADDSXrr},
{AArch64::ADDSWri, AArch64::ADDSXri} },
{ {AArch64::SUBSWrr, AArch64::SUBSXrr},
{AArch64::SUBSWri, AArch64::SUBSXri} }
};
// Opc stays 0 for the multiply intrinsics, which emit their own sequence
// and leave the product in MulReg.
unsigned Opc = 0;
unsigned MulReg = 0;
unsigned RHSReg = 0;
bool RHSIsKill = false;
AArch64CC::CondCode CC = AArch64CC::Invalid;
bool Is64Bit = VT == MVT::i64;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::uadd_with_overflow:
Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
case Intrinsic::ssub_with_overflow:
Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
case Intrinsic::usub_with_overflow:
Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
case Intrinsic::smul_with_overflow: {
CC = AArch64CC::NE;
RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
RHSIsKill = hasTrivialKill(RHS);
if (VT == MVT::i32) {
// Form the full 64-bit product, then compare its upper 32 bits against
// the lower 32 bits shifted arithmetically right by 31.
MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
AArch64::sub_32);
ShiftReg = FastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
AArch64::sub_32);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBSWrs), CmpReg)
.addReg(ShiftReg, getKillRegState(true))
.addReg(MulReg, getKillRegState(false))
.addImm(159); // 159 <-> asr #31
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// Low half via MUL, high half via MULHS; compare the high half against
// the low half shifted arithmetically right by 63.
MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned SMULHReg = FastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBSXrs), CmpReg)
.addReg(SMULHReg, getKillRegState(true))
.addReg(MulReg, getKillRegState(false))
.addImm(191); // 191 <-> asr #63
}
break;
}
case Intrinsic::umul_with_overflow: {
CC = AArch64CC::NE;
RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
RHSIsKill = hasTrivialKill(RHS);
if (VT == MVT::i32) {
// Form the full 64-bit product and test its upper 32 bits by subtracting
// the product shifted right by 32 from the zero register.
MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBSXrs), CmpReg)
.addReg(AArch64::XZR, getKillRegState(true))
.addReg(MulReg, getKillRegState(false))
.addImm(96); // 96 <-> lsr #32
MulReg = FastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
AArch64::sub_32);
} else {
assert(VT == MVT::i64 && "Unexpected value type.");
// Low half via MUL, high half via MULHU; the high half is tested by
// subtracting it from the zero register.
MulReg = Emit_MUL_rr(VT, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
unsigned UMULHReg = FastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
RHSReg, RHSIsKill);
unsigned CmpReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::SUBSXrr), CmpReg)
.addReg(AArch64::XZR, getKillRegState(true))
.addReg(UMULHReg, getKillRegState(false));
}
break;
}
}
// The RHS needs a register unless it was folded as an immediate. NOTE(review):
// for the multiply cases this repeats the lookup already done above.
if (!UseImm) {
RHSReg = getRegForValue(RHS);
if (!RHSReg)
return false;
RHSIsKill = hasTrivialKill(RHS);
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (Opc) {
// add/sub: a single flag-setting instruction yields result and flags.
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
if (UseImm) {
MIB.addImm(Imm);
MIB.addImm(0);
} else
MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
}
else
// mul: the product was computed above; copy it into the result register.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
.addReg(MulReg);
// The overflow bit goes into the register right after the arithmetic result,
// so both can be recorded as one two-register value.
unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
ResultReg2)
.addReg(AArch64::WZR, getKillRegState(true))
.addReg(AArch64::WZR, getKillRegState(true))
.addImm(getInvertedCondCode(CC));
UpdateValueMap(II, ResultReg, 2);
return true;
}
}
return false;
}
/// Select an IR ret instruction.
///
/// Void returns emit just RET_ReallyLR. A returned value is supported only
/// when it is assigned a single, fully-sized register location; i1/i8/i16
/// values whose type differs from the location's value type are extended
/// according to the zeroext/signext return attribute. Returns false for
/// vararg functions, multi-location or memory returns, f128, multi-lane
/// vectors, and cross-register-class copies.
bool AArch64FastISel::SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
if (!FuncInfo.CanLowerReturn)
return false;
if (F.isVarArg())
return false;
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
// WebKit_JS has its own return convention; everything else uses AAPCS.
CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
// Only handle a single return value for now.
if (ValLocs.size() != 1)
return false;
CCValAssign &VA = ValLocs[0];
const Value *RV = Ret->getOperand(0);
// Don't bother handling odd stuff for now.
if (VA.getLocInfo() != CCValAssign::Full)
return false;
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
unsigned Reg = getRegForValue(RV);
if (Reg == 0)
return false;
// Offset the value's base register by the location's value number.
unsigned SrcReg = Reg + VA.getValNo();
unsigned DestReg = VA.getLocReg();
// Avoid a cross-class copy. This is very unlikely.
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
return false;
EVT RVEVT = TLI.getValueType(RV->getType());
if (!RVEVT.isSimple())
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
return false;
MVT RVVT = RVEVT.getSimpleVT();
if (RVVT == MVT::f128)
return false;
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
return false;
// Without a zeroext/signext attribute the kind of extension is unknown.
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
return false;
bool isZExt = Outs[0].Flags.isZExt();
SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt);
if (SrcReg == 0)
return false;
}
// Make the copy.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
// Add register to return instruction.
RetRegs.push_back(VA.getLocReg());
}
// Emit the return and mark the returned registers as implicit uses.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::RET_ReallyLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
bool AArch64FastISel::SelectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
EVT SrcEVT = TLI.getValueType(SrcTy, true);
EVT DestEVT = TLI.getValueType(DestTy, true);
if (!SrcEVT.isSimple())
return false;
if (!DestEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
MVT DestVT = DestEVT.getSimpleVT();
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
SrcVT != MVT::i8)
return false;
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
DestVT != MVT::i1)
return false;
unsigned SrcReg = getRegForValue(Op);
if (!SrcReg)
return false;
// If we're truncating from i64 to a smaller non-legal type then generate an
// AND. Otherwise, we know the high bits are undefined and a truncate doesn't
// generate any code.
if (SrcVT == MVT::i64) {
uint64_t Mask = 0;
switch (DestVT.SimpleTy) {
default:
// Trunc i64 to i32 is handled by the target-independent fast-isel.
return false;
case MVT::i1:
Mask = 0x1;
break;
case MVT::i8:
Mask = 0xff;
break;
case MVT::i16:
Mask = 0xffff;
break;
}
// Issue an extract_subreg to get the lower 32-bits.
unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
AArch64::sub_32);
MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
// Create the AND instruction which performs the actual truncation.
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(Reg32)
.addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
SrcReg = ANDReg;
}
UpdateValueMap(I, SrcReg);
return true;
}
/// Extend an i1 value held in \p SrcReg to \p DestVT (i8, i16, i32 or i64).
///
/// Zero-extension masks the value with an AND of 1 and, for an i64 result,
/// widens the 32-bit result with SUBREG_TO_REG. Sign-extension uses a one-bit
/// SBFM; sign-extending to i64 is not implemented.
///
/// \returns the register holding the extended value, or 0 if the requested
/// extension is not supported.
unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");

  // i8 and i16 are handled as i32, so the only distinction that matters from
  // here on is whether a 64-bit result is required.
  const bool WidenTo64 = (DestVT == MVT::i64);

  if (!isZExt) {
    // FIXME: We're SExt i1 to i64.
    if (WidenTo64)
      return 0;

    unsigned SExtReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SBFMWri), SExtReg)
        .addReg(SrcReg)
        .addImm(0)
        .addImm(0);
    return SExtReg;
  }

  // Zero-extension: keep only bit 0 of the source.
  MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
  unsigned MaskedReg = createResultReg(&AArch64::GPR32spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
          MaskedReg)
      .addReg(SrcReg)
      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  if (!WidenTo64)
    return MaskedReg;

  // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the upper
  // 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
  unsigned WideReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(AArch64::SUBREG_TO_REG), WideReg)
      .addImm(0)
      .addReg(MaskedReg)
      .addImm(AArch64::sub_32);
  return WideReg;
}
/// Emit an integer multiply of \p Op0 and \p Op1 as a MADD whose third source
/// operand is the zero register.
///
/// i8, i16 and i32 use the 32-bit form; i64 uses the 64-bit form.
///
/// \returns the result register, or 0 if \p RetVT is not i8/i16/i32/i64.
unsigned AArch64FastISel::Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  const bool Is64Bit = (RetVT == MVT::i64);
  if (!Is64Bit && RetVT != MVT::i32 && RetVT != MVT::i16 && RetVT != MVT::i8)
    return 0;

  // Narrow types are multiplied in a 32-bit register.
  if (!Is64Bit)
    RetVT = MVT::i32;
  const unsigned MulOpc = Is64Bit ? AArch64::MADDXrrr : AArch64::MADDWrrr;
  const unsigned ZeroReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  // Create the base instruction, then add the operands.
  unsigned ProductReg = createResultReg(TLI.getRegClassFor(RetVT));
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(MulOpc), ProductReg);
  MIB.addReg(Op0, getKillRegState(Op0IsKill));
  MIB.addReg(Op1, getKillRegState(Op1IsKill));
  MIB.addReg(ZeroReg, getKillRegState(true));
  return ProductReg;
}
/// Emit an SMADDL of \p Op0 and \p Op1 with XZR as the third source operand,
/// producing a 64-bit result.
///
/// \returns the result register, or 0 if \p RetVT is not i64.
unsigned AArch64FastISel::Emit_SMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                        unsigned Op1, bool Op1IsKill) {
  // Only a 64-bit result is supported.
  if (!(RetVT == MVT::i64))
    return 0;

  unsigned ProductReg = createResultReg(&AArch64::GPR64RegClass);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SMADDLrrr), ProductReg);
  MIB.addReg(Op0, getKillRegState(Op0IsKill));
  MIB.addReg(Op1, getKillRegState(Op1IsKill));
  MIB.addReg(AArch64::XZR, getKillRegState(true));
  return ProductReg;
}
/// Emit a UMADDL of \p Op0 and \p Op1 with XZR as the third source operand,
/// producing a 64-bit result.
///
/// \returns the result register, or 0 if \p RetVT is not i64.
unsigned AArch64FastISel::Emit_UMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                        unsigned Op1, bool Op1IsKill) {
  // Only a 64-bit result is supported.
  if (!(RetVT == MVT::i64))
    return 0;

  unsigned ProductReg = createResultReg(&AArch64::GPR64RegClass);
  MachineInstrBuilder MIB =
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::UMADDLrrr), ProductReg);
  MIB.addReg(Op0, getKillRegState(Op0IsKill));
  MIB.addReg(Op1, getKillRegState(Op1IsKill));
  MIB.addReg(AArch64::XZR, getKillRegState(true));
  return ProductReg;
}
/// Emit a left shift of \p Op0 by the constant \p Shift as a UBFM.
///
/// i8, i16 and i32 use the 32-bit instruction; i64 uses the 64-bit one.
///
/// \returns the result register, or 0 if \p RetVT is not i8/i16/i32/i64 or if
/// \p Shift is not smaller than the bit width of \p RetVT.
unsigned AArch64FastISel::Emit_LSL_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, RegSize, DstBits;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
    Opc = AArch64::UBFMWri; RegSize = 32; DstBits = 8; break;
  case MVT::i16:
    Opc = AArch64::UBFMWri; RegSize = 32; DstBits = 16; break;
  case MVT::i32:
    Opc = AArch64::UBFMWri; RegSize = 32; DstBits = 32; break;
  case MVT::i64:
    Opc = AArch64::UBFMXri; RegSize = 64; DstBits = 64; break;
  }

  // Don't deal with shifts by the bit width or more: the immediates computed
  // below would wrap around and no longer describe a valid bitfield.
  if (Shift >= DstBits)
    return 0;

  // Shift < DstBits <= RegSize, so neither subtraction can underflow.
  unsigned ImmR = static_cast<unsigned>((RegSize - Shift) % RegSize);
  unsigned ImmS = static_cast<unsigned>(DstBits - 1 - Shift);

  // Types narrower than i32 are held in a 32-bit register.
  RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, ImmR,
                          ImmS);
}
/// Emit a logical right shift of \p Op0 by the constant \p Shift as a UBFM.
///
/// i8, i16 and i32 use the 32-bit instruction; i64 uses the 64-bit one.
///
/// \returns the result register, or 0 if \p RetVT is not i8/i16/i32/i64 or if
/// \p Shift is not smaller than the bit width of \p RetVT.
unsigned AArch64FastISel::Emit_LSR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, DstBits;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::UBFMWri; DstBits = 8;  break;
  case MVT::i16: Opc = AArch64::UBFMWri; DstBits = 16; break;
  case MVT::i32: Opc = AArch64::UBFMWri; DstBits = 32; break;
  case MVT::i64: Opc = AArch64::UBFMXri; DstBits = 64; break;
  }

  // Don't deal with shifts by the bit width or more: Shift is used directly as
  // an instruction immediate and would be out of range.
  if (Shift >= DstBits)
    return 0;

  // The second immediate is the index of the value's most significant bit.
  unsigned ImmS = DstBits - 1;

  // Types narrower than i32 are held in a 32-bit register.
  RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
                          ImmS);
}
/// Emit an arithmetic right shift of \p Op0 by the constant \p Shift as an
/// SBFM.
///
/// i8, i16 and i32 use the 32-bit instruction; i64 uses the 64-bit one.
///
/// \returns the result register, or 0 if \p RetVT is not i8/i16/i32/i64 or if
/// \p Shift is not smaller than the bit width of \p RetVT.
unsigned AArch64FastISel::Emit_ASR_ri(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                      uint64_t Shift) {
  unsigned Opc, DstBits;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:  Opc = AArch64::SBFMWri; DstBits = 8;  break;
  case MVT::i16: Opc = AArch64::SBFMWri; DstBits = 16; break;
  case MVT::i32: Opc = AArch64::SBFMWri; DstBits = 32; break;
  case MVT::i64: Opc = AArch64::SBFMXri; DstBits = 64; break;
  }

  // Don't deal with shifts by the bit width or more: Shift is used directly as
  // an instruction immediate and would be out of range.
  if (Shift >= DstBits)
    return 0;

  // The second immediate is the index of the value's most significant bit.
  unsigned ImmS = DstBits - 1;

  // Types narrower than i32 are held in a 32-bit register.
  RetVT.SimpleTy = std::max(MVT::i32, RetVT.SimpleTy);
  return FastEmitInst_rii(Opc, TLI.getRegClassFor(RetVT), Op0, Op0IsKill, Shift,
                          ImmS);
}
/// Zero- or sign-extend \p SrcReg from \p SrcVT to \p DestVT.
///
/// i1 sources are delegated to Emiti1Ext. Other sources are extended with a
/// UBFM (zero-extend) or SBFM (sign-extend) whose immediates are 0 and the
/// index of the source type's top bit. For an i64 result the source register
/// is first widened with SUBREG_TO_REG.
///
/// \returns the result register, or 0 if the type combination is unsupported.
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so only accept i1/i8/i16/i32 sources and
  // i8/i16/i32/i64 results; anything else bails out to SelectionDAG.
  const bool DestIsSupported = DestVT == MVT::i8 || DestVT == MVT::i16 ||
                               DestVT == MVT::i32 || DestVT == MVT::i64;
  const bool SrcIsSupported = SrcVT == MVT::i1 || SrcVT == MVT::i8 ||
                              SrcVT == MVT::i16 || SrcVT == MVT::i32;
  if (!DestIsSupported || !SrcIsSupported)
    return 0;

  // Determine the top bit of the source type and whether the 64-bit bitfield
  // instruction is needed.
  unsigned TopBit = 0;
  bool Use64BitOpc = false;
  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return Emiti1Ext(SrcReg, DestVT, isZExt);
  case MVT::i8:
    TopBit = 7;
    Use64BitOpc = (DestVT == MVT::i64);
    break;
  case MVT::i16:
    TopBit = 15;
    Use64BitOpc = (DestVT == MVT::i64);
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    TopBit = 31;
    Use64BitOpc = true;
    break;
  }

  unsigned Opc;
  if (Use64BitOpc)
    Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
  else
    Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;

  if (DestVT == MVT::i8 || DestVT == MVT::i16) {
    // Handle i8 and i16 as i32.
    DestVT = MVT::i32;
  } else if (DestVT == MVT::i64) {
    // Place the 32-bit source into a 64-bit register before extending.
    unsigned WideSrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), WideSrcReg)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = WideSrcReg;
  }

  unsigned ExtReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ExtReg)
      .addReg(SrcReg)
      .addImm(0)
      .addImm(TopBit);
  return ExtReg;
}
/// Select a 'zext' or 'sext' instruction.
///
/// On AArch64, in general, integer casts don't involve legal types; this code
/// handles promotable integers. The high bits for a type smaller than the
/// register size are assumed to be undefined.
///
/// \returns true if the instruction was selected, false otherwise.
bool AArch64FastISel::SelectIntExt(const Instruction *I) {
  Value *Src = I->getOperand(0);
  const bool isZExt = isa<ZExtInst>(I);

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  EVT SrcEVT = TLI.getValueType(Src->getType(), true);
  EVT DestEVT = TLI.getValueType(I->getType(), true);
  if (!SrcEVT.isSimple() || !DestEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  unsigned ResultReg = 0;

  // A function argument that already carries the matching zeroext/signext
  // attribute does not need an explicit extension instruction.
  const auto *Arg = dyn_cast<Argument>(Src);
  if (Arg && (isZExt ? Arg->hasZExtAttr() : Arg->hasSExtAttr())) {
    if (DestVT != MVT::i64) {
      ResultReg = SrcReg;
    } else {
      // Only the register width has to change for a 64-bit result.
      ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(AArch64::SUBREG_TO_REG), ResultReg)
          .addImm(0)
          .addReg(SrcReg)
          .addImm(AArch64::sub_32);
    }
  }

  // Otherwise emit a real extension.
  if (!ResultReg)
    ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}
/// Select an 'srem' or 'urem' instruction on i32 or i64.
///
/// The remainder is produced by a divide followed by an MSUB that computes
/// numerator - (quotient * denominator).
///
/// \param ISDOpcode ISD::SREM or ISD::UREM; any other value is rejected.
/// \returns true if the instruction was selected, false otherwise.
bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  const bool is64bit = (DestVT == MVT::i64);
  if (!is64bit && DestVT != MVT::i32)
    return false;

  // Pick the signed or unsigned divide of the right width.
  unsigned DivOpc;
  if (ISDOpcode == ISD::SREM)
    DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
  else if (ISDOpcode == ISD::UREM)
    DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
  else
    return false;
  const unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;

  unsigned NumerReg = getRegForValue(I->getOperand(0));
  if (!NumerReg)
    return false;
  unsigned DenomReg = getRegForValue(I->getOperand(1));
  if (!DenomReg)
    return false;

  // quotient = numerator / denominator
  unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
      .addReg(NumerReg)
      .addReg(DenomReg);

  // remainder = numerator - (quotient * denominator), using the MSUB
  // instruction.
  unsigned RemReg = createResultReg(TLI.getRegClassFor(DestVT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), RemReg)
      .addReg(QuotReg)
      .addReg(DenomReg)
      .addReg(NumerReg);

  UpdateValueMap(I, RemReg);
  return true;
}
bool AArch64FastISel::SelectMul(const Instruction *I) {
EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
if (!SrcEVT.isSimple())
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
// Must be simple value type. Don't handle vectors.
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
SrcVT != MVT::i8)
return false;
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
bool Src0IsKill = hasTrivialKill(I->getOperand(0));
unsigned Src1Reg = getRegForValue(I->getOperand(1));
if (!Src1Reg)
return false;
bool Src1IsKill = hasTrivialKill(I->getOperand(1));
unsigned ResultReg =
Emit_MUL_rr(SrcVT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
if (!ResultReg)
return false;
UpdateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectShift(const Instruction *I, bool IsLeftShift,
bool IsArithmetic) {
EVT RetEVT = TLI.getValueType(I->getType(), true);
if (!RetEVT.isSimple())
return false;
MVT RetVT = RetEVT.getSimpleVT();
if (!isa<ConstantInt>(I->getOperand(1)))
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
uint64_t ShiftVal = cast<ConstantInt>(I->getOperand(1))->getZExtValue();
unsigned ResultReg;
if (IsLeftShift)
ResultReg = Emit_LSL_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
else {
if (IsArithmetic)
ResultReg = Emit_ASR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
else
ResultReg = Emit_LSR_ri(RetVT, Op0Reg, Op0IsKill, ShiftVal);
}
if (!ResultReg)
return false;
UpdateValueMap(I, ResultReg);
return true;
}
bool AArch64FastISel::SelectBitCast(const Instruction *I) {
MVT RetVT, SrcVT;
if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
return false;
if (!isTypeLegal(I->getType(), RetVT))
return false;
unsigned Opc;
if (RetVT == MVT::f32 && SrcVT == MVT::i32)
Opc = AArch64::FMOVWSr;
else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
Opc = AArch64::FMOVXDr;
else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
Opc = AArch64::FMOVSWr;
else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
Opc = AArch64::FMOVDXr;
else
return false;
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (!Op0Reg)
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
unsigned ResultReg = FastEmitInst_r(Opc, TLI.getRegClassFor(RetVT),
Op0Reg, Op0IsKill);
if (!ResultReg)
return false;
UpdateValueMap(I, ResultReg);
return true;
}
/// Dispatch \p I to the selection routine for its opcode.
///
/// \returns whatever the chosen routine returns, or false for opcodes that
/// have no routine here.
bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
  // Reference CC_AArch64_DarwinPCS_VarArg so that it counts as used; this only
  // silences warnings and has no effect at run time.
  (void)&CC_AArch64_DarwinPCS_VarArg;

  const unsigned Opcode = I->getOpcode();
  switch (Opcode) {
  default:
    break;

  // Memory and control flow.
  case Instruction::Load:
    return SelectLoad(I);
  case Instruction::Store:
    return SelectStore(I);
  case Instruction::Br:
    return SelectBranch(I);
  case Instruction::IndirectBr:
    return SelectIndirectBr(I);
  case Instruction::Ret:
    return SelectRet(I);

  // Comparisons and selects.
  case Instruction::FCmp:
  case Instruction::ICmp:
    return SelectCmp(I);
  case Instruction::Select:
    return SelectSelect(I);

  // Floating-point conversions.
  case Instruction::FPExt:
    return SelectFPExt(I);
  case Instruction::FPTrunc:
    return SelectFPTrunc(I);
  case Instruction::FPToSI:
  case Instruction::FPToUI:
    return SelectFPToInt(I, /*Signed=*/Opcode == Instruction::FPToSI);
  case Instruction::SIToFP:
  case Instruction::UIToFP:
    return SelectIntToFP(I, /*Signed=*/Opcode == Instruction::SIToFP);

  // Integer remainder, truncation and extension.
  case Instruction::SRem:
  case Instruction::URem:
    return SelectRem(I, Opcode == Instruction::SRem ? ISD::SREM : ISD::UREM);
  case Instruction::Trunc:
    return SelectTrunc(I);
  case Instruction::ZExt:
  case Instruction::SExt:
    return SelectIntExt(I);

  // FIXME: All of these should really be handled by the target-independent
  // selector -> improve FastISel tblgen.
  case Instruction::Mul:
    return SelectMul(I);
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return SelectShift(I, /*IsLeftShift=*/Opcode == Instruction::Shl,
                       /*IsArithmetic=*/Opcode == Instruction::AShr);
  case Instruction::BitCast:
    return SelectBitCast(I);
  }

  return false;
}
namespace llvm {
/// Factory for the AArch64 FastISel implementation: returns a newly allocated
/// AArch64FastISel constructed from \p funcInfo and \p libInfo.
llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
return new AArch64FastISel(funcInfo, libInfo);
}
} // end namespace llvm