Reapply [FastISel][AArch64] Add support for more addressing modes (r215597).

Note: This was originally reverted to track down a buildbot error. Reapply
without any modifications.

Original commit message:
FastISel didn't take much advantage of the different addressing modes available
to it on AArch64. This commit teaches the ComputeAddress method to recognize
more addressing modes, which allows shifts and sign-/zero-extensions to be
folded into the memory operation itself.

For example:
  lsl x1, x1, #3     --> ldr x0, [x0, x1, lsl #3]
  ldr x0, [x0, x1]

  sxtw x1, w1
  lsl x1, x1, #3     --> ldr x0, [x0, x1, sxtw #3]
  ldr x0, [x0, x1]
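
For instance, the second case above corresponds roughly to IR of the shape
below (the new test file in this commit exercises these and similar patterns),
which can now be selected as a single ldr with an sxtw #3 operand:

  %1 = sext i32 %a to i64
  %2 = shl i64 %1, 3
  %3 = add i64 %b, %2
  %4 = inttoptr i64 %3 to i64*
  %5 = load i64* %4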

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216013 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Juergen Ributzka
Date: 2014-08-19 19:44:17 +00:00
Parent: 96b1e70c66
Commit: 06bb1ca1e0
2 changed files with 715 additions and 169 deletions


@@ -41,7 +41,6 @@ using namespace llvm;
namespace {
class AArch64FastISel : public FastISel {
class Address {
public:
typedef enum {
@@ -51,17 +50,23 @@ class AArch64FastISel : public FastISel {
private:
BaseKind Kind;
AArch64_AM::ShiftExtendType ExtType;
union {
unsigned Reg;
int FI;
} Base;
unsigned OffsetReg;
unsigned Shift;
int64_t Offset;
const GlobalValue *GV;
public:
Address() : Kind(RegBase), Offset(0), GV(nullptr) { Base.Reg = 0; }
Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
void setReg(unsigned Reg) {
@@ -72,6 +77,14 @@ class AArch64FastISel : public FastISel {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
void setOffsetReg(unsigned Reg) {
assert(isRegBase() && "Invalid offset register access!");
OffsetReg = Reg;
}
unsigned getOffsetReg() const {
assert(isRegBase() && "Invalid offset register access!");
return OffsetReg;
}
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
@@ -82,11 +95,11 @@ class AArch64FastISel : public FastISel {
}
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
void setShift(unsigned S) { Shift = S; }
unsigned getShift() { return Shift; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() { return GV; }
bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
};
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
@@ -121,13 +134,12 @@ private:
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
bool ComputeAddress(const Value *Obj, Address &Addr);
bool ComputeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
bool ComputeCallAddress(const Value *V, Address &Addr);
bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
bool UseUnscaled);
bool SimplifyAddress(Address &Addr, MVT VT);
void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
unsigned Flags, MachineMemOperand *MMO,
bool UseUnscaled);
unsigned Flags, unsigned ScaleFactor,
MachineMemOperand *MMO);
bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
@@ -137,9 +149,9 @@ private:
// Emit functions.
bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
MachineMemOperand *MMO = nullptr);
bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO = nullptr, bool UseUnscaled = false);
MachineMemOperand *MMO = nullptr);
unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emit_MUL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
@@ -346,7 +358,8 @@ unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
}
// Computes the address to get to an object.
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -373,18 +386,18 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
break;
case Instruction::BitCast: {
// Look through bitcasts.
return ComputeAddress(U->getOperand(0), Addr);
return ComputeAddress(U->getOperand(0), Addr, Ty);
}
case Instruction::IntToPtr: {
// Look past no-op inttoptrs.
if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::PtrToInt: {
// Look past no-op ptrtoints.
if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
return ComputeAddress(U->getOperand(0), Addr);
return ComputeAddress(U->getOperand(0), Addr, Ty);
break;
}
case Instruction::GetElementPtr: {
@@ -426,7 +439,7 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
// Try to grab the base operand now.
Addr.setOffset(TmpOffset);
if (ComputeAddress(U->getOperand(0), Addr))
if (ComputeAddress(U->getOperand(0), Addr, Ty))
return true;
// We failed, restore everything and try the other options.
@@ -446,19 +459,86 @@ bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
}
break;
}
case Instruction::Add:
case Instruction::Add: {
// Adds of constants are common and easy enough.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
const Value *LHS = U->getOperand(0);
const Value *RHS = U->getOperand(1);
if (isa<ConstantInt>(LHS))
std::swap(LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
Addr.setOffset(Addr.getOffset() + (uint64_t)CI->getSExtValue());
return ComputeAddress(U->getOperand(0), Addr);
return ComputeAddress(LHS, Addr, Ty);
}
Address Backup = Addr;
if (ComputeAddress(LHS, Addr, Ty) && ComputeAddress(RHS, Addr, Ty))
return true;
Addr = Backup;
break;
}
case Instruction::Shl:
if (Addr.getOffsetReg())
break;
if (const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
unsigned Val = CI->getZExtValue();
if (Val < 1 || Val > 3)
break;
uint64_t NumBytes = 0;
if (Ty && Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (NumBytes != (1UL << Val))
break;
Addr.setShift(Val);
Addr.setExtendType(AArch64_AM::LSL);
if (const auto *I = dyn_cast<Instruction>(U->getOperand(0)))
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
U = I;
if (const auto *ZE = dyn_cast<ZExtInst>(U))
if (ZE->getOperand(0)->getType()->isIntegerTy(32))
Addr.setExtendType(AArch64_AM::UXTW);
if (const auto *SE = dyn_cast<SExtInst>(U))
if (SE->getOperand(0)->getType()->isIntegerTy(32))
Addr.setExtendType(AArch64_AM::SXTW);
unsigned Reg = getRegForValue(U->getOperand(0));
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
break;
}
// Try to get this in a register if nothing else has worked.
if (!Addr.isValid())
Addr.setReg(getRegForValue(Obj));
return Addr.isValid();
if (Addr.getReg()) {
if (!Addr.getOffsetReg()) {
unsigned Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setOffsetReg(Reg);
return true;
}
return false;
}
unsigned Reg = getRegForValue(Obj);
if (!Reg)
return false;
Addr.setReg(Reg);
return true;
}
bool AArch64FastISel::ComputeCallAddress(const Value *V, Address &Addr) {
@@ -540,50 +620,80 @@ bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
return false;
}
bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
int64_t ScaleFactor, bool UseUnscaled) {
bool needsLowering = false;
int64_t Offset = Addr.getOffset();
bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
unsigned ScaleFactor;
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f32:
case MVT::f64:
if (!UseUnscaled)
// Using scaled, 12-bit, unsigned immediate offsets.
needsLowering = ((Offset & 0xfff) != Offset);
else
// Using unscaled, 9-bit, signed immediate offsets.
needsLowering = (Offset > 256 || Offset < -256);
break;
default: return false;
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1; break;
case MVT::i16: ScaleFactor = 2; break;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4; break;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8; break;
}
//If this is a stack pointer and the offset needs to be simplified then put
bool ImmediateOffsetNeedsLowering = false;
bool RegisterOffsetNeedsLowering = false;
int64_t Offset = Addr.getOffset();
if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
ImmediateOffsetNeedsLowering = true;
else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
!isUInt<12>(Offset / ScaleFactor))
ImmediateOffsetNeedsLowering = true;
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
// emit an additional add to take care of the offset register.
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.isRegBase() &&
Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
// If this is a stack pointer and the offset needs to be simplified then put
// the alloca address into a register, set the base type back to register and
// continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
if (ImmediateOffsetNeedsLowering && Addr.isFIBase()) {
unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(Addr.getFI())
.addImm(0)
.addImm(0);
.addFrameIndex(Addr.getFI())
.addImm(0)
.addImm(0);
Addr.setKind(Address::RegBase);
Addr.setReg(ResultReg);
}
if (RegisterOffsetNeedsLowering) {
unsigned ResultReg = 0;
if (Addr.getReg()) {
ResultReg = createResultReg(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::ADDXrs), ResultReg)
.addReg(Addr.getReg())
.addReg(Addr.getOffsetReg())
.addImm(Addr.getShift());
} else
ResultReg = Emit_LSL_ri(MVT::i64, Addr.getOffsetReg(),
/*Op0IsKill=*/false, Addr.getShift());
if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffsetReg(0);
Addr.setShift(0);
}
// Since the offset is too large for the load/store instruction get the
// reg+offset into a register.
if (needsLowering) {
uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
UnscaledOffset, MVT::i64);
if (ResultReg == 0)
if (ImmediateOffsetNeedsLowering) {
unsigned ResultReg = 0;
if (Addr.getReg())
ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(),
/*IsKill=*/false, Offset, MVT::i64);
else
ResultReg = FastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
if (!ResultReg)
return false;
Addr.setReg(ResultReg);
Addr.setOffset(0);
@@ -594,11 +704,11 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
const MachineInstrBuilder &MIB,
unsigned Flags,
MachineMemOperand *MMO,
bool UseUnscaled) {
int64_t Offset = Addr.getOffset();
unsigned ScaleFactor,
MachineMemOperand *MMO) {
int64_t Offset = Addr.getOffset() / ScaleFactor;
// Frame base works a bit differently. Handle it separately.
if (Addr.getKind() == Address::FrameIndexBase) {
if (Addr.isFIBase()) {
int FI = Addr.getFI();
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
@@ -608,9 +718,19 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.getReg());
MIB.addImm(Offset);
assert(Addr.isRegBase() && "Unexpected address kind.");
if (Addr.getOffsetReg()) {
assert(Addr.getOffset() == 0 && "Unexpected offset");
bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
Addr.getExtendType() == AArch64_AM::SXTX;
MIB.addReg(Addr.getReg());
MIB.addReg(Addr.getOffsetReg());
MIB.addImm(IsSigned);
MIB.addImm(Addr.getShift() != 0);
} else {
MIB.addReg(Addr.getReg());
MIB.addImm(Offset);
}
}
if (MMO)
@@ -618,72 +738,68 @@ void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
}
bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
MachineMemOperand *MMO, bool UseUnscaled) {
MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT))
return false;
unsigned ScaleFactor;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1; break;
case MVT::i16: ScaleFactor = 2; break;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4; break;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8; break;
}
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
bool UseScaled = true;
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
UseScaled = false;
ScaleFactor = 1;
}
static const unsigned OpcTable[4][6] = {
{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi,
AArch64::LDURSi, AArch64::LDURDi },
{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui,
AArch64::LDRSui, AArch64::LDRDui },
{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX,
AArch64::LDRSroX, AArch64::LDRDroX },
{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW,
AArch64::LDRSroW, AArch64::LDRDroW }
};
unsigned Opc;
const TargetRegisterClass *RC;
bool VTIsi1 = false;
int64_t ScaleFactor = 0;
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
if (Addr.getExtendType() == AArch64_AM::UXTW ||
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
// Intentional fall-through.
case MVT::i8:
Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 1;
break;
case MVT::i16:
Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 2;
break;
case MVT::i32:
Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
RC = &AArch64::GPR32RegClass;
ScaleFactor = 4;
break;
case MVT::i64:
Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
RC = &AArch64::GPR64RegClass;
ScaleFactor = 8;
break;
case MVT::f32:
Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 4;
break;
case MVT::f64:
Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 8;
break;
default: llvm_unreachable("Unexpected value type.");
case MVT::i1: VTIsi1 = true; // Intentional fall-through.
case MVT::i8: Opc = OpcTable[Idx][0]; RC = &AArch64::GPR32RegClass; break;
case MVT::i16: Opc = OpcTable[Idx][1]; RC = &AArch64::GPR32RegClass; break;
case MVT::i32: Opc = OpcTable[Idx][2]; RC = &AArch64::GPR32RegClass; break;
case MVT::i64: Opc = OpcTable[Idx][3]; RC = &AArch64::GPR64RegClass; break;
case MVT::f32: Opc = OpcTable[Idx][4]; RC = &AArch64::FPR32RegClass; break;
case MVT::f64: Opc = OpcTable[Idx][5]; RC = &AArch64::FPR64RegClass; break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitLoad(VT, ResultReg, Addr, MMO, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
// Create the base instruction, then add the operands.
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, MMO, UseUnscaled);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
// Loading an i1 requires special handling.
if (VTIsi1) {
@@ -691,8 +807,8 @@ bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(ResultReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
.addReg(ResultReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
ResultReg = ANDReg;
}
return true;
@@ -708,7 +824,7 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(0), Addr))
if (!ComputeAddress(I->getOperand(0), Addr, I->getType()))
return false;
unsigned ResultReg;
@@ -720,59 +836,63 @@ bool AArch64FastISel::SelectLoad(const Instruction *I) {
}
bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO, bool UseUnscaled) {
MachineMemOperand *MMO) {
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT))
return false;
unsigned ScaleFactor;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
case MVT::i1: // fall-through
case MVT::i8: ScaleFactor = 1; break;
case MVT::i16: ScaleFactor = 2; break;
case MVT::i32: // fall-through
case MVT::f32: ScaleFactor = 4; break;
case MVT::i64: // fall-through
case MVT::f64: ScaleFactor = 8; break;
}
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
UseUnscaled = true;
unsigned StrOpc;
bool VTIsi1 = false;
int64_t ScaleFactor = 0;
// Using scaled, 12-bit, unsigned immediate offsets.
switch (VT.SimpleTy) {
default:
return false;
case MVT::i1:
VTIsi1 = true;
case MVT::i8:
StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
bool UseScaled = true;
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
UseScaled = false;
ScaleFactor = 1;
break;
case MVT::i16:
StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
ScaleFactor = 2;
break;
case MVT::i32:
StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
ScaleFactor = 4;
break;
case MVT::i64:
StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
ScaleFactor = 8;
break;
case MVT::f32:
StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
ScaleFactor = 4;
break;
case MVT::f64:
StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
ScaleFactor = 8;
break;
}
// Scale the offset.
if (!UseUnscaled) {
int64_t Offset = Addr.getOffset();
if (Offset & (ScaleFactor - 1))
// Retry using an unscaled, 9-bit, signed immediate offset.
return EmitStore(VT, SrcReg, Addr, MMO, /*UseUnscaled*/ true);
Addr.setOffset(Offset / ScaleFactor);
}
// Simplify this down to something we can handle.
if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
return false;
static const unsigned OpcTable[4][6] = {
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
AArch64::STURSi, AArch64::STURDi },
{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
AArch64::STRSui, AArch64::STRDui },
{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
AArch64::STRSroX, AArch64::STRDroX },
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
AArch64::STRSroW, AArch64::STRDroW }
};
unsigned Opc;
bool VTIsi1 = false;
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
Addr.getOffsetReg();
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
if (Addr.getExtendType() == AArch64_AM::UXTW ||
Addr.getExtendType() == AArch64_AM::SXTW)
Idx++;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type.");
case MVT::i1: VTIsi1 = true;
case MVT::i8: Opc = OpcTable[Idx][0]; break;
case MVT::i16: Opc = OpcTable[Idx][1]; break;
case MVT::i32: Opc = OpcTable[Idx][2]; break;
case MVT::i64: Opc = OpcTable[Idx][3]; break;
case MVT::f32: Opc = OpcTable[Idx][4]; break;
case MVT::f64: Opc = OpcTable[Idx][5]; break;
}
// Storing an i1 requires special handling.
if (VTIsi1) {
@@ -780,14 +900,15 @@ bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(SrcReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
.addReg(SrcReg)
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(StrOpc)).addReg(SrcReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, MMO, UseUnscaled);
TII.get(Opc))
.addReg(SrcReg);
AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
return true;
}
@@ -809,7 +930,7 @@ bool AArch64FastISel::SelectStore(const Instruction *I) {
// See if we can handle this address.
Address Addr;
if (!ComputeAddress(I->getOperand(1), Addr))
if (!ComputeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
return false;
if (!EmitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))


@@ -0,0 +1,425 @@
; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
; Load / Store Base Register only
define zeroext i1 @load_breg_i1(i1* %a) {
; CHECK-LABEL: load_breg_i1
; CHECK: ldrb {{w[0-9]+}}, [x0]
%1 = load i1* %a
ret i1 %1
}
define zeroext i8 @load_breg_i8(i8* %a) {
; CHECK-LABEL: load_breg_i8
; CHECK: ldrb {{w[0-9]+}}, [x0]
%1 = load i8* %a
ret i8 %1
}
define zeroext i16 @load_breg_i16(i16* %a) {
; CHECK-LABEL: load_breg_i16
; CHECK: ldrh {{w[0-9]+}}, [x0]
%1 = load i16* %a
ret i16 %1
}
define i32 @load_breg_i32(i32* %a) {
; CHECK-LABEL: load_breg_i32
; CHECK: ldr {{w[0-9]+}}, [x0]
%1 = load i32* %a
ret i32 %1
}
define i64 @load_breg_i64(i64* %a) {
; CHECK-LABEL: load_breg_i64
; CHECK: ldr {{x[0-9]+}}, [x0]
%1 = load i64* %a
ret i64 %1
}
define float @load_breg_f32(float* %a) {
; CHECK-LABEL: load_breg_f32
; CHECK: ldr {{s[0-9]+}}, [x0]
%1 = load float* %a
ret float %1
}
define double @load_breg_f64(double* %a) {
; CHECK-LABEL: load_breg_f64
; CHECK: ldr {{d[0-9]+}}, [x0]
%1 = load double* %a
ret double %1
}
define void @store_breg_i1(i1* %a) {
; CHECK-LABEL: store_breg_i1
; CHECK: strb {{wzr|w[0-9]+}}, [x0]
store i1 0, i1* %a
ret void
}
define void @store_breg_i8(i8* %a) {
; CHECK-LABEL: store_breg_i8
; CHECK: strb wzr, [x0]
store i8 0, i8* %a
ret void
}
define void @store_breg_i16(i16* %a) {
; CHECK-LABEL: store_breg_i16
; CHECK: strh wzr, [x0]
store i16 0, i16* %a
ret void
}
define void @store_breg_i32(i32* %a) {
; CHECK-LABEL: store_breg_i32
; CHECK: str wzr, [x0]
store i32 0, i32* %a
ret void
}
define void @store_breg_i64(i64* %a) {
; CHECK-LABEL: store_breg_i64
; CHECK: str xzr, [x0]
store i64 0, i64* %a
ret void
}
define void @store_breg_f32(float* %a) {
; CHECK-LABEL: store_breg_f32
; CHECK: str {{wzr|s[0-9]+}}, [x0]
store float 0.0, float* %a
ret void
}
define void @store_breg_f64(double* %a) {
; CHECK-LABEL: store_breg_f64
; CHECK: str {{xzr|d[0-9]+}}, [x0]
store double 0.0, double* %a
ret void
}
; Load / Store Base Register + Immediate Offset
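; A load/store immediate offset is either scaled, 12-bit and unsigned (a
; multiple of the access size, up to #16380 for a 32-bit access) or unscaled,
; 9-bit and signed (in the range [-256, 255]). Offsets outside both forms need
; an extra instruction to materialize the address.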
; Max supported negative offset
define i32 @load_breg_immoff_1(i64 %a) {
; CHECK-LABEL: load_breg_immoff_1
; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
%1 = add i64 %a, -256
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Min unsupported negative offset
define i32 @load_breg_immoff_2(i64 %a) {
; SDAG-LABEL: load_breg_immoff_2
; SDAG: sub [[REG:x[0-9]+]], x0, #257
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: load_breg_immoff_2
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, -257
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Max supported unscaled offset
define i32 @load_breg_immoff_3(i64 %a) {
; CHECK-LABEL: load_breg_immoff_3
; CHECK: ldur {{w[0-9]+}}, [x0, #255]
%1 = add i64 %a, 255
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Min unsupported unscaled offset
define i32 @load_breg_immoff_4(i64 %a) {
; SDAG-LABEL: load_breg_immoff_4
; SDAG: add [[REG:x[0-9]+]], x0, #257
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: load_breg_immoff_4
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 257
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Max supported scaled offset
define i32 @load_breg_immoff_5(i64 %a) {
; CHECK-LABEL: load_breg_immoff_5
; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
%1 = add i64 %a, 16380
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Min unsupported scaled offset
define i32 @load_breg_immoff_6(i64 %a) {
; SDAG-LABEL: load_breg_immoff_6
; SDAG: add [[REG:x[0-9]+]], x0, #4, lsl #12
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: load_breg_immoff_6
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 16384
%2 = inttoptr i64 %1 to i32*
%3 = load i32* %2
ret i32 %3
}
; Max supported negative offset
define void @store_breg_immoff_1(i64 %a) {
; CHECK-LABEL: store_breg_immoff_1
; CHECK: stur wzr, [x0, #-256]
%1 = add i64 %a, -256
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
; Min unsupported negative offset
define void @store_breg_immoff_2(i64 %a) {
; SDAG-LABEL: store_breg_immoff_2
; SDAG: sub [[REG:x[0-9]+]], x0, #257
; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: store_breg_immoff_2
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, -257
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
; Max supported unscaled offset
define void @store_breg_immoff_3(i64 %a) {
; CHECK-LABEL: store_breg_immoff_3
; CHECK: stur wzr, [x0, #255]
%1 = add i64 %a, 255
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
; Min unsupported unscaled offset
define void @store_breg_immoff_4(i64 %a) {
; SDAG-LABEL: store_breg_immoff_4
; SDAG: add [[REG:x[0-9]+]], x0, #257
; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: store_breg_immoff_4
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 257
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
; Max supported scaled offset
define void @store_breg_immoff_5(i64 %a) {
; CHECK-LABEL: store_breg_immoff_5
; CHECK: str wzr, [x0, #16380]
%1 = add i64 %a, 16380
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
; Min unsupported scaled offset
define void @store_breg_immoff_6(i64 %a) {
; SDAG-LABEL: store_breg_immoff_6
; SDAG: add [[REG:x[0-9]+]], x0, #4, lsl #12
; SDAG-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
; FAST-LABEL: store_breg_immoff_6
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: str wzr, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 16384
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
ret void
}
define i64 @load_breg_immoff_7(i64 %a) {
; CHECK-LABEL: load_breg_immoff_7
; CHECK: ldr {{x[0-9]+}}, [x0, #48]
%1 = add i64 %a, 48
%2 = inttoptr i64 %1 to i64*
%3 = load i64* %2
ret i64 %3
}
; Flip add operands
define i64 @load_breg_immoff_8(i64 %a) {
; CHECK-LABEL: load_breg_immoff_8
; CHECK: ldr {{x[0-9]+}}, [x0, #48]
%1 = add i64 48, %a
%2 = inttoptr i64 %1 to i64*
%3 = load i64* %2
ret i64 %3
}
; Load Base Register + Register Offset
define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_1
; CHECK: ldr {{x[0-9]+}}, [x0, x1]
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i64*
%3 = load i64* %2
ret i64 %3
}
; Flip add operands
define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_2
; CHECK: ldr {{x[0-9]+}}, [x1, x0]
%1 = add i64 %b, %a
%2 = inttoptr i64 %1 to i64*
%3 = load i64* %2
ret i64 %3
}
; Load Base Register + Register Offset + Immediate Offset
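; A register offset and an immediate offset cannot be encoded in the same
; load/store, so one of the two has to be folded into a separate add first.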
define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_offreg_immoff_1
; CHECK: add [[REG:x[0-9]+]], x0, x1
; CHECK-NEXT: ldr x0, {{\[}}[[REG]], #48{{\]}}
%1 = add i64 %a, %b
%2 = add i64 %1, 48
%3 = inttoptr i64 %2 to i64*
%4 = load i64* %3
ret i64 %4
}
define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
; SDAG-LABEL: load_breg_offreg_immoff_2
; SDAG: add [[REG1:x[0-9]+]], x0, x1
; SDAG-NEXT: add [[REG2:x[0-9]+]], [[REG1]], #15, lsl #12
; SDAG-NEXT: ldr x0, {{\[}}[[REG2]]{{\]}}
; FAST-LABEL: load_breg_offreg_immoff_2
; FAST: add [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: ldr x0, {{\[}}[[REG]], x1{{\]}}
%1 = add i64 %a, %b
%2 = add i64 %1, 61440
%3 = inttoptr i64 %2 to i64*
%4 = load i64* %3
ret i64 %4
}
; Load Base Register + Scaled Register Offset
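; The shift is only folded into the load when the shift amount matches the
; access size (lsl #2 for a 32-bit access); otherwise it is emitted separately.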
define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
%1 = shl i64 %a, 2
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i32*
%4 = load i32* %3
ret i32 %4
}
define i32 @load_breg_shift_offreg_2(i64 %a, i64 %b) {
; CHECK-LABEL: load_breg_shift_offreg_2
; CHECK: ldr {{w[0-9]+}}, [x1, x0, lsl #2]
%1 = shl i64 %a, 2
%2 = add i64 %b, %1
%3 = inttoptr i64 %2 to i32*
%4 = load i32* %3
ret i32 %4
}
define i32 @load_breg_shift_offreg_3(i64 %a, i64 %b) {
; SDAG-LABEL: load_breg_shift_offreg_3
; SDAG: lsl [[REG:x[0-9]+]], x0, #2
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
; FAST-LABEL: load_breg_shift_offreg_3
; FAST: lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
%1 = shl i64 %a, 2
%2 = shl i64 %b, 2
%3 = add i64 %1, %2
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_shift_offreg_4(i64 %a, i64 %b) {
; SDAG-LABEL: load_breg_shift_offreg_4
; SDAG: lsl [[REG:x[0-9]+]], x1, #2
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
; FAST-LABEL: load_breg_shift_offreg_4
; FAST: lsl [[REG:x[0-9]+]], x0, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
%1 = shl i64 %a, 2
%2 = shl i64 %b, 2
%3 = add i64 %2, %1
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
; SDAG-LABEL: load_breg_shift_offreg_5
; SDAG: lsl [[REG:x[0-9]+]], x1, #3
; SDAG-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
; FAST-LABEL: load_breg_shift_offreg_5
; FAST: lsl [[REG:x[0-9]+]], x1, {{x[0-9]+}}
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
%1 = shl i64 %a, 2
%2 = shl i64 %b, 3
%3 = add i64 %1, %2
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
; Load Base Register + Scaled Register Offset + Sign/Zero extension
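; A zero-/sign-extension from i32 is folded as an uxtw/sxtw extend of the
; 32-bit offset register.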
define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_zext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
%1 = zext i32 %a to i64
%2 = shl i64 %1, 2
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_zext_shift_offreg_2
; CHECK: ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
%1 = zext i32 %a to i64
%2 = shl i64 %1, 2
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_1
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
%1 = sext i32 %a to i64
%2 = shl i64 %1, 2
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}
define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_sext_shift_offreg_2
; CHECK: ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
%1 = sext i32 %a to i64
%2 = shl i64 %1, 2
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i32*
%5 = load i32* %4
ret i32 %5
}