[ARM] Re-re-apply VLD1/VST1 base-update combine.
This re-applies r223862, r224198, r224203, and r224754, which were reverted in r228129 because they exposed Clang misalignment problems when self-hosting.

The combine caused the crashes because we turned ISD::LOAD/STORE nodes into ARMISD::VLD1/VST1_UPD nodes. When selecting addressing modes, we were very lax for the former, and only emitted the alignment operand (as in "[r1:128]") when it was larger than the standard alignment of the memory type. However, for ARMISD nodes, we just used the MMO alignment, no matter what. In our case, we turned ISD nodes into ARMISD nodes, and this caused the alignment operands to start being emitted. And that's how we exposed alignment problems that were ignored before (though I believe they would have been caught with SCTLR.A==1).

To fix this, we can just mirror the hack done for ISD nodes: only take into account the MMO alignment when the access is overaligned.

Original commit message:

We used to only combine intrinsics, and turn them into VLD1_UPD/VST1_UPD when the base pointer is incremented after the load/store.

We can do the same thing for generic load/stores.

Note that we can only combine the first load/store + add pair in a sequence (as might be generated for a v16f32 load, for instance), because other combines turn the base pointer addition chain (each computing the address of the next load from the address of the previous load) into independent additions (common base pointer + this load's offset).

rdar://19717869, rdar://14062261.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229932 91177308-0d34-0410-b5e6-96231b3b80d8
parent 19f93ebf18
commit 5898fc70ec
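For illustration, here is a minimal IR sketch of the pattern this combine now handles, modeled on the new test/CodeGen/ARM/vector-load.ll added below (the function name is made up for the example). A vector load whose base pointer is incremented right afterwards should now select to a single post-incremented vld1 instead of a vld1 followed by a separate add:

; Sketch only: same shape as the new tests below; hypothetical function name.
; The load plus the following base-pointer increment are expected to select
; to one post-incremented NEON load, e.g. "vld1.8 {d0, d1}, [r0]!".
define <16 x i8> @load_then_bump(<16 x i8>** %ptr) {
  %A = load <16 x i8>** %ptr                  ; fetch the base pointer
  %lA = load <16 x i8>* %A, align 1           ; the vector load to be combined
  %inc = getelementptr <16 x i8>* %A, i32 1   ; base update: %A plus 16 bytes
  store <16 x i8>* %inc, <16 x i8>** %ptr     ; keep the updated base live
  ret <16 x i8> %lA
}

As the commit message notes, only the first load/store + add pair of a longer chain is combined; the remaining addresses are rewritten by other combines into independent "base + offset" additions and keep their explicit adds.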
@@ -992,18 +992,24 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   Addr = N;

   unsigned Alignment = 0;
-  if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(Parent)) {
+
+  MemSDNode *MemN = cast<MemSDNode>(Parent);
+
+  if (isa<LSBaseSDNode>(MemN) ||
+      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
+        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
+       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
     // The maximum alignment is equal to the memory size being referenced.
-    unsigned LSNAlign = LSN->getAlignment();
-    unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
-    if (LSNAlign >= MemSize && MemSize > 1)
+    unsigned MMOAlign = MemN->getAlignment();
+    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
+    if (MMOAlign >= MemSize && MemSize > 1)
       Alignment = MemSize;
   } else {
     // All other uses of addrmode6 are for intrinsics. For now just record
     // the raw alignment value; it will be refined later based on the legal
     // alignment operands for the intrinsic.
-    Alignment = cast<MemIntrinsicSDNode>(Parent)->getAlignment();
+    Alignment = MemN->getAlignment();
   }

   Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
@@ -565,6 +565,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::FP_TO_SINT);
   setTargetDAGCombine(ISD::FP_TO_UINT);
   setTargetDAGCombine(ISD::FDIV);
+  setTargetDAGCombine(ISD::LOAD);

   // It is legal to extload from v4i8 to v4i16 or v4i32.
   MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
@@ -8872,17 +8873,18 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
                               DAG.getUNDEF(VT), NewMask.data());
 }

-/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
-/// NEON load/store intrinsics to merge base address updates.
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
+/// NEON load/store intrinsics, and generic vector load/stores, to merge
+/// base address updates.
+/// For generic load/stores, the memory type is assumed to be a vector.
+/// The caller is assumed to have checked legality.
 static SDValue CombineBaseUpdate(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
-  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
-    return SDValue();
-
   SelectionDAG &DAG = DCI.DAG;
   const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
                             N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
-  const unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+  const bool isStore = N->getOpcode() == ISD::STORE;
+  const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
   SDValue Addr = N->getOperand(AddrOpIdx);
   MemSDNode *MemN = cast<MemSDNode>(N);

@@ -8944,15 +8946,24 @@ static SDValue CombineBaseUpdate(SDNode *N,
       case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
       case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
       case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+      case ISD::LOAD:       NewOpc = ARMISD::VLD1_UPD;
+        NumVecs = 1; isLaneOp = false; break;
+      case ISD::STORE:      NewOpc = ARMISD::VST1_UPD;
+        NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
       }
     }

     // Find the size of memory referenced by the load/store.
     EVT VecTy;
-    if (isLoadOp)
+    if (isLoadOp) {
       VecTy = N->getValueType(0);
-    else
+    } else if (isIntrinsic) {
       VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+    } else {
+      assert(isStore && "Node has to be a load, a store, or an intrinsic!");
+      VecTy = N->getOperand(1).getValueType();
+    }

     unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
     if (isLaneOp)
       NumBytes /= VecTy.getVectorNumElements();
@@ -8969,13 +8980,53 @@ static SDValue CombineBaseUpdate(SDNode *N,
       continue;
     }

+    // OK, we found an ADD we can fold into the base update.
+    // Now, create a _UPD node, taking care of not breaking alignment.
+
+    EVT AlignedVecTy = VecTy;
+    unsigned Alignment = MemN->getAlignment();
+
+    // If this is a less-than-standard-aligned load/store, change the type to
+    // match the standard alignment.
+    // The alignment is overlooked when selecting _UPD variants; and it's
+    // easier to introduce bitcasts here than fix that.
+    // There are 3 ways to get to this base-update combine:
+    // - intrinsics: they are assumed to be properly aligned (to the standard
+    //   alignment of the memory type), so we don't need to do anything.
+    // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+    //   intrinsics, so, likewise, there's nothing to do.
+    // - generic load/store instructions: the alignment is specified as an
+    //   explicit operand, rather than implicitly as the standard alignment
+    //   of the memory type (like the intrisics). We need to change the
+    //   memory type to match the explicit alignment. That way, we don't
+    //   generate non-standard-aligned ARMISD::VLDx nodes.
+    if (isa<LSBaseSDNode>(N)) {
+      if (Alignment == 0)
+        Alignment = 1;
+      if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+        MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+        assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+        assert(!isLaneOp && "Unexpected generic load/store lane.");
+        unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+        AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+      }
+      // Don't set an explicit alignment on regular load/stores that we want
+      // to transform to VLD/VST 1_UPD nodes.
+      // This matches the behavior of regular load/stores, which only get an
+      // explicit alignment if the MMO alignment is larger than the standard
+      // alignment of the memory type.
+      // Intrinsics, however, always get an explicit alignment, set to the
+      // alignment of the MMO.
+      Alignment = 1;
+    }
+
     // Create the new updating load/store node.
     // First, create an SDVTList for the new updating node's results.
     EVT Tys[6];
     unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
     unsigned n;
     for (n = 0; n < NumResultVecs; ++n)
-      Tys[n] = VecTy;
+      Tys[n] = AlignedVecTy;
     Tys[n++] = MVT::i32;
     Tys[n] = MVT::Other;
     SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
@@ -8985,17 +9036,43 @@ static SDValue CombineBaseUpdate(SDNode *N,
     Ops.push_back(N->getOperand(0)); // incoming chain
     Ops.push_back(N->getOperand(AddrOpIdx));
     Ops.push_back(Inc);
-    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i)
-      Ops.push_back(N->getOperand(i));
+
+    if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+      // Try to match the intrinsic's signature
+      Ops.push_back(StN->getValue());
+    } else {
+      // Loads (and of course intrinsics) match the intrinsics' signature,
+      // so just add all but the alignment operand.
+      for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands() - 1; ++i)
+        Ops.push_back(N->getOperand(i));
+    }
+
+    // For all node types, the alignment operand is always the last one.
+    Ops.push_back(DAG.getConstant(Alignment, MVT::i32));
+
+    // If this is a non-standard-aligned STORE, the penultimate operand is the
+    // stored value. Bitcast it to the aligned type.
+    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+      SDValue &StVal = Ops[Ops.size()-2];
+      StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal);
+    }

     SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
-                                           Ops, MemN->getMemoryVT(),
+                                           Ops, AlignedVecTy,
                                            MemN->getMemOperand());

     // Update the uses.
     SmallVector<SDValue, 5> NewResults;
     for (unsigned i = 0; i < NumResultVecs; ++i)
       NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+    // If this is an non-standard-aligned LOAD, the first result is the loaded
+    // value. Bitcast it to the expected result type.
+    if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+      SDValue &LdVal = NewResults[0];
+      LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal);
+    }
+
     NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
     DCI.CombineTo(N, NewResults);
     DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -9005,6 +9082,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
   return SDValue();
 }

+static SDValue PerformVLDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  return CombineBaseUpdate(N, DCI);
+}
+
 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
 /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
@@ -9118,6 +9203,18 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
   return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
 }

+static SDValue PerformLOADCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI) {
+  EVT VT = N->getValueType(0);
+
+  // If this is a legal vector load, try to combine it into a VLD1_UPD.
+  if (ISD::isNormalLoad(N) && VT.isVector() &&
+      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return CombineBaseUpdate(N, DCI);
+
+  return SDValue();
+}
+
 /// PerformSTORECombine - Target-specific dag combine xforms for
 /// ISD::STORE.
 static SDValue PerformSTORECombine(SDNode *N,
@@ -9256,6 +9353,11 @@ static SDValue PerformSTORECombine(SDNode *N,
                                St->getAAInfo());
   }

+  // If this is a legal vector store, try to combine it into a VST1_UPD.
+  if (ISD::isNormalStore(N) && VT.isVector() &&
+      DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return CombineBaseUpdate(N, DCI);
+
   return SDValue();
 }

@@ -9849,10 +9951,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
+  case ISD::LOAD: return PerformLOADCombine(N, DCI);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
   case ARMISD::VLD4DUP:
-    return CombineBaseUpdate(N, DCI);
+    return PerformVLDCombine(N, DCI);
   case ARMISD::BUILD_VECTOR:
     return PerformARMBUILD_VECTORCombine(N, DCI);
   case ISD::INTRINSIC_VOID:
@@ -9872,7 +9975,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
     case Intrinsic::arm_neon_vst2lane:
     case Intrinsic::arm_neon_vst3lane:
     case Intrinsic::arm_neon_vst4lane:
-      return CombineBaseUpdate(N, DCI);
+      return PerformVLDCombine(N, DCI);
     default: break;
     }
     break;
@@ -9,8 +9,8 @@
 define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
 entry:
 ; NO-REALIGN-LABEL: test1
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
+; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
 ; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@@ -21,16 +21,14 @@ entry:
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]

 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
 ; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
 	%retval = alloca <16 x float>, align 16
 	%0 = load <16 x float>* @T3_retval, align 16
@@ -44,8 +42,8 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
 entry:
 ; REALIGN-LABEL: test2
 ; REALIGN: bfc sp, #0, #6
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
+; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
 ; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
 ; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@@ -65,8 +63,7 @@ entry:
 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
 ; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #16
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
 	%retval = alloca <16 x float>, align 16
 	%0 = load <16 x float>* @T3_retval, align 16
@@ -46,10 +46,8 @@ entry:
 ; CHECK: movw [[REG2:r[0-9]+]], #16716
 ; CHECK: movt [[REG2:r[0-9]+]], #72
 ; CHECK: str [[REG2]], [r0, #32]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
@@ -59,10 +57,8 @@ entry:
 define void @t3(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t3:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
 ; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
@@ -73,7 +69,8 @@ define void @t4(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t4:
 ; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
-; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
+; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
+; CHECK: strh [[REG5:r[0-9]+]], [r0]
 	tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
 	ret void
 }
test/CodeGen/ARM/vector-load.ll (new file, 253 lines)
@@ -0,0 +1,253 @@
; RUN: llc < %s | FileCheck %s

target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"

define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i8>** %ptr
  %lA = load <8 x i8>* %A, align 1
  ret <8 x i8> %lA
}

define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i8>** %ptr
  %lA = load <8 x i8>* %A, align 1
  %inc = getelementptr <8 x i8>* %A, i38 1
  store <8 x i8>* %inc, <8 x i8>** %ptr
  ret <8 x i8> %lA
}

define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i16>** %ptr
  %lA = load <4 x i16>* %A, align 1
  ret <4 x i16> %lA
}

define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i16>** %ptr
  %lA = load <4 x i16>* %A, align 1
  %inc = getelementptr <4 x i16>* %A, i34 1
  store <4 x i16>* %inc, <4 x i16>** %ptr
  ret <4 x i16> %lA
}

define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i32>** %ptr
  %lA = load <2 x i32>* %A, align 1
  ret <2 x i32> %lA
}

define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i32>** %ptr
  %lA = load <2 x i32>* %A, align 1
  %inc = getelementptr <2 x i32>* %A, i32 1
  store <2 x i32>* %inc, <2 x i32>** %ptr
  ret <2 x i32> %lA
}

define <2 x float> @load_v2f32(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x float>** %ptr
  %lA = load <2 x float>* %A, align 1
  ret <2 x float> %lA
}

define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x float>** %ptr
  %lA = load <2 x float>* %A, align 1
  %inc = getelementptr <2 x float>* %A, i32 1
  store <2 x float>* %inc, <2 x float>** %ptr
  ret <2 x float> %lA
}

define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <1 x i64>** %ptr
  %lA = load <1 x i64>* %A, align 1
  ret <1 x i64> %lA
}

define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <1 x i64>** %ptr
  %lA = load <1 x i64>* %A, align 1
  %inc = getelementptr <1 x i64>* %A, i31 1
  store <1 x i64>* %inc, <1 x i64>** %ptr
  ret <1 x i64> %lA
}

define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <16 x i8>** %ptr
  %lA = load <16 x i8>* %A, align 1
  ret <16 x i8> %lA
}

define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <16 x i8>** %ptr
  %lA = load <16 x i8>* %A, align 1
  %inc = getelementptr <16 x i8>* %A, i316 1
  store <16 x i8>* %inc, <16 x i8>** %ptr
  ret <16 x i8> %lA
}

define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i16>** %ptr
  %lA = load <8 x i16>* %A, align 1
  ret <8 x i16> %lA
}

define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i16>** %ptr
  %lA = load <8 x i16>* %A, align 1
  %inc = getelementptr <8 x i16>* %A, i38 1
  store <8 x i16>* %inc, <8 x i16>** %ptr
  ret <8 x i16> %lA
}

define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i32>** %ptr
  %lA = load <4 x i32>* %A, align 1
  ret <4 x i32> %lA
}

define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i32>** %ptr
  %lA = load <4 x i32>* %A, align 1
  %inc = getelementptr <4 x i32>* %A, i34 1
  store <4 x i32>* %inc, <4 x i32>** %ptr
  ret <4 x i32> %lA
}

define <4 x float> @load_v4f32(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x float>** %ptr
  %lA = load <4 x float>* %A, align 1
  ret <4 x float> %lA
}

define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x float>** %ptr
  %lA = load <4 x float>* %A, align 1
  %inc = getelementptr <4 x float>* %A, i34 1
  store <4 x float>* %inc, <4 x float>** %ptr
  ret <4 x float> %lA
}

define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 1
  ret <2 x i64> %lA
}

define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 1
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}

; Make sure we change the type to match alignment if necessary.
define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned2:
;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 2
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}

define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned4:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 4
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}

define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned8:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 8
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}

define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update_aligned16:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 16
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}

; Make sure we don't break smaller-than-dreg extloads.
define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32:
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
  %A = load <4 x i8>** %ptr
  %lA = load <4 x i8>* %A, align 4
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}

define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr.w r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: str.w r[[INCREG]], [r0]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
  %A = load <4 x i8>** %ptr
  %lA = load <4 x i8>* %A, align 4
  %inc = getelementptr <4 x i8>* %A, i38 4
  store <4 x i8>* %inc, <4 x i8>** %ptr
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}
test/CodeGen/ARM/vector-store.ll (new file, 258 lines)
@@ -0,0 +1,258 @@
; RUN: llc < %s | FileCheck %s

target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"

define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8:
;CHECK: str r1, [r0]
  %A = load <8 x i8>** %ptr
  store <8 x i8> %val, <8 x i8>* %A, align 1
  ret void
}

define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
;CHECK-LABEL: store_v8i8_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i8>** %ptr
  store <8 x i8> %val, <8 x i8>* %A, align 1
  %inc = getelementptr <8 x i8>* %A, i38 1
  store <8 x i8>* %inc, <8 x i8>** %ptr
  ret void
}

define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
;CHECK-LABEL: store_v4i16:
;CHECK: str r1, [r0]
  %A = load <4 x i16>** %ptr
  store <4 x i16> %val, <4 x i16>* %A, align 1
  ret void
}

define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
;CHECK-LABEL: store_v4i16_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i16>** %ptr
  store <4 x i16> %val, <4 x i16>* %A, align 1
  %inc = getelementptr <4 x i16>* %A, i34 1
  store <4 x i16>* %inc, <4 x i16>** %ptr
  ret void
}

define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
;CHECK-LABEL: store_v2i32:
;CHECK: str r1, [r0]
  %A = load <2 x i32>** %ptr
  store <2 x i32> %val, <2 x i32>* %A, align 1
  ret void
}

define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
;CHECK-LABEL: store_v2i32_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i32>** %ptr
  store <2 x i32> %val, <2 x i32>* %A, align 1
  %inc = getelementptr <2 x i32>* %A, i32 1
  store <2 x i32>* %inc, <2 x i32>** %ptr
  ret void
}

define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
;CHECK-LABEL: store_v2f32:
;CHECK: str r1, [r0]
  %A = load <2 x float>** %ptr
  store <2 x float> %val, <2 x float>* %A, align 1
  ret void
}

define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
;CHECK-LABEL: store_v2f32_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x float>** %ptr
  store <2 x float> %val, <2 x float>* %A, align 1
  %inc = getelementptr <2 x float>* %A, i32 1
  store <2 x float>* %inc, <2 x float>** %ptr
  ret void
}

define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
;CHECK-LABEL: store_v1i64:
;CHECK: str r1, [r0]
  %A = load <1 x i64>** %ptr
  store <1 x i64> %val, <1 x i64>* %A, align 1
  ret void
}

define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
;CHECK-LABEL: store_v1i64_update:
;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <1 x i64>** %ptr
  store <1 x i64> %val, <1 x i64>* %A, align 1
  %inc = getelementptr <1 x i64>* %A, i31 1
  store <1 x i64>* %inc, <1 x i64>** %ptr
  ret void
}

define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
;CHECK-LABEL: store_v16i8:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <16 x i8>** %ptr
  store <16 x i8> %val, <16 x i8>* %A, align 1
  ret void
}

define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
;CHECK-LABEL: store_v16i8_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <16 x i8>** %ptr
  store <16 x i8> %val, <16 x i8>* %A, align 1
  %inc = getelementptr <16 x i8>* %A, i316 1
  store <16 x i8>* %inc, <16 x i8>** %ptr
  ret void
}

define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
;CHECK-LABEL: store_v8i16:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i16>** %ptr
  store <8 x i16> %val, <8 x i16>* %A, align 1
  ret void
}

define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
;CHECK-LABEL: store_v8i16_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i16>** %ptr
  store <8 x i16> %val, <8 x i16>* %A, align 1
  %inc = getelementptr <8 x i16>* %A, i38 1
  store <8 x i16>* %inc, <8 x i16>** %ptr
  ret void
}

define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: store_v4i32:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i32>** %ptr
  store <4 x i32> %val, <4 x i32>* %A, align 1
  ret void
}

define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: store_v4i32_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i32>** %ptr
  store <4 x i32> %val, <4 x i32>* %A, align 1
  %inc = getelementptr <4 x i32>* %A, i34 1
  store <4 x i32>* %inc, <4 x i32>** %ptr
  ret void
}

define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
;CHECK-LABEL: store_v4f32:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x float>** %ptr
  store <4 x float> %val, <4 x float>* %A, align 1
  ret void
}

define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
;CHECK-LABEL: store_v4f32_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x float>** %ptr
  store <4 x float> %val, <4 x float>* %A, align 1
  %inc = getelementptr <4 x float>* %A, i34 1
  store <4 x float>* %inc, <4 x float>** %ptr
  ret void
}

define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 1
  ret void
}

define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update:
;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 1
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret void
}

define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned2:
;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 2
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret void
}

define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned4:
;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 4
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret void
}

define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned8:
;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 8
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret void
}

define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
;CHECK-LABEL: store_v2i64_update_aligned16:
;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
  %A = load <2 x i64>** %ptr
  store <2 x i64> %val, <2 x i64>* %A, align 16
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret void
}

define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: truncstore_v4i32tov4i8:
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
  %A = load <4 x i8>** %ptr
  %trunc = trunc <4 x i32> %val to <4 x i8>
  store <4 x i8> %trunc, <4 x i8>* %A, align 4
  ret void
}

define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
;CHECK: ldr.w r9, [sp]
;CHECK: vmov {{d[0-9]+}}, r3, r9
;CHECK: vmov {{d[0-9]+}}, r1, r2
;CHECK: movs [[IMM16:r[0-9]+]], #16
;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
;CHECK: str r[[PTRREG]], [r0]
  %A = load <4 x i8>** %ptr
  %trunc = trunc <4 x i32> %val to <4 x i8>
  store <4 x i8> %trunc, <4 x i8>* %A, align 4
  %inc = getelementptr <4 x i8>* %A, i38 4
  store <4 x i8>* %inc, <4 x i8>** %ptr
  ret void
}
@@ -201,7 +201,7 @@ for.end: ; preds = %for.body
 ;
 ; Currently we have three extra add.w's that keep the store address
 ; live past the next increment because ISEL is unfortunately undoing
-; the store chain. ISEL also fails to convert the stores to
+; the store chain. ISEL also fails to convert all but one of the stores to
 ; post-increment addressing. However, the loads should use
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
@@ -210,7 +210,7 @@ for.end: ; preds = %for.body
 ; A9: %.lr.ph
 ; A9-NOT: lsl.w
 ; A9-NOT: {{ldr|str|adds|add r}}
-; A9: add.w r
+; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
 ; A9-NOT: {{ldr|str|adds|add r}}
 ; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}