mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-10 05:41:40 +00:00
[NEON] Support VLD1xN intrinsics in AArch32 mode (LLVM part)
We currently support them only in AArch64. The NEON Reference, however, says they are 'ARMv7, ARMv8' intrinsics. Differential Revision: https://reviews.llvm.org/D47120 llvm-svn: 333825
This commit is contained in:
parent
f49f73f5a8
commit
3a4bdaf295
@ -620,6 +620,18 @@ def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
|
|||||||
[llvm_anyptr_ty, llvm_i32_ty],
|
[llvm_anyptr_ty, llvm_i32_ty],
|
||||||
[IntrReadMem, IntrArgMemOnly]>;
|
[IntrReadMem, IntrArgMemOnly]>;
|
||||||
|
|
||||||
|
def int_arm_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
|
||||||
|
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||||
|
[IntrReadMem, IntrArgMemOnly]>;
|
||||||
|
def int_arm_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
|
||||||
|
LLVMMatchType<0>],
|
||||||
|
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||||
|
[IntrReadMem, IntrArgMemOnly]>;
|
||||||
|
def int_arm_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
|
||||||
|
LLVMMatchType<0>, LLVMMatchType<0>],
|
||||||
|
[LLVMAnyPointerType<LLVMMatchType<0>>],
|
||||||
|
[IntrReadMem, IntrArgMemOnly]>;
|
||||||
|
|
||||||
// Vector load N-element structure to one lane.
|
// Vector load N-element structure to one lane.
|
||||||
// Source operands are: the address, the N input vectors (since only one
|
// Source operands are: the address, the N input vectors (since only one
|
||||||
// lane is assigned), the lane number, and the alignment.
|
// lane is assigned), the lane number, and the alignment.
|
||||||
|
@ -1359,7 +1359,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case ARM::VLD1q64:
|
case ARM::VLD1q64:
|
||||||
|
case ARM::VLD1d8TPseudo:
|
||||||
|
case ARM::VLD1d16TPseudo:
|
||||||
|
case ARM::VLD1d32TPseudo:
|
||||||
case ARM::VLD1d64TPseudo:
|
case ARM::VLD1d64TPseudo:
|
||||||
|
case ARM::VLD1d8QPseudo:
|
||||||
|
case ARM::VLD1d16QPseudo:
|
||||||
|
case ARM::VLD1d32QPseudo:
|
||||||
case ARM::VLD1d64QPseudo:
|
case ARM::VLD1d64QPseudo:
|
||||||
if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
|
if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
|
||||||
FrameIndex = MI.getOperand(1).getIndex();
|
FrameIndex = MI.getOperand(1).getIndex();
|
||||||
@ -4230,6 +4236,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
|||||||
case ARM::VLD3d8Pseudo:
|
case ARM::VLD3d8Pseudo:
|
||||||
case ARM::VLD3d16Pseudo:
|
case ARM::VLD3d16Pseudo:
|
||||||
case ARM::VLD3d32Pseudo:
|
case ARM::VLD3d32Pseudo:
|
||||||
|
case ARM::VLD1d8TPseudo:
|
||||||
|
case ARM::VLD1d16TPseudo:
|
||||||
|
case ARM::VLD1d32TPseudo:
|
||||||
case ARM::VLD1d64TPseudo:
|
case ARM::VLD1d64TPseudo:
|
||||||
case ARM::VLD1d64TPseudoWB_fixed:
|
case ARM::VLD1d64TPseudoWB_fixed:
|
||||||
case ARM::VLD1d64TPseudoWB_register:
|
case ARM::VLD1d64TPseudoWB_register:
|
||||||
@ -4248,9 +4257,28 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
|||||||
case ARM::VLD4d8Pseudo:
|
case ARM::VLD4d8Pseudo:
|
||||||
case ARM::VLD4d16Pseudo:
|
case ARM::VLD4d16Pseudo:
|
||||||
case ARM::VLD4d32Pseudo:
|
case ARM::VLD4d32Pseudo:
|
||||||
|
case ARM::VLD1d8QPseudo:
|
||||||
|
case ARM::VLD1d16QPseudo:
|
||||||
|
case ARM::VLD1d32QPseudo:
|
||||||
case ARM::VLD1d64QPseudo:
|
case ARM::VLD1d64QPseudo:
|
||||||
case ARM::VLD1d64QPseudoWB_fixed:
|
case ARM::VLD1d64QPseudoWB_fixed:
|
||||||
case ARM::VLD1d64QPseudoWB_register:
|
case ARM::VLD1d64QPseudoWB_register:
|
||||||
|
case ARM::VLD1q8HighQPseudo:
|
||||||
|
case ARM::VLD1q8LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q8HighTPseudo:
|
||||||
|
case ARM::VLD1q8LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q16HighQPseudo:
|
||||||
|
case ARM::VLD1q16LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q16HighTPseudo:
|
||||||
|
case ARM::VLD1q16LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q32HighQPseudo:
|
||||||
|
case ARM::VLD1q32LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q32HighTPseudo:
|
||||||
|
case ARM::VLD1q32LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q64HighQPseudo:
|
||||||
|
case ARM::VLD1q64LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q64HighTPseudo:
|
||||||
|
case ARM::VLD1q64LowTPseudo_UPD:
|
||||||
case ARM::VLD4d8Pseudo_UPD:
|
case ARM::VLD4d8Pseudo_UPD:
|
||||||
case ARM::VLD4d16Pseudo_UPD:
|
case ARM::VLD4d16Pseudo_UPD:
|
||||||
case ARM::VLD4d32Pseudo_UPD:
|
case ARM::VLD4d32Pseudo_UPD:
|
||||||
|
@ -110,6 +110,9 @@ namespace {
|
|||||||
// OddDblSpc depending on the lane number operand.
|
// OddDblSpc depending on the lane number operand.
|
||||||
enum NEONRegSpacing {
|
enum NEONRegSpacing {
|
||||||
SingleSpc,
|
SingleSpc,
|
||||||
|
SingleLowSpc , // Single spacing, low registers, three and four vectors.
|
||||||
|
SingleHighQSpc, // Single spacing, high registers, four vectors.
|
||||||
|
SingleHighTSpc, // Single spacing, high registers, three vectors.
|
||||||
EvenDblSpc,
|
EvenDblSpc,
|
||||||
OddDblSpc
|
OddDblSpc
|
||||||
};
|
};
|
||||||
@ -154,12 +157,34 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
|
|||||||
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
|
{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
|
||||||
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
|
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
|
||||||
|
|
||||||
|
{ ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
|
||||||
|
{ ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
|
||||||
|
{ ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
|
||||||
|
{ ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
|
||||||
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
|
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
|
||||||
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
|
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
|
||||||
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
|
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
|
||||||
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
|
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
|
||||||
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
|
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
|
||||||
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
|
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
|
||||||
|
{ ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
|
||||||
|
{ ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
|
||||||
|
{ ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
|
||||||
|
{ ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
|
||||||
|
{ ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
|
||||||
|
{ ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
|
||||||
|
{ ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
|
||||||
|
{ ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
|
||||||
|
{ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
|
||||||
|
{ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
|
||||||
|
{ ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
|
||||||
|
{ ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
|
||||||
|
{ ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
|
||||||
|
{ ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
|
||||||
|
{ ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
|
||||||
|
{ ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
|
||||||
|
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
|
||||||
|
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
|
||||||
|
|
||||||
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
|
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
|
||||||
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
|
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
|
||||||
@ -370,11 +395,21 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
|
|||||||
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
|
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
|
||||||
const TargetRegisterInfo *TRI, unsigned &D0,
|
const TargetRegisterInfo *TRI, unsigned &D0,
|
||||||
unsigned &D1, unsigned &D2, unsigned &D3) {
|
unsigned &D1, unsigned &D2, unsigned &D3) {
|
||||||
if (RegSpc == SingleSpc) {
|
if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
|
||||||
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
||||||
D1 = TRI->getSubReg(Reg, ARM::dsub_1);
|
D1 = TRI->getSubReg(Reg, ARM::dsub_1);
|
||||||
D2 = TRI->getSubReg(Reg, ARM::dsub_2);
|
D2 = TRI->getSubReg(Reg, ARM::dsub_2);
|
||||||
D3 = TRI->getSubReg(Reg, ARM::dsub_3);
|
D3 = TRI->getSubReg(Reg, ARM::dsub_3);
|
||||||
|
} else if (RegSpc == SingleHighQSpc) {
|
||||||
|
D0 = TRI->getSubReg(Reg, ARM::dsub_4);
|
||||||
|
D1 = TRI->getSubReg(Reg, ARM::dsub_5);
|
||||||
|
D2 = TRI->getSubReg(Reg, ARM::dsub_6);
|
||||||
|
D3 = TRI->getSubReg(Reg, ARM::dsub_7);
|
||||||
|
} else if (RegSpc == SingleHighTSpc) {
|
||||||
|
D0 = TRI->getSubReg(Reg, ARM::dsub_3);
|
||||||
|
D1 = TRI->getSubReg(Reg, ARM::dsub_4);
|
||||||
|
D2 = TRI->getSubReg(Reg, ARM::dsub_5);
|
||||||
|
D3 = TRI->getSubReg(Reg, ARM::dsub_6);
|
||||||
} else if (RegSpc == EvenDblSpc) {
|
} else if (RegSpc == EvenDblSpc) {
|
||||||
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
||||||
D1 = TRI->getSubReg(Reg, ARM::dsub_2);
|
D1 = TRI->getSubReg(Reg, ARM::dsub_2);
|
||||||
@ -422,15 +457,40 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
|
|||||||
// Copy the addrmode6 operands.
|
// Copy the addrmode6 operands.
|
||||||
MIB.add(MI.getOperand(OpIdx++));
|
MIB.add(MI.getOperand(OpIdx++));
|
||||||
MIB.add(MI.getOperand(OpIdx++));
|
MIB.add(MI.getOperand(OpIdx++));
|
||||||
|
|
||||||
// Copy the am6offset operand.
|
// Copy the am6offset operand.
|
||||||
if (TableEntry->hasWritebackOperand)
|
if (TableEntry->hasWritebackOperand) {
|
||||||
MIB.add(MI.getOperand(OpIdx++));
|
// TODO: The writing-back pseudo instructions we translate here are all
|
||||||
|
// defined to take am6offset nodes that are capable to represent both fixed
|
||||||
|
// and register forms. Some real instructions, however, do not rely on
|
||||||
|
// am6offset and have separate definitions for such forms. When this is the
|
||||||
|
// case, fixed forms do not take any offset nodes, so here we skip them for
|
||||||
|
// such intructions. Once all real and pseudo writing-back instructions are
|
||||||
|
// rewritten without use of am6offset nodes, this code will go away.
|
||||||
|
const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
|
||||||
|
if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
|
||||||
|
TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) {
|
||||||
|
assert(AM6Offset.getReg() == 0 &&
|
||||||
|
"A fixed writing-back pseudo intruction provides an offset "
|
||||||
|
"register!");
|
||||||
|
} else {
|
||||||
|
MIB.add(AM6Offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// For an instruction writing double-spaced subregs, the pseudo instruction
|
// For an instruction writing double-spaced subregs, the pseudo instruction
|
||||||
// has an extra operand that is a use of the super-register. Record the
|
// has an extra operand that is a use of the super-register. Record the
|
||||||
// operand index and skip over it.
|
// operand index and skip over it.
|
||||||
unsigned SrcOpIdx = 0;
|
unsigned SrcOpIdx = 0;
|
||||||
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc)
|
if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
|
||||||
|
RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
|
||||||
|
RegSpc == SingleHighTSpc)
|
||||||
SrcOpIdx = OpIdx++;
|
SrcOpIdx = OpIdx++;
|
||||||
|
|
||||||
// Copy the predicate operands.
|
// Copy the predicate operands.
|
||||||
@ -1503,6 +1563,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||||||
case ARM::VLD3d8Pseudo:
|
case ARM::VLD3d8Pseudo:
|
||||||
case ARM::VLD3d16Pseudo:
|
case ARM::VLD3d16Pseudo:
|
||||||
case ARM::VLD3d32Pseudo:
|
case ARM::VLD3d32Pseudo:
|
||||||
|
case ARM::VLD1d8TPseudo:
|
||||||
|
case ARM::VLD1d16TPseudo:
|
||||||
|
case ARM::VLD1d32TPseudo:
|
||||||
case ARM::VLD1d64TPseudo:
|
case ARM::VLD1d64TPseudo:
|
||||||
case ARM::VLD1d64TPseudoWB_fixed:
|
case ARM::VLD1d64TPseudoWB_fixed:
|
||||||
case ARM::VLD1d64TPseudoWB_register:
|
case ARM::VLD1d64TPseudoWB_register:
|
||||||
@ -1521,9 +1584,28 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
|||||||
case ARM::VLD4d8Pseudo:
|
case ARM::VLD4d8Pseudo:
|
||||||
case ARM::VLD4d16Pseudo:
|
case ARM::VLD4d16Pseudo:
|
||||||
case ARM::VLD4d32Pseudo:
|
case ARM::VLD4d32Pseudo:
|
||||||
|
case ARM::VLD1d8QPseudo:
|
||||||
|
case ARM::VLD1d16QPseudo:
|
||||||
|
case ARM::VLD1d32QPseudo:
|
||||||
case ARM::VLD1d64QPseudo:
|
case ARM::VLD1d64QPseudo:
|
||||||
case ARM::VLD1d64QPseudoWB_fixed:
|
case ARM::VLD1d64QPseudoWB_fixed:
|
||||||
case ARM::VLD1d64QPseudoWB_register:
|
case ARM::VLD1d64QPseudoWB_register:
|
||||||
|
case ARM::VLD1q8HighQPseudo:
|
||||||
|
case ARM::VLD1q8LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q8HighTPseudo:
|
||||||
|
case ARM::VLD1q8LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q16HighQPseudo:
|
||||||
|
case ARM::VLD1q16LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q16HighTPseudo:
|
||||||
|
case ARM::VLD1q16LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q32HighQPseudo:
|
||||||
|
case ARM::VLD1q32LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q32HighTPseudo:
|
||||||
|
case ARM::VLD1q32LowTPseudo_UPD:
|
||||||
|
case ARM::VLD1q64HighQPseudo:
|
||||||
|
case ARM::VLD1q64LowQPseudo_UPD:
|
||||||
|
case ARM::VLD1q64HighTPseudo:
|
||||||
|
case ARM::VLD1q64LowTPseudo_UPD:
|
||||||
case ARM::VLD4d8Pseudo_UPD:
|
case ARM::VLD4d8Pseudo_UPD:
|
||||||
case ARM::VLD4d16Pseudo_UPD:
|
case ARM::VLD4d16Pseudo_UPD:
|
||||||
case ARM::VLD4d32Pseudo_UPD:
|
case ARM::VLD4d32Pseudo_UPD:
|
||||||
|
@ -1761,9 +1761,7 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
|
|||||||
case MVT::v4f32:
|
case MVT::v4f32:
|
||||||
case MVT::v4i32: OpcodeIndex = 2; break;
|
case MVT::v4i32: OpcodeIndex = 2; break;
|
||||||
case MVT::v2f64:
|
case MVT::v2f64:
|
||||||
case MVT::v2i64: OpcodeIndex = 3;
|
case MVT::v2i64: OpcodeIndex = 3; break;
|
||||||
assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EVT ResTy;
|
EVT ResTy;
|
||||||
@ -3441,6 +3439,51 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case Intrinsic::arm_neon_vld1x2: {
|
||||||
|
static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
|
||||||
|
ARM::VLD1q32, ARM::VLD1q64 };
|
||||||
|
static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
|
||||||
|
ARM::VLD1d16QPseudo,
|
||||||
|
ARM::VLD1d32QPseudo,
|
||||||
|
ARM::VLD1d64QPseudo };
|
||||||
|
SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Intrinsic::arm_neon_vld1x3: {
|
||||||
|
static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
|
||||||
|
ARM::VLD1d16TPseudo,
|
||||||
|
ARM::VLD1d32TPseudo,
|
||||||
|
ARM::VLD1d64TPseudo };
|
||||||
|
static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
|
||||||
|
ARM::VLD1q16LowTPseudo_UPD,
|
||||||
|
ARM::VLD1q32LowTPseudo_UPD,
|
||||||
|
ARM::VLD1q64LowTPseudo_UPD };
|
||||||
|
static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
|
||||||
|
ARM::VLD1q16HighTPseudo,
|
||||||
|
ARM::VLD1q32HighTPseudo,
|
||||||
|
ARM::VLD1q64HighTPseudo };
|
||||||
|
SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
case Intrinsic::arm_neon_vld1x4: {
|
||||||
|
static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
|
||||||
|
ARM::VLD1d16QPseudo,
|
||||||
|
ARM::VLD1d32QPseudo,
|
||||||
|
ARM::VLD1d64QPseudo };
|
||||||
|
static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
|
||||||
|
ARM::VLD1q16LowQPseudo_UPD,
|
||||||
|
ARM::VLD1q32LowQPseudo_UPD,
|
||||||
|
ARM::VLD1q64LowQPseudo_UPD };
|
||||||
|
static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
|
||||||
|
ARM::VLD1q16HighQPseudo,
|
||||||
|
ARM::VLD1q32HighQPseudo,
|
||||||
|
ARM::VLD1q64HighQPseudo };
|
||||||
|
SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
case Intrinsic::arm_neon_vld2: {
|
case Intrinsic::arm_neon_vld2: {
|
||||||
static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
|
static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
|
||||||
ARM::VLD2d32, ARM::VLD1q64 };
|
ARM::VLD2d32, ARM::VLD1q64 };
|
||||||
|
@ -12763,6 +12763,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
case ISD::INTRINSIC_W_CHAIN:
|
case ISD::INTRINSIC_W_CHAIN:
|
||||||
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
|
||||||
case Intrinsic::arm_neon_vld1:
|
case Intrinsic::arm_neon_vld1:
|
||||||
|
case Intrinsic::arm_neon_vld1x2:
|
||||||
|
case Intrinsic::arm_neon_vld1x3:
|
||||||
|
case Intrinsic::arm_neon_vld1x4:
|
||||||
case Intrinsic::arm_neon_vld2:
|
case Intrinsic::arm_neon_vld2:
|
||||||
case Intrinsic::arm_neon_vld3:
|
case Intrinsic::arm_neon_vld3:
|
||||||
case Intrinsic::arm_neon_vld4:
|
case Intrinsic::arm_neon_vld4:
|
||||||
@ -14074,6 +14077,21 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|||||||
Info.flags = MachineMemOperand::MOLoad;
|
Info.flags = MachineMemOperand::MOLoad;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
case Intrinsic::arm_neon_vld1x2:
|
||||||
|
case Intrinsic::arm_neon_vld1x3:
|
||||||
|
case Intrinsic::arm_neon_vld1x4: {
|
||||||
|
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||||
|
// Conservatively set memVT to the entire set of vectors loaded.
|
||||||
|
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
|
||||||
|
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
|
||||||
|
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
|
||||||
|
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
|
||||||
|
Info.offset = 0;
|
||||||
|
Info.align = 0;
|
||||||
|
// volatile loads with NEON intrinsics not supported
|
||||||
|
Info.flags = MachineMemOperand::MOLoad;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
case Intrinsic::arm_neon_vst1:
|
case Intrinsic::arm_neon_vst1:
|
||||||
case Intrinsic::arm_neon_vst2:
|
case Intrinsic::arm_neon_vst2:
|
||||||
case Intrinsic::arm_neon_vst3:
|
case Intrinsic::arm_neon_vst3:
|
||||||
|
@ -770,10 +770,22 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
|
|||||||
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
|
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
|
||||||
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
|
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
|
||||||
|
|
||||||
|
def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
|
||||||
|
def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
|
||||||
|
|
||||||
// ...with 4 registers
|
// ...with 4 registers
|
||||||
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
|
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
|
||||||
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
|
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
|
||||||
@ -811,10 +823,22 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
|
|||||||
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
|
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
|
||||||
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
|
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
|
||||||
|
|
||||||
|
def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
|
||||||
|
def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
|
||||||
|
|
||||||
// VLD2 : Vector Load (multiple 2-element structures)
|
// VLD2 : Vector Load (multiple 2-element structures)
|
||||||
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
|
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
|
||||||
InstrItinClass itin, Operand AddrMode>
|
InstrItinClass itin, Operand AddrMode>
|
||||||
|
242
test/CodeGen/ARM/arm-vld1.ll
Normal file
242
test/CodeGen/ARM/arm-vld1.ll
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
|
||||||
|
; RUN: -asm-verbose=false | FileCheck %s
|
||||||
|
|
||||||
|
%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
|
||||||
|
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
|
||||||
|
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
|
||||||
|
|
||||||
|
%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
|
||||||
|
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
|
||||||
|
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
|
||||||
|
|
||||||
|
%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
|
||||||
|
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
|
||||||
|
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
|
||||||
|
|
||||||
|
%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
|
||||||
|
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
|
||||||
|
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
|
||||||
|
|
||||||
|
%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
|
||||||
|
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
|
||||||
|
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
|
||||||
|
|
||||||
|
%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
|
||||||
|
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
|
||||||
|
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
|
||||||
|
|
||||||
|
%struct.uint64x2x2_t = type { <2 x i64>, <2 x i64> }
|
||||||
|
%struct.uint64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
|
||||||
|
%struct.uint64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
|
||||||
|
|
||||||
|
%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
|
||||||
|
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
|
||||||
|
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
|
||||||
|
|
||||||
|
declare %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16*) nounwind readonly
|
||||||
|
declare %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0i16(i16*) nounwind readonly
|
||||||
|
declare %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0i16(i16*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0i32(i32*) nounwind readonly
|
||||||
|
declare %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0i32(i32*) nounwind readonly
|
||||||
|
declare %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0i32(i32*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0i64(i64*) nounwind readonly
|
||||||
|
declare %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0i64(i64*) nounwind readonly
|
||||||
|
declare %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0i64(i64*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0i8(i8*) nounwind readonly
|
||||||
|
declare %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0i8(i8*) nounwind readonly
|
||||||
|
declare %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0i8(i8*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0i16(i16*) nounwind readonly
|
||||||
|
declare %struct.uint16x8x3_t @llvm.arm.neon.vld1x3.v8i16.p0i16(i16*) nounwind readonly
|
||||||
|
declare %struct.uint16x8x4_t @llvm.arm.neon.vld1x4.v8i16.p0i16(i16*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint32x4x2_t @llvm.arm.neon.vld1x2.v4i32.p0i32(i32*) nounwind readonly
|
||||||
|
declare %struct.uint32x4x3_t @llvm.arm.neon.vld1x3.v4i32.p0i32(i32*) nounwind readonly
|
||||||
|
declare %struct.uint32x4x4_t @llvm.arm.neon.vld1x4.v4i32.p0i32(i32*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint64x2x2_t @llvm.arm.neon.vld1x2.v2i64.p0i64(i64*) nounwind readonly
|
||||||
|
declare %struct.uint64x2x3_t @llvm.arm.neon.vld1x3.v2i64.p0i64(i64*) nounwind readonly
|
||||||
|
declare %struct.uint64x2x4_t @llvm.arm.neon.vld1x4.v2i64.p0i64(i64*) nounwind readonly
|
||||||
|
|
||||||
|
declare %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0i8(i8*) nounwind readonly
|
||||||
|
declare %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0i8(i8*) nounwind readonly
|
||||||
|
declare %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0i8(i8*) nounwind readonly
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u16_x2
|
||||||
|
; CHECK: vld1.16 {d16, d17}, [r0:64]
|
||||||
|
define %struct.uint16x4x2_t @test_vld1_u16_x2(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x4x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u16_x3
|
||||||
|
; CHECK: vld1.16 {d16, d17, d18}, [r1:64]
|
||||||
|
define %struct.uint16x4x3_t @test_vld1_u16_x3(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x4x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u16_x4
|
||||||
|
; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint16x4x4_t @test_vld1_u16_x4(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x4x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u32_x2
|
||||||
|
; CHECK: vld1.32 {d16, d17}, [r0:64]
|
||||||
|
define %struct.uint32x2x2_t @test_vld1_u32_x2(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x2x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u32_x3
|
||||||
|
; CHECK: vld1.32 {d16, d17, d18}, [r1:64]
|
||||||
|
define %struct.uint32x2x3_t @test_vld1_u32_x3(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x2x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u32_x4
|
||||||
|
; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint32x2x4_t @test_vld1_u32_x4(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x2x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u64_x2
|
||||||
|
; CHECK: vld1.64 {d16, d17}, [r0:64]
|
||||||
|
define %struct.uint64x1x2_t @test_vld1_u64_x2(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x1x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u64_x3
|
||||||
|
; CHECK: vld1.64 {d16, d17, d18}, [r1:64]
|
||||||
|
define %struct.uint64x1x3_t @test_vld1_u64_x3(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x1x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u64_x4
|
||||||
|
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint64x1x4_t @test_vld1_u64_x4(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x1x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u8_x2
|
||||||
|
; CHECK: vld1.8 {d16, d17}, [r0:64]
|
||||||
|
define %struct.uint8x8x2_t @test_vld1_u8_x2(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x8x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u8_x3
|
||||||
|
; CHECK: vld1.8 {d16, d17, d18}, [r1:64]
|
||||||
|
define %struct.uint8x8x3_t @test_vld1_u8_x3(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x8x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1_u8_x4
|
||||||
|
; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint8x8x4_t @test_vld1_u8_x4(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x8x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u16_x2
|
||||||
|
; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint16x8x2_t @test_vld1q_u16_x2(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x8x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u16_x3
|
||||||
|
; CHECK: vld1.16 {d16, d17, d18}, [r1:64]!
|
||||||
|
; CHECK: vld1.16 {d19, d20, d21}, [r1:64]
|
||||||
|
define %struct.uint16x8x3_t @test_vld1q_u16_x3(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld1x3.v8i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x8x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u16_x4
|
||||||
|
; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]!
|
||||||
|
; CHECK: vld1.16 {d20, d21, d22, d23}, [r1:256]
|
||||||
|
define %struct.uint16x8x4_t @test_vld1q_u16_x4(i16* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld1x4.v8i16.p0i16(i16* %a)
|
||||||
|
ret %struct.uint16x8x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u32_x2
|
||||||
|
; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint32x4x2_t @test_vld1q_u32_x2(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld1x2.v4i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x4x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u32_x3
|
||||||
|
; CHECK: vld1.32 {d16, d17, d18}, [r1:64]!
|
||||||
|
; CHECK: vld1.32 {d19, d20, d21}, [r1:64]
|
||||||
|
define %struct.uint32x4x3_t @test_vld1q_u32_x3(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld1x3.v4i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x4x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u32_x4
|
||||||
|
; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]!
|
||||||
|
; CHECK: vld1.32 {d20, d21, d22, d23}, [r1:256]
|
||||||
|
define %struct.uint32x4x4_t @test_vld1q_u32_x4(i32* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld1x4.v4i32.p0i32(i32* %a)
|
||||||
|
ret %struct.uint32x4x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u64_x2
|
||||||
|
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint64x2x2_t @test_vld1q_u64_x2(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x2x2_t @llvm.arm.neon.vld1x2.v2i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x2x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u64_x3
|
||||||
|
; CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
|
||||||
|
; CHECK: vld1.64 {d19, d20, d21}, [r1:64]
|
||||||
|
define %struct.uint64x2x3_t @test_vld1q_u64_x3(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x2x3_t @llvm.arm.neon.vld1x3.v2i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x2x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u64_x4
|
||||||
|
; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
|
||||||
|
; CHECK: vld1.64 {d20, d21, d22, d23}, [r1:256]
|
||||||
|
define %struct.uint64x2x4_t @test_vld1q_u64_x4(i64* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint64x2x4_t @llvm.arm.neon.vld1x4.v2i64.p0i64(i64* %a)
|
||||||
|
ret %struct.uint64x2x4_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u8_x2
|
||||||
|
; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]
|
||||||
|
define %struct.uint8x16x2_t @test_vld1q_u8_x2(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x16x2_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u8_x3
|
||||||
|
; CHECK: vld1.8 {d16, d17, d18}, [r1:64]!
|
||||||
|
; CHECK: vld1.8 {d19, d20, d21}, [r1:64]
|
||||||
|
define %struct.uint8x16x3_t @test_vld1q_u8_x3(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x16x3_t %tmp
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: test_vld1q_u8_x4
|
||||||
|
; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]!
|
||||||
|
; CHECK: vld1.8 {d20, d21, d22, d23}, [r1:256]
|
||||||
|
define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) nounwind {
|
||||||
|
%tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0i8(i8* %a)
|
||||||
|
ret %struct.uint8x16x4_t %tmp
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user