mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
Add support for NEON VLD3-dup instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120312 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2fcda63763
commit
86c6d80a7a
@ -172,6 +172,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
|
||||
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
|
||||
{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
|
||||
|
||||
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
|
||||
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
|
||||
{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
|
||||
{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
|
||||
{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
|
||||
{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
|
||||
|
||||
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
|
||||
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
|
||||
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
|
||||
@ -946,6 +953,12 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
case ARM::VLD2DUPd8Pseudo_UPD:
|
||||
case ARM::VLD2DUPd16Pseudo_UPD:
|
||||
case ARM::VLD2DUPd32Pseudo_UPD:
|
||||
case ARM::VLD3DUPd8Pseudo:
|
||||
case ARM::VLD3DUPd16Pseudo:
|
||||
case ARM::VLD3DUPd32Pseudo:
|
||||
case ARM::VLD3DUPd8Pseudo_UPD:
|
||||
case ARM::VLD3DUPd16Pseudo_UPD:
|
||||
case ARM::VLD3DUPd32Pseudo_UPD:
|
||||
ExpandVLD(MBBI);
|
||||
break;
|
||||
|
||||
|
@ -2361,6 +2361,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
return SelectVLDDup(N, 2, Opcodes);
|
||||
}
|
||||
|
||||
// Selection of an ARMISD::VLD3DUP node: the d8, d16 and d32 pseudo opcodes
// are collected into a table and handed to SelectVLDDup together with the
// vector count 3.
// NOTE(review): presumably SelectVLDDup picks the entry by element size,
// so the d8/d16/d32 order matters -- confirm against SelectVLDDup.
case ARMISD::VLD3DUP: {
|
||||
unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo,
|
||||
ARM::VLD3DUPd32Pseudo };
|
||||
return SelectVLDDup(N, 3, Opcodes);
|
||||
}
|
||||
|
||||
case ISD::INTRINSIC_VOID:
|
||||
case ISD::INTRINSIC_W_CHAIN: {
|
||||
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
|
@ -896,6 +896,48 @@ def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
|
||||
def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
|
||||
|
||||
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
|
||||
// Base class for the VLD3DUP instruction definitions without writeback.
//   op7_4 : 4-bit value forwarded to NLdSt.
//   Dt    : data-type suffix string used in the assembly text.
// Defines three D-register outputs ($Vd, $dst2, $dst3) and a single
// addrmode6 input ($Rn); the assembly string lists the three registers
// with an empty "[]" lane suffix followed by $Rn.
class VLD3DUP<bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
|
||||
(ins addrmode6:$Rn), IIC_VLD3dup,
|
||||
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
|
||||
// This form has no $Rm operand, so Rm is pinned to the constant 0b1111.
let Rm = 0b1111;
|
||||
// Encoding bit 4 is copied from bit 4 of the $Rn operand.
// NOTE(review): presumably Rn{4} carries an addrmode6 alignment bit; the
// regression test added with this change states that VLD3 does not support
// alignment -- confirm this bit can never be set for VLD3DUP.
let Inst{4} = Rn{4};
|
||||
}
|
||||
|
||||
// Concrete VLD3DUP instructions, one per data-type suffix ("8", "16", "32").
// The last op7_4 bit is left unset ('?') in every variant.
def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">;
|
||||
def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
|
||||
def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
|
||||
|
||||
// Pseudo counterparts sharing the IIC_VLD3dup itinerary class. The
// NEONLdStTable rows added in this change pair each pseudo with the real
// instruction above, and ARMExpandPseudo::ExpandMBB routes them to ExpandVLD.
def VLD3DUPd8Pseudo : VLDQQPseudo<IIC_VLD3dup>;
|
||||
def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
|
||||
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
|
||||
|
||||
// ...with double-spaced registers (not used for codegen):
|
||||
// These differ from the defs above only in the third op7_4 bit (1 instead of 0).
def VLD3DUPd8T : VLD3DUP<{0,0,1,?}, "8">;
|
||||
def VLD3DUPd16T : VLD3DUP<{0,1,1,?}, "16">;
|
||||
def VLD3DUPd32T : VLD3DUP<{1,0,1,?}, "32">;
|
||||
|
||||
// ...with address register writeback:
|
||||
// Base class for the VLD3DUP instruction definitions with address writeback.
//   op7_4 : 4-bit value forwarded to NLdSt.
//   Dt    : data-type suffix string used in the assembly text.
// Compared with VLD3DUP it adds a GPR output ($wb) and an am6offset input
// ($Rm), appends $Rm to the assembly string, and ties $wb to the address
// part of $Rn through the "$Rn.addr = $wb" constraint. Rm is not pinned to
// a constant here because it is a real operand.
class VLD3DUPWB<bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
|
||||
(ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3dupu,
|
||||
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
|
||||
"$Rn.addr = $wb", []> {
|
||||
// Encoding bit 4 is copied from bit 4 of the $Rn operand.
// NOTE(review): presumably Rn{4} carries an addrmode6 alignment bit; the
// regression test added with this change states that VLD3 does not support
// alignment -- confirm this bit can never be set for VLD3DUP.
let Inst{4} = Rn{4};
|
||||
}
|
||||
|
||||
// Writeback (_UPD) VLD3DUP instructions, one per data-type suffix.
// NOTE(review): the d8 variants pass a literal 0 as the last op7_4 bit while
// the d16/d32 variants (and the non-writeback d8 defs) pass '?'. VLD3DUPWB
// assigns Inst{4} explicitly, so this presumably has no effect on the final
// encoding -- confirm, and consider making the four-bit patterns uniform.
def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
|
||||
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
|
||||
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
|
||||
|
||||
// Writeback variants whose third op7_4 bit is 1 (the "T" forms).
def VLD3DUPd8T_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
|
||||
def VLD3DUPd16T_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
|
||||
def VLD3DUPd32T_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
|
||||
|
||||
// Pseudo counterparts sharing the IIC_VLD3dupu itinerary class. The
// NEONLdStTable rows added in this change pair each pseudo with the matching
// non-T _UPD instruction above.
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
|
||||
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
|
||||
def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
|
||||
|
||||
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
|
||||
// FIXME: Not yet implemented.
|
||||
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
|
||||
|
@ -152,6 +152,8 @@ def IIC_VLD3 : InstrItinClass;
|
||||
def IIC_VLD3ln : InstrItinClass;
|
||||
def IIC_VLD3u : InstrItinClass;
|
||||
def IIC_VLD3lnu : InstrItinClass;
|
||||
def IIC_VLD3dup : InstrItinClass;
|
||||
def IIC_VLD3dupu : InstrItinClass;
|
||||
def IIC_VLD4 : InstrItinClass;
|
||||
def IIC_VLD4ln : InstrItinClass;
|
||||
def IIC_VLD4u : InstrItinClass;
|
||||
|
@ -559,6 +559,18 @@ def CortexA8Itineraries : ProcessorItineraries<
|
||||
InstrStage<5, [A8_LSPipe]>],
|
||||
[4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
|
||||
//
|
||||
// VLD3dup
|
||||
InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||
InstrStage<3, [A8_NLSPipe], 0>,
|
||||
InstrStage<3, [A8_LSPipe]>],
|
||||
[2, 2, 3, 1]>,
|
||||
//
|
||||
// VLD3dupu
|
||||
InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
|
||||
InstrStage<3, [A8_NLSPipe], 0>,
|
||||
InstrStage<3, [A8_LSPipe]>],
|
||||
[2, 2, 3, 2, 1, 1]>,
|
||||
//
|
||||
// VLD4
|
||||
InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
|
||||
InstrStage<4, [A8_NLSPipe], 0>,
|
||||
|
@ -941,6 +941,24 @@ def CortexA9Itineraries : ProcessorItineraries<
|
||||
InstrStage<5, [A9_LSUnit]>],
|
||||
[5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
|
||||
//
|
||||
// VLD3dup
|
||||
InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||
InstrStage<1, [A9_MUX0], 0>,
|
||||
InstrStage<1, [A9_DRegsN], 0, Required>,
|
||||
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
|
||||
InstrStage<3, [A9_NPipe], 0>,
|
||||
InstrStage<3, [A9_LSUnit]>],
|
||||
[3, 3, 4, 1]>,
|
||||
//
|
||||
// VLD3dupu
|
||||
InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||
InstrStage<1, [A9_MUX0], 0>,
|
||||
InstrStage<1, [A9_DRegsN], 0, Required>,
|
||||
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
|
||||
InstrStage<3, [A9_NPipe], 0>,
|
||||
InstrStage<3, [A9_LSUnit]>],
|
||||
[3, 3, 4, 2, 1, 1]>,
|
||||
//
|
||||
// VLD4
|
||||
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
|
||||
InstrStage<1, [A9_MUX0], 0>,
|
||||
|
@ -71,3 +71,23 @@ define <2 x i32> @vld2dupi32(i32* %A) nounwind {
|
||||
|
||||
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
|
||||
declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
|
||||
|
||||
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
|
||||
|
||||
; Regression test for VLD3-dup selection on <4 x i16>.
; The vld3lane intrinsic is called with undef input vectors; each of the three
; returned vectors is then splatted from element 0 (shufflevector with a
; zeroinitializer mask) and the three splats are added together. The expected
; assembly is one all-lanes vld3.16 into d16-d18 with a plain [r0] address.
; NOTE(review): the trailing "i32 0, i32 8" arguments are presumably the lane
; index and the alignment -- confirm against the intrinsic definition.
define <4 x i16> @vld3dupi16(i16* %A) nounwind {
|
||||
;CHECK: vld3dupi16:
|
||||
;Check the (default) alignment value. VLD3 does not support alignment.
|
||||
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
|
||||
%tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
|
||||
; Splat element 0 of the first loaded vector.
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
|
||||
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
; Splat element 0 of the second loaded vector.
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
|
||||
%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
; Splat element 0 of the third loaded vector.
%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
|
||||
%tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
|
||||
; Sum the three splats so that all three results stay live.
%tmp7 = add <4 x i16> %tmp2, %tmp4
|
||||
%tmp8 = add <4 x i16> %tmp7, %tmp6
|
||||
ret <4 x i16> %tmp8
|
||||
}
|
||||
|
||||
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
|
||||
|
Loading…
Reference in New Issue
Block a user