mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-01 00:25:01 +00:00
Add patterns to use post-increment addressing for Neon VST1-lane instructions.
llvm-svn: 126477
This commit is contained in:
parent
61c746f927
commit
6bbffe19e9
@ -126,6 +126,7 @@ public:
|
||||
bool SelectAddrMode5(SDValue N, SDValue &Base,
|
||||
SDValue &Offset);
|
||||
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
|
||||
bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
|
||||
|
||||
bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
|
||||
|
||||
@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Selector for the offset operand of a post-incremented addrmode6 access
// (named by the am6offset ComplexPattern as "SelectAddrMode6Offset").
//
//   Op     - node that is cast to LSBaseSDNode below; presumably the pattern
//            root, since am6offset is declared with SDNPWantRoot - confirm.
//   N      - candidate increment value.
//   Offset - out: the operand to use for the increment on success.
//
// Returns false unless Op's addressing mode is ISD::POST_INC. On success
// Offset is N, except that a constant N whose byte count equals the size of
// the accessed memory type is replaced by register 0.
bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
|
||||
SDValue &Offset) {
|
||||
// Unconditional cast: callers are expected to pass a load/store node here.
LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
|
||||
ISD::MemIndexedMode AM = LdSt->getAddressingMode();
|
||||
// Only post-increment forms are accepted; every other mode is rejected.
if (AM != ISD::POST_INC)
|
||||
return false;
|
||||
// Default: use the increment value as given.
Offset = N;
|
||||
if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
|
||||
// Constant increment in bytes (* 8 -> bits) equal to the memory access size.
if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
|
||||
// Register 0 presumably selects the fixed-size writeback form printed as
// "[Rn]!" - NOTE(review): confirm against printAddrMode6OffsetOperand.
Offset = CurDAG->getRegister(0, MVT::i32);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
|
||||
SDValue &Offset, SDValue &Label) {
|
||||
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
|
||||
|
@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
|
||||
let EncoderMethod = "getAddrMode6AddressOpValue";
|
||||
}
|
||||
|
||||
def am6offset : Operand<i32> {
|
||||
def am6offset : Operand<i32>,
|
||||
ComplexPattern<i32, 1, "SelectAddrMode6Offset",
|
||||
[], [SDNPWantRoot]> {
|
||||
let PrintMethod = "printAddrMode6OffsetOperand";
|
||||
let MIOperandInfo = (ops GPR);
|
||||
let EncoderMethod = "getAddrMode6OffsetOpValue";
|
||||
|
@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
|
||||
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
|
||||
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
|
||||
|
||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
|
||||
PatFrag StoreOp, SDNode ExtractOp>
|
||||
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
(ins addrmode6:$Rn, am6offset:$Rm,
|
||||
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
|
||||
"\\{$Vd[$lane]\\}, $Rn$Rm",
|
||||
"$Rn.addr = $wb", []>;
|
||||
"$Rn.addr = $wb",
|
||||
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
|
||||
addrmode6:$Rn, am6offset:$Rm))]>;
|
||||
// Q-register VST1-lane pseudo with address writeback that also carries a
// selection pattern. Derives from VSTQLNWBPseudo using the IIC_VST1lnu
// itinerary and sets Pattern so that:
//   Ty        - vector type of the QPR source ($src),
//   ExtractOp - node used to extract lane $lane from $src,
//   StoreOp   - store fragment applied to the extracted element with an
//               addrmode6 address ($addr) and an am6offset offset ($offset);
// the pattern's result is the GPR $wb.
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
|
||||
: VSTQLNWBPseudo<IIC_VST1lnu> {
|
||||
let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
|
||||
addrmode6:$addr, am6offset:$offset))];
|
||||
}
|
||||
|
||||
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
|
||||
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
|
||||
NEONvgetlaneu> {
|
||||
let Inst{7-5} = lane{2-0};
|
||||
}
|
||||
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
|
||||
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
|
||||
NEONvgetlaneu> {
|
||||
let Inst{7-6} = lane{1-0};
|
||||
let Inst{4} = Rn{5};
|
||||
}
|
||||
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
|
||||
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
|
||||
extractelt> {
|
||||
let Inst{7} = lane{0};
|
||||
let Inst{5-4} = Rn{5-4};
|
||||
}
|
||||
|
||||
def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
|
||||
def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
|
||||
def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
|
||||
def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
|
||||
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
|
||||
def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
|
||||
|
||||
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
|
||||
|
||||
// VST2LN : Vector Store (single 2-element structure from one lane)
|
||||
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
|
@ -10,6 +10,19 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
;Check for a post-increment updating store.
|
||||
; Test input: stores lane 3 of an <8 x i8> through a pointer loaded from %ptr,
; then advances that pointer by one i8 element and writes it back to %ptr.
; The expected assembly below is a single vst1.8 lane store with "!" writeback.
define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
|
||||
;CHECK: vst1lanei8_update:
|
||||
;CHECK: vst1.8 {d16[3]}, [r2]!
|
||||
; %A is the store address, read from memory so the increment can be stored back.
%A = load i8** %ptr
|
||||
%tmp1 = load <8 x i8>* %B
|
||||
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
|
||||
store i8 %tmp2, i8* %A, align 8
|
||||
; Increment of one element (one byte), i.e. the size of the stored value.
%tmp3 = getelementptr i8* %A, i32 1
|
||||
store i8* %tmp3, i8** %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
|
||||
;CHECK: vst1lanei16:
|
||||
;Check the alignment value. Max for this instruction is 16 bits:
|
||||
@ -66,6 +79,19 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
;Check for a post-increment updating store.
|
||||
; Test input: stores lane 3 of a <4 x i32> through a pointer loaded from %ptr,
; then advances that pointer by one i32 element and writes it back to %ptr.
; The expected assembly below is a single vst1.32 lane store with "!" writeback;
; it names d17[1] for lane 3 of the 128-bit vector.
define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
|
||||
;CHECK: vst1laneQi32_update:
|
||||
;CHECK: vst1.32 {d17[1]}, [r1, :32]!
|
||||
; %A is the store address, read from memory so the increment can be stored back.
%A = load i32** %ptr
|
||||
%tmp1 = load <4 x i32>* %B
|
||||
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
|
||||
; The store is marked align 8 while the expected output shows ":32";
; presumably 32 bits is the maximum alignment for this form - confirm.
store i32 %tmp2, i32* %A, align 8
|
||||
; Increment of one element (four bytes), i.e. the size of the stored value.
%tmp3 = getelementptr i32* %A, i32 1
|
||||
store i32* %tmp3, i32** %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
|
||||
;CHECK: vst1laneQf:
|
||||
;CHECK: vst1.32 {d17[1]}, [r0]
|
||||
|
Loading…
Reference in New Issue
Block a user