From 3a6756cb1c87908f5d04660b6ed7d464b56f78f6 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Mon, 13 Dec 2010 21:05:52 +0000 Subject: [PATCH] Use pseudo instructions for 2-register Neon instructions for scalar FP. Partial fix for Radar 8711675. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121716 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 41 +++++++++++++++++-------- lib/Target/ARM/ARMInstrNEON.td | 33 ++++++-------------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 99ced50a193..79ca3fc50e3 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -54,6 +54,7 @@ namespace { void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); void ExpandVTBL(MachineBasicBlock::iterator &MBBI, unsigned Opc, bool IsExt, unsigned NumRegs); + void ExpandNeonSFP2(MachineBasicBlock::iterator &MBBI, unsigned Opc); }; char ARMExpandPseudo::ID = 0; } @@ -612,6 +613,21 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } +/// ExpandNeonSFP2 - Translate a 2-register Neon pseudo instruction used for +/// scalar floating-point to a real instruction. +void ARMExpandPseudo::ExpandNeonSFP2(MachineBasicBlock::iterator &MBBI, + unsigned Opc) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + MIB.addOperand(MI.getOperand(0)) // destination register + .addOperand(MI.getOperand(1)) // source register + .addOperand(MI.getOperand(2)) // predicate + .addOperand(MI.getOperand(3)); // predicate register + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -1145,18 +1161,19 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { ExpandLaneOp(MBBI); break; - case ARM::VTBL2Pseudo: - ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; - case ARM::VTBL3Pseudo: - ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; - case ARM::VTBL4Pseudo: - ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; - case ARM::VTBX2Pseudo: - ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; - case ARM::VTBX3Pseudo: - ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; - case ARM::VTBX4Pseudo: - ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); break; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); break; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); break; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); break; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); break; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); break; + + case ARM::VABSfd_sfp: ExpandNeonSFP2(MBBI, ARM::VABSfd); break; + case ARM::VNEGfd_sfp: ExpandNeonSFP2(MBBI, ARM::VNEGfd); break; + case ARM::VCVTf2sd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTf2sd); break; + case ARM::VCVTf2ud_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTf2ud); break; + case ARM::VCVTs2fd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTs2fd); break; + case ARM::VCVTu2fd_sfp: ExpandNeonSFP2(MBBI, ARM::VCVTu2fd); break; } if (ModifiedOp) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 81659f75b96..196e3f5f2b4 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1668,12 +1668,9 @@ def SubReg_i32_lane : SDNodeXForm op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, - string Dt> - : N2V; +let neverHasSideEffects = 1 in +class N2VS + : PseudoNeonI<(outs DPR_VFP2:$Vd), (ins DPR_VFP2:$Vm), IIC_VUNAD, "", []>; class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> @@ -4681,7 +4678,7 @@ def VTBX4Pseudo // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// -class N2VSPat +class N2VSPat : NEONFPPat<(ResTy (OpNode SPR:$a)), (EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, ssub_0))), @@ -4739,17 +4736,11 @@ def : N3VSMulOpPat, Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; // Vector Absolute used for single-precision FP -let neverHasSideEffects = 1 in -def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0, - (outs DPR_VFP2:$Vd), (ins DPR_VFP2:$Vm), IIC_VUNAD, - "vabs", "f32", "$Vd, $Vm", "", []>; +def VABSfd_sfp : N2VS; def : N2VSPat; // Vector Negate used for single-precision FP -let neverHasSideEffects = 1 in -def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$Vd), (ins DPR_VFP2:$Vm), IIC_VUNAD, - "vneg", "f32", "$Vd, $Vm", "", []>; +def VNEGfd_sfp : N2VS; def : N2VSPat; // Vector Maximum used for single-precision FP @@ -4767,20 +4758,16 @@ def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$Vd), def : N3VSPat; // Vector Convert between single-precision FP and integer -let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32">; +def VCVTf2sd_sfp : N2VS; def : N2VSPat; -let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32">; +def VCVTf2ud_sfp : N2VS; def : N2VSPat; -let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32">; +def VCVTs2fd_sfp : N2VS; def : N2VSPat; -let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32">; +def VCVTu2fd_sfp : N2VS; def : N2VSPat; //===----------------------------------------------------------------------===//