From 4cb228fdc820991383563e3eec5510e65e86507c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 3 Mar 2016 15:56:08 +0000 Subject: [PATCH] [AArch64] fold 'isPositive' vector integer operations (PR26819) This is one of the cases shown in: https://llvm.org/bugs/show_bug.cgi?id=26819 Shift and negate is what InstCombine prefers to produce (and I tried to make it do more of that in http://reviews.llvm.org/rL262424 ), so we should recognize that pattern as something that might come from autovectorization even if it's unlikely to be produced from C NEON intrinsics. The patch is based on the x86 equivalent: http://reviews.llvm.org/rL262036 Differential Revision: http://reviews.llvm.org/D17834 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 31 ++++++++++++++++++- .../AArch64/neon-compare-instructions.ll | 22 +++++-------- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index faff086cc05..a7adb12093e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7423,6 +7423,33 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return Shift < 3; } +/// Turn vector tests of the signbit in the form of: +/// xor (sra X, elt_size(X)-1), -1 +/// into: +/// cmge X, X, #0 +static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (!Subtarget->hasNEON() || !VT.isVector()) + return SDValue(); + + // There must be a shift right algebraic before the xor, and the xor must be a + // 'not' operation. 
+ SDValue Shift = N->getOperand(0); + SDValue Ones = N->getOperand(1); + if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() || + !ISD::isBuildVectorAllOnes(Ones.getNode())) + return SDValue(); + + // The shift should be smearing the sign bit across each vector element. + auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); + EVT ShiftEltTy = Shift.getValueType().getVectorElementType(); + if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1) + return SDValue(); + + return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0)); +} + // Generate SUBS and CSEL for integer abs. static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -7451,13 +7478,15 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// performXorCombine - Attempts to handle integer ABS. static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); + if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget)) + return Cmp; + return performIntegerAbsCombine(N, DAG); } diff --git a/test/CodeGen/AArch64/neon-compare-instructions.ll b/test/CodeGen/AArch64/neon-compare-instructions.ll index 127a9fa93f6..887cb5dd698 100644 --- a/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -801,12 +801,10 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) { ret <2 x i64> %tmp4 } -; FIXME: The following 7 tests could be optimized to cmgez to save an instruction. 
define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) { ; CHECK-LABEL: cmgez8xi8_alt: -; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #7 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}} %sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> %not = xor <8 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ret <8 x i8> %not @@ -814,8 +812,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) { define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) { ; CHECK-LABEL: cmgez16xi8_alt: -; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #7 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}} %sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> %not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ret <16 x i8> %not @@ -823,8 +820,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) { define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) { ; CHECK-LABEL: cmgez4xi16_alt: -; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}} %sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15> %not = xor <4 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1> ret <4 x i16> %not @@ -832,8 +828,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) { define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) { ; CHECK-LABEL: cmgez8xi16_alt: -; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}} %sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> %not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ret <8 x i16> %not @@ -841,8 +836,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) { define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) { ; CHECK-LABEL: cmgez2xi32_alt: -; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}} %sign = ashr <2 x i32> %A, <i32 31, i32 31> %not = xor <2 x i32> %sign, <i32 -1, i32 -1> ret <2 x i32> %not @@ -850,8 +844,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) { define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) { ; 
CHECK-LABEL: cmgez4xi32_alt: -; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}} %sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31> %not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1> ret <4 x i32> %not @@ -859,8 +852,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) { define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) { ; CHECK-LABEL: cmgez2xi64_alt: -; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63 -; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}} %sign = ashr <2 x i64> %A, <i64 63, i64 63> %not = xor <2 x i64> %sign, <i64 -1, i64 -1> ret <2 x i64> %not