mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[AArch64] fold 'isPositive' vector integer operations (PR26819)
This is one of the cases shown in: https://llvm.org/bugs/show_bug.cgi?id=26819 Shift and negate is what InstCombine prefers to produce (and I tried to make it do more of that in http://reviews.llvm.org/rL262424 ), so we should recognize that pattern as something that might come from autovectorization even if it's unlikely to be produced from C NEON intrinsics. The patch is based on the x86 equivalent: http://reviews.llvm.org/rL262036 Differential Revision: http://reviews.llvm.org/D17834 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@262623 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b77a6ae3ef
commit
4cb228fdc8
@ -7423,6 +7423,33 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
|
||||
return Shift < 3;
|
||||
}
|
||||
|
||||
/// Turn vector tests of the signbit in the form of:
|
||||
/// xor (sra X, elt_size(X)-1), -1
|
||||
/// into:
|
||||
/// cmge X, X, #0
|
||||
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
EVT VT = N->getValueType(0);
|
||||
if (!Subtarget->hasNEON() || !VT.isVector())
|
||||
return SDValue();
|
||||
|
||||
// There must be a shift right algebraic before the xor, and the xor must be a
|
||||
// 'not' operation.
|
||||
SDValue Shift = N->getOperand(0);
|
||||
SDValue Ones = N->getOperand(1);
|
||||
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
|
||||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
|
||||
return SDValue();
|
||||
|
||||
// The shift should be smearing the sign bit across each vector element.
|
||||
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
|
||||
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
|
||||
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
|
||||
}
|
||||
|
||||
// Generate SUBS and CSEL for integer abs.
|
||||
static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
EVT VT = N->getValueType(0);
|
||||
@ -7451,13 +7478,15 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// performXorCombine - Attempts to handle integer ABS.
|
||||
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
if (DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
|
||||
return Cmp;
|
||||
|
||||
return performIntegerAbsCombine(N, DAG);
|
||||
}
|
||||
|
||||
|
@ -801,12 +801,10 @@ define <2 x i64> @cmgez2xi64(<2 x i64> %A) {
|
||||
ret <2 x i64> %tmp4
|
||||
}
|
||||
|
||||
; FIXME: The following 7 tests could be optimized to cmgez to save an instruction.
|
||||
|
||||
define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) {
|
||||
; CHECK-LABEL: cmgez8xi8_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #7
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x0|0}}
|
||||
%sign = ashr <8 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
%not = xor <8 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
ret <8 x i8> %not
|
||||
@ -814,8 +812,7 @@ define <8 x i8> @cmgez8xi8_alt(<8 x i8> %A) {
|
||||
|
||||
define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) {
|
||||
; CHECK-LABEL: cmgez16xi8_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #7
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x0|0}}
|
||||
%sign = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
%not = xor <16 x i8> %sign, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
|
||||
ret <16 x i8> %not
|
||||
@ -823,8 +820,7 @@ define <16 x i8> @cmgez16xi8_alt(<16 x i8> %A) {
|
||||
|
||||
define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) {
|
||||
; CHECK-LABEL: cmgez4xi16_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #{{0x0|0}}
|
||||
%sign = ashr <4 x i16> %A, <i16 15, i16 15, i16 15, i16 15>
|
||||
%not = xor <4 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <4 x i16> %not
|
||||
@ -832,8 +828,7 @@ define <4 x i16> @cmgez4xi16_alt(<4 x i16> %A) {
|
||||
|
||||
define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) {
|
||||
; CHECK-LABEL: cmgez8xi16_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #{{0x0|0}}
|
||||
%sign = ashr <8 x i16> %A, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
%not = xor <8 x i16> %sign, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <8 x i16> %not
|
||||
@ -841,8 +836,7 @@ define <8 x i16> @cmgez8xi16_alt(<8 x i16> %A) {
|
||||
|
||||
define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) {
|
||||
; CHECK-LABEL: cmgez2xi32_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #{{0x0|0}}
|
||||
%sign = ashr <2 x i32> %A, <i32 31, i32 31>
|
||||
%not = xor <2 x i32> %sign, <i32 -1, i32 -1>
|
||||
ret <2 x i32> %not
|
||||
@ -850,8 +844,7 @@ define <2 x i32> @cmgez2xi32_alt(<2 x i32> %A) {
|
||||
|
||||
define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) {
|
||||
; CHECK-LABEL: cmgez4xi32_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #{{0x0|0}}
|
||||
%sign = ashr <4 x i32> %A, <i32 31, i32 31, i32 31, i32 31>
|
||||
%not = xor <4 x i32> %sign, <i32 -1, i32 -1, i32 -1, i32 -1>
|
||||
ret <4 x i32> %not
|
||||
@ -859,8 +852,7 @@ define <4 x i32> @cmgez4xi32_alt(<4 x i32> %A) {
|
||||
|
||||
define <2 x i64> @cmgez2xi64_alt(<2 x i64> %A) {
|
||||
; CHECK-LABEL: cmgez2xi64_alt:
|
||||
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #63
|
||||
; CHECK-NEXT: {{mvn|not}} {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
|
||||
; CHECK: cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #{{0x0|0}}
|
||||
%sign = ashr <2 x i64> %A, <i64 63, i64 63>
|
||||
%not = xor <2 x i64> %sign, <i64 -1, i64 -1>
|
||||
ret <2 x i64> %not
|
||||
|
Loading…
Reference in New Issue
Block a user