X86: add pattern for X86ISD::VSRAV

Detect an ashr whose shift amount is clamped (via umin) to the maximum legal value, and lower it to X86ISD::VSRAV.
2024-11-23 11:49:46 +00:00 · 2018-07-04 02:35:29 +03:00 · 2018-07-04 02:35:29 +03:00 · fd0cff5e13
commit fd0cff5e13
parent 32c90c1321
1 changed files with 14 additions and 3 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4860,7 +4860,7 @@ bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");
-  
+
  // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
  // relocation target a movq or addq instruction: don't let the load shrink.
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
@@ -38431,11 +38431,22 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
  return SDValue();
 }

-static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
+                                           const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Size = VT.getSizeInBits();
+  APInt MinAmnt;
+
+  // Detect pattern (ashr a, (umin b, MaxAllowedShiftAmount))
+  if (VT.isVector() && N1.getOpcode() == ISD::UMIN &&
+      SupportedVectorVarShift(VT.getSimpleVT(), Subtarget, ISD::SRA) &&
+      ISD::isConstantSplatVector(N1.getOperand(1).getNode(), MinAmnt) &&
+      MinAmnt == VT.getScalarSizeInBits() - 1) {
+    // Use infinite-precision vector variable shift if supported
+    return DAG.getNode(X86ISD::VSRAV, SDLoc(N), VT, N0, N1.getOperand(0));
+  }

  // fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
  // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
@@ -45019,7 +45030,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
  case X86ISD::ADC:         return combineADC(N, DAG, DCI);
  case ISD::MUL:            return combineMul(N, DAG, DCI, Subtarget);
  case ISD::SHL:            return combineShiftLeft(N, DAG);
-  case ISD::SRA:            return combineShiftRightArithmetic(N, DAG);
+  case ISD::SRA:            return combineShiftRightArithmetic(N, DAG, Subtarget);
  case ISD::SRL:            return combineShiftRightLogical(N, DAG, DCI);
  case ISD::AND:            return combineAnd(N, DAG, DCI, Subtarget);
  case ISD::OR:             return combineOr(N, DAG, DCI, Subtarget);