[AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend

Replace vector unpack operation with a scalar extend operation.
  unpack(splat(X)) --> splat(extend(X))

If both unpkhi and unpklo are applied to the same vector, then we may
save a register in some cases, e.g.:
  Hi = unpkhi(splat(X))
  Lo = unpklo(splat(X))
   --> Hi = Lo = splat(extend(X))

Differential Revision: https://reviews.llvm.org/D106929

Change-Id: I77c5c201131e3a50de1cdccbdcf84420f5b2244b
This commit is contained in:
Usman Nadeem 2021-08-05 17:23:01 -07:00
parent 85bbc05154
commit 5420fc4a27
2 changed files with 90 additions and 0 deletions

View File

@ -783,6 +783,28 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
return None;
}
/// Fold an SVE unpack whose operand is a splat into a splat of the
/// integer-cast scalar:
///   {s,u}unpk{hi,lo}(splat(X)) --> splat(intcast(X))
/// The cast is signed for sunpkhi/sunpklo and unsigned for the other
/// intrinsics routed here.
///
/// \returns the result of IC.replaceInstUsesWith when the operand is a
/// splat, or None when no splat value could be found.
static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
                                                    IntrinsicInst &II) {
  // The result of the unpack intrinsics is always a scalable vector.
  auto *ResultTy = cast<ScalableVectorType>(II.getType());

  // Nothing to do unless every lane of the operand holds the same value.
  Value *SplatScalar = getSplatValue(II.getArgOperand(0));
  if (!SplatScalar)
    return None;

  const auto IID = II.getIntrinsicID();
  const bool SignExtend = IID == Intrinsic::aarch64_sve_sunpkhi ||
                          IID == Intrinsic::aarch64_sve_sunpklo;

  // Emit the replacement right before the intrinsic call.
  IRBuilder<> Builder(&II);
  Value *Widened =
      Builder.CreateIntCast(SplatScalar, ResultTy->getScalarType(), SignExtend);
  Value *Replacement =
      Builder.CreateVectorSplat(ResultTy->getElementCount(), Widened);

  // Keep the original value name on the new splat.
  Replacement->takeName(&II);
  return IC.replaceInstUsesWith(II, Replacement);
}
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpVal = II.getOperand(0);
@ -848,6 +870,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineSVEVectorMul(IC, II);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
case Intrinsic::aarch64_sve_uunpklo:
case Intrinsic::aarch64_sve_sunpkhi:
case Intrinsic::aarch64_sve_sunpklo:
return instCombineSVEUnpack(IC, II);
}
return None;

View File

@ -0,0 +1,63 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
target triple = "aarch64"
; uunpkhi of a splat of %a: expect a zext of the scalar to i32 followed by a
; splat to <vscale x 4 x i32>, with the intrinsic call removed.
define <vscale x 4 x i32> @uunpkhi_splat(i16 %a) #0 {
; CHECK-LABEL: @uunpkhi_splat(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
;
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
ret <vscale x 4 x i32> %unpack
}
; uunpklo of a splat of %a: expect a zext of the scalar to i32 followed by a
; splat to <vscale x 4 x i32>, with the intrinsic call removed.
define <vscale x 4 x i32> @uunpklo_splat(i16 %a) #0 {
; CHECK-LABEL: @uunpklo_splat(
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
;
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %splat)
ret <vscale x 4 x i32> %unpack
}
; sunpkhi of a splat of %a: expect a sext of the scalar to i32 followed by a
; splat to <vscale x 4 x i32>, with the intrinsic call removed.
define <vscale x 4 x i32> @sunpkhi_splat(i16 %a) #0 {
; CHECK-LABEL: @sunpkhi_splat(
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
;
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
ret <vscale x 4 x i32> %unpack
}
; sunpklo of a splat of %a: expect a sext of the scalar to i32 followed by a
; splat to <vscale x 4 x i32>, with the intrinsic call removed.
define <vscale x 4 x i32> @sunpklo_splat(i16 %a) #0 {
; CHECK-LABEL: @sunpklo_splat(
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
;
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %splat)
ret <vscale x 4 x i32> %unpack
}
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
attributes #0 = { "target-features"="+sve" }