mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-10 03:13:34 +00:00
[AArch64][SVE][InstCombine] Unpack of a splat vector -> Scalar extend
Replace a vector unpack operation on a splat with a scalar extend operation: unpack(splat(X)) --> splat(extend(X)). If we have both unpkhi and unpklo for the same vector, then we may save a register in some cases, e.g.: Hi = unpkhi(splat(X)); Lo = unpklo(splat(X)) --> Hi = Lo = splat(extend(X)). Differential Revision: https://reviews.llvm.org/D106929 Change-Id: I77c5c201131e3a50de1cdccbdcf84420f5b2244b
This commit is contained in:
parent
85bbc05154
commit
5420fc4a27
@ -783,6 +783,28 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
|
||||
return None;
|
||||
}
|
||||
|
||||
/// Fold an SVE unpack intrinsic whose operand is a splat into a splat of the
/// converted scalar:
///
///   Hi = [su]unpkhi(splat(X)) --> Hi = splat(extend(X))
///   Lo = [su]unpklo(splat(X)) --> Lo = splat(extend(X))
///
/// The scalar is cast to the result element type as a signed value for the
/// sunpkhi/sunpklo intrinsics and as an unsigned value otherwise.
///
/// \param IC the combiner used to replace the uses of \p II.
/// \param II the unpack intrinsic call being visited.
/// \returns the result of IC.replaceInstUsesWith() when the fold applies, or
///          None when the operand is not recognised as a splat.
static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
                                                    IntrinsicInst &II) {
  // New instructions are emitted immediately before the unpack call.
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);

  auto *ResultTy = cast<ScalableVectorType>(II.getType());
  Value *Source = II.getArgOperand(0);

  // Nothing to do unless every lane of the operand holds the same scalar.
  Value *Scalar = getSplatValue(Source);
  if (!Scalar)
    return None;

  const Intrinsic::ID IID = II.getIntrinsicID();
  const bool SignExtend = IID == Intrinsic::aarch64_sve_sunpkhi ||
                          IID == Intrinsic::aarch64_sve_sunpklo;

  // Convert the scalar once, then broadcast it at the result element count.
  Value *Widened =
      Builder.CreateIntCast(Scalar, ResultTy->getScalarType(), SignExtend);
  Value *Splat =
      Builder.CreateVectorSplat(ResultTy->getElementCount(), Widened);
  Splat->takeName(&II);
  return IC.replaceInstUsesWith(II, Splat);
}
|
||||
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
auto *OpVal = II.getOperand(0);
|
||||
@ -848,6 +870,11 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||
return instCombineSVEVectorMul(IC, II);
|
||||
case Intrinsic::aarch64_sve_tbl:
|
||||
return instCombineSVETBL(IC, II);
|
||||
case Intrinsic::aarch64_sve_uunpkhi:
|
||||
case Intrinsic::aarch64_sve_uunpklo:
|
||||
case Intrinsic::aarch64_sve_sunpkhi:
|
||||
case Intrinsic::aarch64_sve_sunpklo:
|
||||
return instCombineSVEUnpack(IC, II);
|
||||
}
|
||||
|
||||
return None;
|
||||
|
@ -0,0 +1,63 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -instcombine < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64"
|
||||
|
||||
; Applies @llvm.aarch64.sve.uunpkhi to a <vscale x 8 x i16> splat of %a.
; Expected result: the intrinsic call is gone, replaced by a zext of %a to i32
; that is then splatted into a <vscale x 4 x i32> vector.
define <vscale x 4 x i32> @uunpkhi_splat(i16 %a) #0 {
|
||||
; CHECK-LABEL: @uunpkhi_splat(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
|
||||
;
|
||||
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
|
||||
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
|
||||
ret <vscale x 4 x i32> %unpack
|
||||
}
|
||||
|
||||
; Applies @llvm.aarch64.sve.uunpklo to a <vscale x 8 x i16> splat of %a.
; Expected result: the intrinsic call is gone, replaced by a zext of %a to i32
; that is then splatted into a <vscale x 4 x i32> vector.
define <vscale x 4 x i32> @uunpklo_splat(i16 %a) #0 {
|
||||
; CHECK-LABEL: @uunpklo_splat(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
|
||||
;
|
||||
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
|
||||
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %splat)
|
||||
ret <vscale x 4 x i32> %unpack
|
||||
}
|
||||
|
||||
; Applies @llvm.aarch64.sve.sunpkhi to a <vscale x 8 x i16> splat of %a.
; Expected result: the intrinsic call is gone, replaced by a sext of %a to i32
; that is then splatted into a <vscale x 4 x i32> vector.
define <vscale x 4 x i32> @sunpkhi_splat(i16 %a) #0 {
|
||||
; CHECK-LABEL: @sunpkhi_splat(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
|
||||
;
|
||||
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
|
||||
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %splat)
|
||||
ret <vscale x 4 x i32> %unpack
|
||||
}
|
||||
|
||||
; Applies @llvm.aarch64.sve.sunpklo to a <vscale x 8 x i16> splat of %a.
; Expected result: the intrinsic call is gone, replaced by a sext of %a to i32
; that is then splatted into a <vscale x 4 x i32> vector.
define <vscale x 4 x i32> @sunpklo_splat(i16 %a) #0 {
|
||||
; CHECK-LABEL: @sunpklo_splat(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[A:%.*]] to i32
|
||||
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP1]], i32 0
|
||||
; CHECK-NEXT: [[UNPACK:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: ret <vscale x 4 x i32> [[UNPACK]]
|
||||
;
|
||||
%splat_insert = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
|
||||
%splat = shufflevector <vscale x 8 x i16> %splat_insert, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
|
||||
%unpack = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %splat)
|
||||
ret <vscale x 4 x i32> %unpack
|
||||
}
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
|
||||
|
||||
attributes #0 = { "target-features"="+sve" }
|
Loading…
x
Reference in New Issue
Block a user