mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 06:10:12 +00:00
[InstCombine][X86] Tweak generic expansion of PACKSS/PACKUS to shuffle then truncate. NFCI.
This has no effect on constant folding, but it will be useful when we expand non-saturating PACKSS/PACKUS intrinsics.
llvm-svn: 359191
This commit is contained in:
parent
0fc09d0d25
commit
48a3b54572
@ -593,12 +593,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,
|
|||||||
Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
|
Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
|
||||||
Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
|
Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);
|
||||||
|
|
||||||
// Truncate clamped args to dst size.
|
// Shuffle clamped args together at the lane level.
|
||||||
auto *TruncTy = VectorType::get(ResTy->getScalarType(), NumSrcElts);
|
|
||||||
Arg0 = Builder.CreateTrunc(Arg0, TruncTy);
|
|
||||||
Arg1 = Builder.CreateTrunc(Arg1, TruncTy);
|
|
||||||
|
|
||||||
// Shuffle args together at the lane level.
|
|
||||||
SmallVector<unsigned, 32> PackMask;
|
SmallVector<unsigned, 32> PackMask;
|
||||||
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
|
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
|
||||||
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
|
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
|
||||||
@ -606,8 +601,10 @@ static Value *simplifyX86pack(IntrinsicInst &II,
|
|||||||
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
|
for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
|
||||||
PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
|
PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);
|
||||||
}
|
}
|
||||||
|
auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
|
||||||
|
|
||||||
return Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
|
// Truncate to dst size.
|
||||||
|
return Builder.CreateTrunc(Shuffle, ResTy);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace X86-specific intrinsics with generic floor-ceil where applicable.
|
// Replace X86-specific intrinsics with generic floor-ceil where applicable.
|
||||||
|
Loading…
Reference in New Issue
Block a user