mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 04:09:45 +00:00
Scalarizer: Support scalarizing intrinsics
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@276681 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
21e0aa8d55
commit
dc848c22cc
@ -16,6 +16,7 @@
|
||||
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/VectorUtils.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/Pass.h"
|
||||
@ -148,6 +149,7 @@ public:
|
||||
bool visitPHINode(PHINode &);
|
||||
bool visitLoadInst(LoadInst &);
|
||||
bool visitStoreInst(StoreInst &);
|
||||
bool visitCallInst(CallInst &I);
|
||||
|
||||
static void registerOptions() {
|
||||
// This is disabled by default because having separate loads and stores
|
||||
@ -169,6 +171,8 @@ private:
|
||||
|
||||
template<typename T> bool splitBinary(Instruction &, const T &);
|
||||
|
||||
bool splitCall(CallInst &CI);
|
||||
|
||||
ScatterMap Scattered;
|
||||
GatherList Gathered;
|
||||
unsigned ParallelLoopAccessMDKind;
|
||||
@ -394,6 +398,77 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isTriviallyScalariable(Intrinsic::ID ID) {
|
||||
return isTriviallyVectorizable(ID);
|
||||
}
|
||||
|
||||
// All of the current scalarizable intrinsics only have one mangled type.
|
||||
static Function *getScalarIntrinsicDeclaration(Module *M,
|
||||
Intrinsic::ID ID,
|
||||
VectorType *Ty) {
|
||||
return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
|
||||
}
|
||||
|
||||
/// If a call to a vector typed intrinsic function, split into a scalar call per
|
||||
/// element if possible for the intrinsic.
|
||||
bool Scalarizer::splitCall(CallInst &CI) {
|
||||
VectorType *VT = dyn_cast<VectorType>(CI.getType());
|
||||
if (!VT)
|
||||
return false;
|
||||
|
||||
Function *F = CI.getCalledFunction();
|
||||
if (!F)
|
||||
return false;
|
||||
|
||||
Intrinsic::ID ID = F->getIntrinsicID();
|
||||
if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
|
||||
return false;
|
||||
|
||||
unsigned NumElems = VT->getNumElements();
|
||||
unsigned NumArgs = CI.getNumArgOperands();
|
||||
|
||||
ValueVector ScalarOperands(NumArgs);
|
||||
SmallVector<Scatterer, 8> Scattered(NumArgs);
|
||||
|
||||
Scattered.resize(NumArgs);
|
||||
|
||||
// Assumes that any vector type has the same number of elements as the return
|
||||
// vector type, which is true for all current intrinsics.
|
||||
for (unsigned I = 0; I != NumArgs; ++I) {
|
||||
Value *OpI = CI.getOperand(I);
|
||||
if (OpI->getType()->isVectorTy()) {
|
||||
Scattered[I] = scatter(&CI, OpI);
|
||||
assert(Scattered[I].size() == NumElems && "mismatched call operands");
|
||||
} else {
|
||||
ScalarOperands[I] = OpI;
|
||||
}
|
||||
}
|
||||
|
||||
ValueVector Res(NumElems);
|
||||
ValueVector ScalarCallOps(NumArgs);
|
||||
|
||||
Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
|
||||
IRBuilder<> Builder(&CI);
|
||||
|
||||
// Perform actual scalarization, taking care to preserve any scalar operands.
|
||||
for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
|
||||
ScalarCallOps.clear();
|
||||
|
||||
for (unsigned J = 0; J != NumArgs; ++J) {
|
||||
if (hasVectorInstrinsicScalarOpd(ID, J))
|
||||
ScalarCallOps.push_back(ScalarOperands[J]);
|
||||
else
|
||||
ScalarCallOps.push_back(Scattered[J][Elem]);
|
||||
}
|
||||
|
||||
Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
|
||||
CI.getName() + ".i" + Twine(Elem));
|
||||
}
|
||||
|
||||
gather(&CI, Res);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scalarizer::visitSelectInst(SelectInst &SI) {
|
||||
VectorType *VT = dyn_cast<VectorType>(SI.getType());
|
||||
if (!VT)
|
||||
@ -642,6 +717,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Scalarizer::visitCallInst(CallInst &CI) {
|
||||
return splitCall(CI);
|
||||
}
|
||||
|
||||
// Delete the instructions that we scalarized. If a full vector result
|
||||
// is still needed, recreate it using InsertElements.
|
||||
bool Scalarizer::finish() {
|
||||
|
85
test/Transforms/Scalarizer/intrinsics.ll
Normal file
85
test/Transforms/Scalarizer/intrinsics.ll
Normal file
@ -0,0 +1,85 @@
|
||||
; RUN: opt -S -scalarizer %s | FileCheck %s
|
||||
|
||||
; Unary fp
|
||||
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
|
||||
|
||||
; Binary fp
|
||||
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
|
||||
|
||||
; Ternary fp
|
||||
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
|
||||
|
||||
; Binary int
|
||||
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
|
||||
|
||||
; Unary int plus constant scalar operand
|
||||
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
|
||||
|
||||
; Unary fp plus any scalar operand
|
||||
declare <2 x float> @llvm.powi.v2f32(<2 x float>, i32)
|
||||
|
||||
; CHECK-LABEL: @scalarize_sqrt_v2f32(
|
||||
; CHECK: %sqrt.i0 = call float @llvm.sqrt.f32(float %x.i0)
|
||||
; CHECK: %sqrt.i1 = call float @llvm.sqrt.f32(float %x.i1)
|
||||
; CHECK: %sqrt.upto0 = insertelement <2 x float> undef, float %sqrt.i0, i32 0
|
||||
; CHECK: %sqrt = insertelement <2 x float> %sqrt.upto0, float %sqrt.i1, i32 1
|
||||
; CHECK: ret <2 x float> %sqrt
|
||||
define <2 x float> @scalarize_sqrt_v2f32(<2 x float> %x) #0 {
|
||||
%sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
|
||||
ret <2 x float> %sqrt
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_minnum_v2f32(
|
||||
; CHECK: %minnum.i0 = call float @llvm.minnum.f32(float %x.i0, float %y.i0)
|
||||
; CHECK: %minnum.i1 = call float @llvm.minnum.f32(float %x.i1, float %y.i1)
|
||||
; CHECK: %minnum.upto0 = insertelement <2 x float> undef, float %minnum.i0, i32 0
|
||||
; CHECK: %minnum = insertelement <2 x float> %minnum.upto0, float %minnum.i1, i32 1
|
||||
; CHECK: ret <2 x float> %minnum
|
||||
define <2 x float> @scalarize_minnum_v2f32(<2 x float> %x, <2 x float> %y) #0 {
|
||||
%minnum = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y)
|
||||
ret <2 x float> %minnum
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_fma_v2f32(
|
||||
; CHECK: %fma.i0 = call float @llvm.fma.f32(float %x.i0, float %y.i0, float %z.i0)
|
||||
; CHECK: %fma.i1 = call float @llvm.fma.f32(float %x.i1, float %y.i1, float %z.i1)
|
||||
; CHECK: %fma.upto0 = insertelement <2 x float> undef, float %fma.i0, i32 0
|
||||
; CHECK: %fma = insertelement <2 x float> %fma.upto0, float %fma.i1, i32 1
|
||||
; CHECK: ret <2 x float> %fma
|
||||
define <2 x float> @scalarize_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
|
||||
%fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
|
||||
ret <2 x float> %fma
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_bswap_v2i32(
|
||||
; CHECK: %bswap.i0 = call i32 @llvm.bswap.i32(i32 %x.i0)
|
||||
; CHECK: %bswap.i1 = call i32 @llvm.bswap.i32(i32 %x.i1)
|
||||
; CHECK: %bswap.upto0 = insertelement <2 x i32> undef, i32 %bswap.i0, i32 0
|
||||
; CHECK: %bswap = insertelement <2 x i32> %bswap.upto0, i32 %bswap.i1, i32 1
|
||||
; CHECK: ret <2 x i32> %bswap
|
||||
define <2 x i32> @scalarize_bswap_v2i32(<2 x i32> %x) #0 {
|
||||
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %x)
|
||||
ret <2 x i32> %bswap
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_ctlz_v2i32(
|
||||
; CHECK: %ctlz.i0 = call i32 @llvm.ctlz.i32(i32 %x.i0, i1 true)
|
||||
; CHECK: %ctlz.i1 = call i32 @llvm.ctlz.i32(i32 %x.i1, i1 true)
|
||||
; CHECK: %ctlz.upto0 = insertelement <2 x i32> undef, i32 %ctlz.i0, i32 0
|
||||
; CHECK: %ctlz = insertelement <2 x i32> %ctlz.upto0, i32 %ctlz.i1, i32 1
|
||||
; CHECK: ret <2 x i32> %ctlz
|
||||
define <2 x i32> @scalarize_ctlz_v2i32(<2 x i32> %x) #0 {
|
||||
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
|
||||
ret <2 x i32> %ctlz
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @scalarize_powi_v2f32(
|
||||
; CHECK: %powi.i0 = call float @llvm.powi.f32(float %x.i0, i32 %y)
|
||||
; CHECK: %powi.i1 = call float @llvm.powi.f32(float %x.i1, i32 %y)
|
||||
; CHECK: %powi.upto0 = insertelement <2 x float> undef, float %powi.i0, i32 0
|
||||
; CHECK: %powi = insertelement <2 x float> %powi.upto0, float %powi.i1, i32 1
|
||||
; CHECK: ret <2 x float> %powi
|
||||
define <2 x float> @scalarize_powi_v2f32(<2 x float> %x, i32 %y) #0 {
|
||||
%powi = call <2 x float> @llvm.powi.v2f32(<2 x float> %x, i32 %y)
|
||||
ret <2 x float> %powi
|
||||
}
|
Loading…
Reference in New Issue
Block a user