mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-12 22:26:14 +00:00
SLPVectorizer: Only vectorize intrinsics whose operands are widened equally
The vectorizer only knows how to vectorize intrinsics by widening all operands by the same factor. Patch by Tyler Nowicki! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205855 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0d5d656524
commit
b0ee2374ce
54
include/llvm/Transforms/Utils/VectorUtils.h
Normal file
54
include/llvm/Transforms/Utils/VectorUtils.h
Normal file
@ -0,0 +1,54 @@
|
||||
//===- llvm/Transforms/Utils/VectorUtils.h - Vector utilities -*- C++ -*-=====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines some vectorizer utilities.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
|
||||
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// \brief Identify if the intrinsic is trivially vectorizable.
|
||||
///
|
||||
/// This method returns true if the intrinsic's argument types are all
|
||||
/// scalars for the scalar form of the intrinsic and all vectors for
|
||||
/// the vector form of the intrinsic.
|
||||
///
/// The decision is a fixed whitelist: the function does not inspect the
/// intrinsic's signature, it only compares \p ID against the cases below.
///
/// \param ID the intrinsic identifier to classify.
/// \returns true if \p ID is one of the whitelisted intrinsics, false for
/// every other identifier.
static inline bool isTriviallyVectorizable(Intrinsic::ID ID) {
|
||||
switch (ID) {
|
||||
// Whitelisted intrinsics; all cases fall through to the shared `return true`.
case Intrinsic::sqrt:
|
||||
case Intrinsic::sin:
|
||||
case Intrinsic::cos:
|
||||
case Intrinsic::exp:
|
||||
case Intrinsic::exp2:
|
||||
case Intrinsic::log:
|
||||
case Intrinsic::log10:
|
||||
case Intrinsic::log2:
|
||||
case Intrinsic::fabs:
|
||||
case Intrinsic::copysign:
|
||||
case Intrinsic::floor:
|
||||
case Intrinsic::ceil:
|
||||
case Intrinsic::trunc:
|
||||
case Intrinsic::rint:
|
||||
case Intrinsic::nearbyint:
|
||||
case Intrinsic::round:
|
||||
|
||||
case Intrinsic::ctpop:
|
||||
case Intrinsic::pow:
|
||||
case Intrinsic::fma:
|
||||
case Intrinsic::fmuladd:
|
||||
return true;
|
||||
// Any identifier not listed above is reported as not trivially vectorizable.
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // llvm namespace
|
||||
|
||||
#endif
|
@ -91,6 +91,7 @@
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/VectorUtils.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
@ -2266,32 +2267,12 @@ static Intrinsic::ID
|
||||
getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
|
||||
// If we have an intrinsic call, check if it is trivially vectorizable.
|
||||
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
|
||||
switch (II->getIntrinsicID()) {
|
||||
case Intrinsic::sqrt:
|
||||
case Intrinsic::sin:
|
||||
case Intrinsic::cos:
|
||||
case Intrinsic::exp:
|
||||
case Intrinsic::exp2:
|
||||
case Intrinsic::log:
|
||||
case Intrinsic::log10:
|
||||
case Intrinsic::log2:
|
||||
case Intrinsic::fabs:
|
||||
case Intrinsic::copysign:
|
||||
case Intrinsic::floor:
|
||||
case Intrinsic::ceil:
|
||||
case Intrinsic::trunc:
|
||||
case Intrinsic::rint:
|
||||
case Intrinsic::nearbyint:
|
||||
case Intrinsic::round:
|
||||
case Intrinsic::pow:
|
||||
case Intrinsic::fma:
|
||||
case Intrinsic::fmuladd:
|
||||
case Intrinsic::lifetime_start:
|
||||
case Intrinsic::lifetime_end:
|
||||
return II->getIntrinsicID();
|
||||
default:
|
||||
Intrinsic::ID ID = II->getIntrinsicID();
|
||||
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
|
||||
ID == Intrinsic::lifetime_end)
|
||||
return ID;
|
||||
else
|
||||
return Intrinsic::not_intrinsic;
|
||||
}
|
||||
}
|
||||
|
||||
if (!TLI)
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Transforms/Utils/VectorUtils.h"
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
@ -949,7 +950,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
case Instruction::Call: {
|
||||
// Check if the calls are all to the same vectorizable intrinsic.
|
||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
|
||||
if (II==NULL) {
|
||||
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
|
||||
|
||||
if (!isTriviallyVectorizable(ID)) {
|
||||
newTreeEntry(VL, false);
|
||||
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
||||
return;
|
||||
|
@ -0,0 +1,36 @@
|
||||
; RUN: opt < %s -slp-vectorizer -o - -S -slp-threshold=-1000
|
||||
|
||||
target datalayout = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64"
|
||||
target triple = "nvptx--nvidiacl"
|
||||
|
||||
; CTLZ cannot be vectorized currently because the second argument is a scalar
|
||||
; for both the scalar and vector forms of the intrinsic. In the future it
|
||||
; should be possible to vectorize such functions.
|
||||
; Test causes an assert if LLVM tries to vectorize CTLZ.
|
||||
|
||||
; Handles the two lanes of %x one after the other: each lane is extracted,
; passed to @llvm.ctlz.i8 (with the i1 flag set to false), and inserted into
; the <2 x i8> result before the next lane is touched.
define <2 x i8> @cltz_test(<2 x i8> %x) #0 {
|
||||
entry:
|
||||
%0 = extractelement <2 x i8> %x, i32 0
|
||||
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
|
||||
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
|
||||
%1 = extractelement <2 x i8> %x, i32 1
|
||||
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
|
||||
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
|
||||
ret <2 x i8> %vecinit2
|
||||
}
|
||||
|
||||
; Extracts both lanes of %x first, then issues the two @llvm.ctlz.i8 calls
; back to back (i1 flag false), and finally rebuilds the <2 x i8> result with
; two consecutive insertelement instructions.
define <2 x i8> @cltz_test2(<2 x i8> %x) #1 {
|
||||
entry:
|
||||
%0 = extractelement <2 x i8> %x, i32 0
|
||||
%1 = extractelement <2 x i8> %x, i32 1
|
||||
%call.i = call i8 @llvm.ctlz.i8(i8 %0, i1 false)
|
||||
%call.i4 = call i8 @llvm.ctlz.i8(i8 %1, i1 false)
|
||||
%vecinit = insertelement <2 x i8> undef, i8 %call.i, i32 0
|
||||
%vecinit2 = insertelement <2 x i8> %vecinit, i8 %call.i4, i32 1
|
||||
ret <2 x i8> %vecinit2
|
||||
}
|
||||
|
||||
declare i8 @llvm.ctlz.i8(i8, i1) #3
|
||||
|
||||
attributes #0 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user