mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 14:47:00 +00:00
e3e43d9d57
I did this a long time ago with a janky python script, but now clang-format has built-in support for this. I fed clang-format every line with a #include and let it re-sort things according to the precise LLVM rules for include ordering baked into clang-format these days. I've reverted a number of files where the results of sorting includes isn't healthy. Either places where we have legacy code relying on particular include ordering (where possible, I'll fix these separately) or where we have particular formatting around #include lines that I didn't want to disturb in this patch. This patch is *entirely* mechanical. If you get merge conflicts or anything, just ignore the changes in this patch and run clang-format over your #include lines in the files. Sorry for any noise here, but it is important to keep these things stable. I was seeing an increasing number of patches with irrelevant re-ordering of #include lines because clang-format was used. This patch at least isolates that churn, makes it easy to skip when resolving conflicts, and gets us to a clean baseline (again). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@304787 91177308-0d34-0410-b5e6-96231b3b80d8
168 lines
5.8 KiB
C++
168 lines
5.8 KiB
C++
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass implements IR expansion for reduction intrinsics, allowing targets
|
|
// to enable the experimental intrinsics until just before codegen.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/CodeGen/ExpandReductions.h"
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/InstIterator.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/Intrinsics.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Transforms/Utils/LoopUtils.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
/// Map a vector reduction intrinsic ID to the instruction opcode that is
/// handed to the shuffle-reduction expansion.
///
/// Arithmetic and bitwise reductions map to their binary operator. The integer
/// min/max reductions map to ICmp and the floating-point min/max reductions
/// map to FCmp. Any other intrinsic ID is a programming error and hits
/// llvm_unreachable.
unsigned getOpcode(Intrinsic::ID ID) {
  switch (ID) {
  // Integer min/max reductions share the integer compare opcode.
  case Intrinsic::experimental_vector_reduce_smax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_umin:
    return Instruction::ICmp;

  // Floating-point min/max reductions share the FP compare opcode.
  case Intrinsic::experimental_vector_reduce_fmax:
  case Intrinsic::experimental_vector_reduce_fmin:
    return Instruction::FCmp;

  // Floating-point arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_fadd:
    return Instruction::FAdd;
  case Intrinsic::experimental_vector_reduce_fmul:
    return Instruction::FMul;

  // Integer arithmetic reductions.
  case Intrinsic::experimental_vector_reduce_add:
    return Instruction::Add;
  case Intrinsic::experimental_vector_reduce_mul:
    return Instruction::Mul;

  // Bitwise reductions.
  case Intrinsic::experimental_vector_reduce_and:
    return Instruction::And;
  case Intrinsic::experimental_vector_reduce_or:
    return Instruction::Or;
  case Intrinsic::experimental_vector_reduce_xor:
    return Instruction::Xor;

  default:
    break;
  }
  llvm_unreachable("Unexpected ID");
}
|
|
|
|
/// Return the min/max recurrence kind that corresponds to a min/max reduction
/// intrinsic, or MRK_Invalid for every other intrinsic ID.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  // Anything that is not a min/max reduction keeps the invalid kind.
  auto Kind = RecurrenceDescriptor::MRK_Invalid;

  switch (ID) {
  case Intrinsic::experimental_vector_reduce_smin:
    Kind = RecurrenceDescriptor::MRK_SIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_smax:
    Kind = RecurrenceDescriptor::MRK_SIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_umin:
    Kind = RecurrenceDescriptor::MRK_UIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_umax:
    Kind = RecurrenceDescriptor::MRK_UIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_fmin:
    Kind = RecurrenceDescriptor::MRK_FloatMin;
    break;
  case Intrinsic::experimental_vector_reduce_fmax:
    Kind = RecurrenceDescriptor::MRK_FloatMax;
    break;
  default:
    break;
  }

  return Kind;
}
|
|
|
|
/// Replace the reduction intrinsic calls in \p F that the target asks to have
/// expanded with the sequence produced by getShuffleReduction.
///
/// \param F   Function whose instructions are scanned and rewritten in place.
/// \param TTI Target hook queried (via shouldExpandReduction) for each
///            candidate call.
/// \returns true if at least one intrinsic call was replaced and erased.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  // Gather the intrinsic calls up front: the rewrite loop below erases
  // instructions, so it must not run while iterating over the function.
  SmallVector<IntrinsicInst *, 4> Worklist;
  for (Instruction &Inst : instructions(F))
    if (auto *Call = dyn_cast<IntrinsicInst>(&Inst))
      Worklist.push_back(Call);

  bool Changed = false;
  for (IntrinsicInst *II : Worklist) {
    const Intrinsic::ID ID = II->getIntrinsicID();
    Value *Vec = nullptr;
    auto MRK = RecurrenceDescriptor::MRK_Invalid;

    switch (ID) {
    default:
      // Not a reduction intrinsic handled by this pass.
      continue;

    case Intrinsic::experimental_vector_reduce_fadd:
    case Intrinsic::experimental_vector_reduce_fmul:
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating this shuffle sequence.
      // TODO: Implement scalarization of ordered reductions here for targets
      // without native support.
      if (!II->getFastMathFlags().unsafeAlgebra())
        continue;
      // For these two intrinsics the vector is the second call argument.
      Vec = II->getArgOperand(1);
      break;

    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      // The vector is the first call argument; getMRK yields MRK_Invalid for
      // the non-min/max reductions in this group.
      Vec = II->getArgOperand(0);
      MRK = getMRK(ID);
      break;
    }

    // Leave the call untouched when the target does not want it expanded.
    if (!TTI->shouldExpandReduction(II))
      continue;

    // Emit the replacement right before the call, then drop the call.
    IRBuilder<> Builder(II);
    Value *Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
    II->replaceAllUsesWith(Rdx);
    II->eraseFromParent();
    Changed = true;
  }

  return Changed;
}
|
|
|
|
class ExpandReductions : public FunctionPass {
|
|
public:
|
|
static char ID;
|
|
ExpandReductions() : FunctionPass(ID) {
|
|
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool runOnFunction(Function &F) override {
|
|
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
|
|
return expandReductions(F, TTI);
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<TargetTransformInfoWrapperPass>();
|
|
AU.setPreservesCFG();
|
|
}
|
|
};
|
|
}
|
|
|
|
char ExpandReductions::ID;
|
|
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
|
|
"Expand reduction intrinsics", false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
|
|
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
|
|
"Expand reduction intrinsics", false, false)
|
|
|
|
/// Factory for the legacy pass: returns a newly heap-allocated
/// ExpandReductions instance.
FunctionPass *llvm::createExpandReductionsPass() {
  return new ExpandReductions;
}
|
|
|
|
/// New pass manager entry point. Runs the expansion on \p F using the
/// TargetIRAnalysis result from \p AM. Reports all analyses preserved when
/// nothing changed, otherwise only the CFG analyses set.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  const bool Modified = expandReductions(F, &TTI);

  if (Modified) {
    // Instructions were rewritten, so only CFG analyses are kept.
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }

  return PreservedAnalyses::all();
}
|