mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-14 12:12:07 +00:00
b3224adfb6
This is the fourth patch to apply the BLIS matmul optimization pattern on matmul kernels (http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf). BLIS implements gemm as three nested loops around a macro-kernel, plus two packing routines. The macro-kernel is implemented in terms of two additional loops around a micro-kernel. The micro-kernel is a loop around a rank-1 (i.e., outer product) update. In this change we perform copying to created arrays, which is the last step to implement the packing transformation. Reviewed-by: Tobias Grosser <tobias@grosser.es> Differential Revision: https://reviews.llvm.org/D23260 llvm-svn: 281441
174 lines
4.9 KiB
C++
174 lines
4.9 KiB
C++
//===------ PollyIRBuilder.cpp --------------------------------------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// The Polly IRBuilder file contains Polly specific extensions for the IRBuilder
|
|
// that are used e.g. to emit the llvm.loop.parallel metadata.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "polly/CodeGen/IRBuilder.h"
|
|
#include "polly/ScopInfo.h"
|
|
#include "polly/Support/ScopHelper.h"
|
|
#include "llvm/IR/Metadata.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace llvm;
|
|
using namespace polly;
|
|
|
|
/// Get a self referencing id metadata node.
|
|
///
|
|
/// The MDNode looks like this (if arg0/arg1 are not null):
|
|
///
|
|
/// '!n = metadata !{metadata !n, arg0, arg1}'
|
|
///
|
|
/// @return The self referencing id metadata node.
|
|
static MDNode *getID(LLVMContext &Ctx, Metadata *arg0 = nullptr,
|
|
Metadata *arg1 = nullptr) {
|
|
MDNode *ID;
|
|
SmallVector<Metadata *, 3> Args;
|
|
// Use a temporary node to safely create a unique pointer for the first arg.
|
|
auto TempNode = MDNode::getTemporary(Ctx, None);
|
|
// Reserve operand 0 for loop id self reference.
|
|
Args.push_back(TempNode.get());
|
|
|
|
if (arg0)
|
|
Args.push_back(arg0);
|
|
if (arg1)
|
|
Args.push_back(arg1);
|
|
|
|
ID = MDNode::get(Ctx, Args);
|
|
ID->replaceOperandWith(0, ID);
|
|
return ID;
|
|
}
|
|
|
|
ScopAnnotator::ScopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {}
|
|
|
|
void ScopAnnotator::buildAliasScopes(Scop &S) {
|
|
SE = S.getSE();
|
|
|
|
LLVMContext &Ctx = SE->getContext();
|
|
AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain"));
|
|
|
|
AliasScopeMap.clear();
|
|
OtherAliasScopeListMap.clear();
|
|
|
|
SetVector<Value *> BasePtrs;
|
|
for (ScopStmt &Stmt : S)
|
|
for (MemoryAccess *MA : Stmt)
|
|
if (!Stmt.isCopyStmt())
|
|
BasePtrs.insert(MA->getBaseAddr());
|
|
|
|
std::string AliasScopeStr = "polly.alias.scope.";
|
|
for (Value *BasePtr : BasePtrs)
|
|
AliasScopeMap[BasePtr] = getID(
|
|
Ctx, AliasScopeDomain,
|
|
MDString::get(Ctx, (AliasScopeStr + BasePtr->getName()).str().c_str()));
|
|
|
|
for (Value *BasePtr : BasePtrs) {
|
|
MDNode *AliasScopeList = MDNode::get(Ctx, {});
|
|
for (const auto &AliasScopePair : AliasScopeMap) {
|
|
if (BasePtr == AliasScopePair.first)
|
|
continue;
|
|
|
|
Metadata *Args = {AliasScopePair.second};
|
|
AliasScopeList =
|
|
MDNode::concatenate(AliasScopeList, MDNode::get(Ctx, Args));
|
|
}
|
|
|
|
OtherAliasScopeListMap[BasePtr] = AliasScopeList;
|
|
}
|
|
}
|
|
|
|
void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {
|
|
|
|
ActiveLoops.push_back(L);
|
|
if (!IsParallel)
|
|
return;
|
|
|
|
BasicBlock *Header = L->getHeader();
|
|
MDNode *Id = getID(Header->getContext());
|
|
assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
|
|
assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
|
|
MDNode *Ids = ParallelLoops.empty()
|
|
? Id
|
|
: MDNode::concatenate(ParallelLoops.back(), Id);
|
|
ParallelLoops.push_back(Ids);
|
|
}
|
|
|
|
void ScopAnnotator::popLoop(bool IsParallel) {
|
|
ActiveLoops.pop_back();
|
|
if (!IsParallel)
|
|
return;
|
|
|
|
assert(!ParallelLoops.empty() && "Expected a parallel loop to pop");
|
|
ParallelLoops.pop_back();
|
|
}
|
|
|
|
void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L,
|
|
bool IsParallel) const {
|
|
if (!IsParallel)
|
|
return;
|
|
|
|
assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
|
|
MDNode *Ids = ParallelLoops.back();
|
|
MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
|
|
B->setMetadata("llvm.loop", Id);
|
|
}
|
|
|
|
void ScopAnnotator::annotate(Instruction *Inst) {
|
|
if (!Inst->mayReadOrWriteMemory())
|
|
return;
|
|
|
|
if (!ParallelLoops.empty())
|
|
Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back());
|
|
|
|
// TODO: Use the ScopArrayInfo once available here.
|
|
if (!AliasScopeDomain)
|
|
return;
|
|
|
|
auto MemInst = MemAccInst::dyn_cast(Inst);
|
|
if (!MemInst)
|
|
return;
|
|
|
|
auto *Ptr = MemInst.getPointerOperand();
|
|
if (!Ptr)
|
|
return;
|
|
|
|
auto *PtrSCEV = SE->getSCEV(Ptr);
|
|
auto *BaseSCEV = SE->getPointerBase(PtrSCEV);
|
|
auto *SU = dyn_cast<SCEVUnknown>(BaseSCEV);
|
|
|
|
if (!SU)
|
|
return;
|
|
|
|
auto *BasePtr = SU->getValue();
|
|
|
|
if (!BasePtr)
|
|
return;
|
|
|
|
auto AliasScope = AliasScopeMap.lookup(BasePtr);
|
|
|
|
if (!AliasScope) {
|
|
BasePtr = AlternativeAliasBases.lookup(BasePtr);
|
|
if (!BasePtr)
|
|
return;
|
|
|
|
AliasScope = AliasScopeMap.lookup(BasePtr);
|
|
if (!AliasScope)
|
|
return;
|
|
}
|
|
|
|
assert(OtherAliasScopeListMap.count(BasePtr) &&
|
|
"BasePtr either expected in AliasScopeMap and OtherAlias...Map");
|
|
auto *OtherAliasScopeList = OtherAliasScopeListMap[BasePtr];
|
|
|
|
Inst->setMetadata("alias.scope", AliasScope);
|
|
Inst->setMetadata("noalias", OtherAliasScopeList);
|
|
}
|