mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-15 20:51:35 +00:00
[MLIR][NFC] loop transforms/analyis utils cleanup / modernize
Modernize/cleanup code in loop transforms utils - a lot of this code was written prior to the currently available IR support / code style. This patch also does some variable renames including inst -> op, comment updates, turns getCleanupLoopLowerBound into a local function. Differential Revision: https://reviews.llvm.org/D77175
This commit is contained in:
parent
3a7865df62
commit
68316afb29
@ -82,7 +82,7 @@ bool isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
|
||||
/// 'def' and all its uses have the same shift factor.
|
||||
// TODO(mlir-team): extend this to check for memory-based dependence
|
||||
// violation when we have the support.
|
||||
bool isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts);
|
||||
bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts);
|
||||
} // end namespace mlir
|
||||
|
||||
#endif // MLIR_ANALYSIS_LOOP_ANALYSIS_H
|
||||
|
@ -72,20 +72,11 @@ LogicalResult promoteIfSingleIteration(AffineForOp forOp);
|
||||
/// their body into the containing Block.
|
||||
void promoteSingleIterationLoops(FuncOp f);
|
||||
|
||||
/// Computes the cleanup loop lower bound of the loop being unrolled with
|
||||
/// the specified unroll factor; this bound will also be upper bound of the main
|
||||
/// part of the unrolled loop. Computes the bound as an AffineMap with its
|
||||
/// operands or a null map when the trip count can't be expressed as an affine
|
||||
/// expression.
|
||||
void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
AffineMap *map, SmallVectorImpl<Value> *operands,
|
||||
OpBuilder &builder);
|
||||
|
||||
/// Skew the operations in the body of an affine.for operation with the
|
||||
/// specified operation-wise shifts. The shifts are with respect to the
|
||||
/// original execution order, and are multiplied by the loop 'step' before being
|
||||
/// applied. If `unrollPrologueEpilogue` is set, fully unroll the prologue and
|
||||
/// epilogue loops when possible.
|
||||
/// Skew the operations in an affine.for's body with the specified
|
||||
/// operation-wise shifts. The shifts are with respect to the original execution
|
||||
/// order, and are multiplied by the loop 'step' before being applied. If
|
||||
/// `unrollPrologueEpilogue` is set, fully unroll the prologue and epilogue
|
||||
/// loops when possible.
|
||||
LLVM_NODISCARD
|
||||
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
|
||||
bool unrollPrologueEpilogue = false);
|
||||
|
@ -30,9 +30,6 @@ using namespace mlir;
|
||||
/// expression is simplified before returning. This method only utilizes map
|
||||
/// composition to construct lower and upper bounds before computing the trip
|
||||
/// count expressions.
|
||||
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
|
||||
// pure analysis method relying on FlatAffineConstraints; the latter will also
|
||||
// be more powerful (since both inequalities and equalities will be considered).
|
||||
void mlir::buildTripCountMapAndOperands(
|
||||
AffineForOp forOp, AffineMap *tripCountMap,
|
||||
SmallVectorImpl<Value> *tripCountOperands) {
|
||||
@ -270,8 +267,8 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename LoadOrStoreOpPointer>
|
||||
static bool isVectorElement(LoadOrStoreOpPointer memoryOp) {
|
||||
template <typename LoadOrStoreOp>
|
||||
static bool isVectorElement(LoadOrStoreOp memoryOp) {
|
||||
auto memRefType = memoryOp.getMemRefType();
|
||||
return memRefType.getElementType().template isa<VectorType>();
|
||||
}
|
||||
@ -351,7 +348,7 @@ bool mlir::isVectorizableLoopBody(AffineForOp loop,
|
||||
/// 'def' and all its uses have the same shift factor.
|
||||
// TODO(mlir-team): extend this to check for memory-based dependence violation
|
||||
// when we have the support.
|
||||
bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
|
||||
bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
|
||||
auto *forBody = forOp.getBody();
|
||||
assert(shifts.size() == forBody->getOperations().size());
|
||||
|
||||
|
@ -345,7 +345,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
||||
instShiftMap[&op] = 1;
|
||||
|
||||
// Get shifts stored in map.
|
||||
std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size());
|
||||
SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size());
|
||||
unsigned s = 0;
|
||||
for (auto &op : forOp.getBody()->without_terminator()) {
|
||||
assert(instShiftMap.find(&op) != instShiftMap.end());
|
||||
@ -358,7 +358,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
|
||||
});
|
||||
}
|
||||
|
||||
if (!isInstwiseShiftValid(forOp, shifts)) {
|
||||
if (!isOpwiseShiftValid(forOp, shifts)) {
|
||||
// Violates dependences.
|
||||
LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";);
|
||||
return;
|
||||
|
@ -53,15 +53,14 @@ struct LoopParams {
|
||||
/// part of the unrolled loop. Computes the bound as an AffineMap with its
|
||||
/// operands or a null map when the trip count can't be expressed as an affine
|
||||
/// expression.
|
||||
void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
AffineMap *map,
|
||||
SmallVectorImpl<Value> *operands,
|
||||
OpBuilder &b) {
|
||||
static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
AffineMap &map,
|
||||
SmallVectorImpl<Value> &operands) {
|
||||
auto lbMap = forOp.getLowerBoundMap();
|
||||
|
||||
// Single result lower bound map only.
|
||||
if (lbMap.getNumResults() != 1) {
|
||||
*map = AffineMap();
|
||||
map = AffineMap();
|
||||
return;
|
||||
}
|
||||
|
||||
@ -71,11 +70,11 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
|
||||
// Sometimes the trip count cannot be expressed as an affine expression.
|
||||
if (!tripCountMap) {
|
||||
*map = AffineMap();
|
||||
map = AffineMap();
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned step = forOp.getStep();
|
||||
OpBuilder b(forOp);
|
||||
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
|
||||
forOp.getLowerBoundOperands());
|
||||
|
||||
@ -86,6 +85,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
// these affine.apply's make up the cleanup loop lower bound.
|
||||
SmallVector<AffineExpr, 4> bumpExprs(tripCountMap.getNumResults());
|
||||
SmallVector<Value, 4> bumpValues(tripCountMap.getNumResults());
|
||||
int64_t step = forOp.getStep();
|
||||
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
|
||||
auto tripCountExpr = tripCountMap.getResult(i);
|
||||
bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
|
||||
@ -99,19 +99,19 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
|
||||
newUbExprs[i] = b.getAffineDimExpr(0) + b.getAffineDimExpr(i + 1);
|
||||
|
||||
operands->clear();
|
||||
operands->push_back(lb);
|
||||
operands->append(bumpValues.begin(), bumpValues.end());
|
||||
*map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs);
|
||||
operands.clear();
|
||||
operands.push_back(lb);
|
||||
operands.append(bumpValues.begin(), bumpValues.end());
|
||||
map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs);
|
||||
// Simplify the map + operands.
|
||||
fullyComposeAffineMapAndOperands(map, operands);
|
||||
*map = simplifyAffineMap(*map);
|
||||
canonicalizeMapAndOperands(map, operands);
|
||||
fullyComposeAffineMapAndOperands(&map, &operands);
|
||||
map = simplifyAffineMap(map);
|
||||
canonicalizeMapAndOperands(&map, &operands);
|
||||
// Remove any affine.apply's that became dead from the simplification above.
|
||||
for (auto v : bumpValues) {
|
||||
for (auto v : bumpValues)
|
||||
if (v.use_empty())
|
||||
v.getDefiningOp()->erase();
|
||||
}
|
||||
|
||||
if (lb.use_empty())
|
||||
lb.erase();
|
||||
}
|
||||
@ -121,16 +121,15 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
|
||||
// TODO(bondhugula): extend this for arbitrary affine bounds.
|
||||
LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
||||
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
|
||||
if (!tripCount.hasValue() || tripCount.getValue() != 1)
|
||||
if (!tripCount || tripCount.getValue() != 1)
|
||||
return failure();
|
||||
|
||||
// TODO(mlir-team): there is no builder for a max.
|
||||
if (forOp.getLowerBoundMap().getNumResults() != 1)
|
||||
return failure();
|
||||
|
||||
// Replaces all IV uses to its single iteration value.
|
||||
auto iv = forOp.getInductionVar();
|
||||
Operation *op = forOp.getOperation();
|
||||
auto *parentBlock = forOp.getOperation()->getBlock();
|
||||
if (!iv.use_empty()) {
|
||||
if (forOp.hasConstantLowerBound()) {
|
||||
OpBuilder topBuilder(forOp.getParentOfType<FuncOp>().getBody());
|
||||
@ -140,7 +139,7 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
||||
} else {
|
||||
auto lbOperands = forOp.getLowerBoundOperands();
|
||||
auto lbMap = forOp.getLowerBoundMap();
|
||||
OpBuilder builder(op->getBlock(), Block::iterator(op));
|
||||
OpBuilder builder(parentBlock, Block::iterator(forOp));
|
||||
if (lbMap == builder.getDimIdentityMap()) {
|
||||
// No need of generating an affine.apply.
|
||||
iv.replaceAllUsesWith(lbOperands[0]);
|
||||
@ -151,17 +150,16 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Move the loop body operations, except for terminator, to the loop's
|
||||
// Move the loop body operations, except for its terminator, to the loop's
|
||||
// containing block.
|
||||
auto *block = op->getBlock();
|
||||
forOp.getBody()->getOperations().back().erase();
|
||||
block->getOperations().splice(Block::iterator(op),
|
||||
forOp.getBody()->getOperations());
|
||||
forOp.getBody()->back().erase();
|
||||
parentBlock->getOperations().splice(Block::iterator(forOp),
|
||||
forOp.getBody()->getOperations());
|
||||
forOp.erase();
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Promotes all single iteration for op's in the FuncOp, i.e., moves
|
||||
/// Promotes all single iteration 'for' ops in `f`, i.e., moves
|
||||
/// their body into the containing Block.
|
||||
void mlir::promoteSingleIterationLoops(FuncOp f) {
|
||||
// Gathers all innermost loops through a post order pruned walk.
|
||||
@ -233,6 +231,8 @@ static AffineForOp generateShiftedLoop(
|
||||
LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
|
||||
ArrayRef<uint64_t> shifts,
|
||||
bool unrollPrologueEpilogue) {
|
||||
assert(forOp.getBody()->getOperations().size() == shifts.size() &&
|
||||
"too few/many shifts");
|
||||
if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
|
||||
return success();
|
||||
|
||||
@ -247,20 +247,17 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
|
||||
}
|
||||
uint64_t tripCount = mayBeConstTripCount.getValue();
|
||||
|
||||
assert(isInstwiseShiftValid(forOp, shifts) &&
|
||||
assert(isOpwiseShiftValid(forOp, shifts) &&
|
||||
"shifts will lead to an invalid transformation\n");
|
||||
|
||||
int64_t step = forOp.getStep();
|
||||
|
||||
unsigned numChildInsts = forOp.getBody()->getOperations().size();
|
||||
unsigned numChildOps = shifts.size();
|
||||
|
||||
// Do a linear time (counting) sort for the shifts.
|
||||
uint64_t maxShift = 0;
|
||||
for (unsigned i = 0; i < numChildInsts; i++) {
|
||||
maxShift = std::max(maxShift, shifts[i]);
|
||||
}
|
||||
// Such large shifts are not the typical use case.
|
||||
if (maxShift >= numChildInsts) {
|
||||
uint64_t maxShift = *std::max_element(shifts.begin(), shifts.end());
|
||||
if (maxShift >= numChildOps) {
|
||||
// Large shifts are not the typical use case.
|
||||
forOp.emitWarning("not shifting because shifts are unrealistically large");
|
||||
return success();
|
||||
}
|
||||
@ -289,13 +286,13 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
|
||||
|
||||
auto origLbMap = forOp.getLowerBoundMap();
|
||||
uint64_t lbShift = 0;
|
||||
OpBuilder b(forOp.getOperation());
|
||||
OpBuilder b(forOp);
|
||||
for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
|
||||
// If nothing is shifted by d, continue.
|
||||
if (sortedOpGroups[d].empty())
|
||||
continue;
|
||||
if (!opGroupQueue.empty()) {
|
||||
assert(d >= 1 &&
|
||||
assert(d > 0 &&
|
||||
"Queue expected to be empty when the first block is found");
|
||||
// The interval for which the loop needs to be generated here is:
|
||||
// [lbShift, min(lbShift + tripCount, d)) and the body of the
|
||||
@ -343,8 +340,7 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
|
||||
|
||||
if (unrollPrologueEpilogue && prologue)
|
||||
loopUnrollFull(prologue);
|
||||
if (unrollPrologueEpilogue && !epilogue &&
|
||||
epilogue.getOperation() != prologue.getOperation())
|
||||
if (unrollPrologueEpilogue && !epilogue && epilogue != prologue)
|
||||
loopUnrollFull(epilogue);
|
||||
|
||||
return success();
|
||||
@ -389,9 +385,8 @@ LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
if (mayBeConstantTripCount.hasValue()) {
|
||||
uint64_t tripCount = mayBeConstantTripCount.getValue();
|
||||
if (tripCount == 1) {
|
||||
if (tripCount == 1)
|
||||
return promoteIfSingleIteration(forOp);
|
||||
}
|
||||
return loopUnrollByFactor(forOp, tripCount);
|
||||
}
|
||||
return failure();
|
||||
@ -413,14 +408,14 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
|
||||
/// is successfully unrolled.
|
||||
LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||
uint64_t unrollFactor) {
|
||||
assert(unrollFactor >= 1 && "unroll factor should be >= 1");
|
||||
assert(unrollFactor > 0 && "unroll factor should be positive");
|
||||
|
||||
if (unrollFactor == 1)
|
||||
return promoteIfSingleIteration(forOp);
|
||||
|
||||
if (forOp.getBody()->empty() ||
|
||||
forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
|
||||
return failure();
|
||||
// Nothing in the loop body other than the terminator.
|
||||
if (has_single_element(forOp.getBody()->getOperations()))
|
||||
return success();
|
||||
|
||||
// Loops where the lower bound is a max expression isn't supported for
|
||||
// unrolling since the trip count can be expressed as an affine function when
|
||||
@ -438,20 +433,19 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||
return failure();
|
||||
|
||||
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
|
||||
Operation *op = forOp.getOperation();
|
||||
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
|
||||
OpBuilder builder(op->getBlock(), ++Block::iterator(op));
|
||||
auto cleanupForInst = cast<AffineForOp>(builder.clone(*op));
|
||||
OpBuilder builder(forOp.getOperation()->getBlock(),
|
||||
std::next(Block::iterator(forOp)));
|
||||
auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
|
||||
AffineMap cleanupMap;
|
||||
SmallVector<Value, 4> cleanupOperands;
|
||||
getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands,
|
||||
builder);
|
||||
getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
|
||||
assert(cleanupMap &&
|
||||
"cleanup loop lower bound map for single result lower bound maps "
|
||||
"can always be determined");
|
||||
cleanupForInst.setLowerBound(cleanupOperands, cleanupMap);
|
||||
cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
|
||||
// Promote the loop body up if this has turned into a single iteration loop.
|
||||
promoteIfSingleIteration(cleanupForInst);
|
||||
promoteIfSingleIteration(cleanupForOp);
|
||||
|
||||
// Adjust upper bound of the original loop; this is the same as the lower
|
||||
// bound of the cleanup loop.
|
||||
@ -470,7 +464,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||
// so that we know what to clone (since we are doing this in-place).
|
||||
Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
|
||||
|
||||
// Unroll the contents of 'forOp' (append unrollFactor-1 additional copies).
|
||||
// Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
|
||||
auto forOpIV = forOp.getInductionVar();
|
||||
for (unsigned i = 1; i < unrollFactor; i++) {
|
||||
BlockAndValueMapping operandMap;
|
||||
@ -501,7 +495,6 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
|
||||
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
|
||||
uint64_t unrollJamFactor) {
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
|
||||
if (mayBeConstantTripCount.hasValue() &&
|
||||
mayBeConstantTripCount.getValue() < unrollJamFactor)
|
||||
return loopUnrollJamByFactor(forOp, mayBeConstantTripCount.getValue());
|
||||
@ -524,6 +517,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
for (auto &block : region)
|
||||
walk(block);
|
||||
}
|
||||
|
||||
void walk(Block &block) {
|
||||
for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
|
||||
auto subBlockStart = it;
|
||||
@ -531,21 +525,21 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
++it;
|
||||
if (it != subBlockStart)
|
||||
subBlocks.push_back({subBlockStart, std::prev(it)});
|
||||
// Process all for insts that appear next.
|
||||
// Process all for ops that appear next.
|
||||
while (it != e && isa<AffineForOp>(&*it))
|
||||
walk(&*it++);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
|
||||
assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
|
||||
|
||||
if (unrollJamFactor == 1)
|
||||
return promoteIfSingleIteration(forOp);
|
||||
|
||||
if (forOp.getBody()->empty() ||
|
||||
forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
|
||||
return failure();
|
||||
// Nothing in the loop body other than the terminator.
|
||||
if (has_single_element(forOp.getBody()->getOperations()))
|
||||
return success();
|
||||
|
||||
// Loops where both lower and upper bounds are multi-result maps won't be
|
||||
// unrolled (since the trip can't be expressed as an affine function in
|
||||
@ -559,28 +553,29 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
|
||||
// If the trip count is lower than the unroll jam factor, no unroll jam.
|
||||
if (mayBeConstantTripCount.hasValue() &&
|
||||
mayBeConstantTripCount.getValue() < unrollJamFactor)
|
||||
mayBeConstantTripCount.getValue() < unrollJamFactor) {
|
||||
LLVM_DEBUG(llvm::dbgs() << "[failed] trip count < unroll-jam factor\n");
|
||||
return failure();
|
||||
|
||||
auto *forInst = forOp.getOperation();
|
||||
}
|
||||
|
||||
// Gather all sub-blocks to jam upon the loop being unrolled.
|
||||
JamBlockGatherer jbg;
|
||||
jbg.walk(forInst);
|
||||
jbg.walk(forOp);
|
||||
auto &subBlocks = jbg.subBlocks;
|
||||
|
||||
// Generate the cleanup loop if trip count isn't a multiple of
|
||||
// unrollJamFactor.
|
||||
if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
|
||||
// Insert the cleanup loop right after 'forOp'.
|
||||
OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
|
||||
auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
|
||||
OpBuilder builder(forOp.getOperation()->getBlock(),
|
||||
std::next(Block::iterator(forOp)));
|
||||
auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forOp));
|
||||
// Adjust the lower bound of the cleanup loop; its upper bound is the same
|
||||
// as the original loop's upper bound.
|
||||
AffineMap cleanupMap;
|
||||
SmallVector<Value, 4> cleanupOperands;
|
||||
getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
|
||||
&cleanupOperands, builder);
|
||||
getCleanupLoopLowerBound(forOp, unrollJamFactor, cleanupMap,
|
||||
cleanupOperands);
|
||||
cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
|
||||
|
||||
// Promote the cleanup loop if it has turned into a single iteration loop.
|
||||
@ -599,7 +594,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
// Unroll and jam (appends unrollJamFactor - 1 additional copies).
|
||||
for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
|
||||
// Operand map persists across all sub-blocks.
|
||||
BlockAndValueMapping operandMapping;
|
||||
BlockAndValueMapping operandMap;
|
||||
for (auto &subBlock : subBlocks) {
|
||||
// Builder to insert unroll-jammed bodies. Insert right at the end of
|
||||
// sub-block.
|
||||
@ -612,13 +607,12 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
auto d0 = builder.getAffineDimExpr(0);
|
||||
auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
|
||||
auto ivUnroll =
|
||||
builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
|
||||
operandMapping.map(forOpIV, ivUnroll);
|
||||
builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
|
||||
operandMap.map(forOpIV, ivUnroll);
|
||||
}
|
||||
// Clone the sub-block being unroll-jammed.
|
||||
for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
|
||||
builder.clone(*it, operandMapping);
|
||||
}
|
||||
for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
|
||||
builder.clone(*it, operandMap);
|
||||
}
|
||||
}
|
||||
|
||||
@ -630,8 +624,6 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
|
||||
/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
|
||||
/// nested within 'forOpA' as the only non-terminator operation in its block.
|
||||
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
|
||||
auto *forOpAInst = forOpA.getOperation();
|
||||
|
||||
assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
|
||||
auto &forOpABody = forOpA.getBody()->getOperations();
|
||||
auto &forOpBBody = forOpB.getBody()->getOperations();
|
||||
@ -639,16 +631,17 @@ void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
|
||||
// 1) Splice forOpA's non-terminator operations (which is just forOpB) just
|
||||
// before forOpA (in ForOpA's parent's block) this should leave 'forOpA's
|
||||
// body containing only the terminator.
|
||||
forOpAInst->getBlock()->getOperations().splice(Block::iterator(forOpAInst),
|
||||
forOpABody, forOpABody.begin(),
|
||||
std::prev(forOpABody.end()));
|
||||
forOpA.getOperation()->getBlock()->getOperations().splice(
|
||||
Block::iterator(forOpA), forOpABody, forOpABody.begin(),
|
||||
std::prev(forOpABody.end()));
|
||||
// 2) Splice forOpB's non-terminator operations into the beginning of forOpA's
|
||||
// body (this leaves forOpB's body containing only the terminator).
|
||||
forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
|
||||
std::prev(forOpBBody.end()));
|
||||
// 3) Splice forOpA into the beginning of forOpB's body.
|
||||
forOpBBody.splice(forOpBBody.begin(), forOpAInst->getBlock()->getOperations(),
|
||||
Block::iterator(forOpAInst));
|
||||
forOpBBody.splice(forOpBBody.begin(),
|
||||
forOpA.getOperation()->getBlock()->getOperations(),
|
||||
Block::iterator(forOpA));
|
||||
}
|
||||
|
||||
// Checks each dependence component against the permutation to see if the
|
||||
@ -873,8 +866,8 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
|
||||
auto scaledStep = originalStep * factor;
|
||||
forOp.setStep(scaledStep);
|
||||
|
||||
auto *op = forOp.getOperation();
|
||||
OpBuilder b(op->getBlock(), ++Block::iterator(op));
|
||||
OpBuilder b(forOp.getOperation()->getBlock(),
|
||||
std::next(Block::iterator(forOp)));
|
||||
|
||||
// Lower-bound map creation.
|
||||
auto lbMap = forOp.getLowerBoundMap();
|
||||
@ -1180,7 +1173,6 @@ static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
|
||||
if (auto stepCst = dyn_cast_or_null<ConstantIndexOp>(step.getDefiningOp()))
|
||||
isStepOne = stepCst.getValue() == 1;
|
||||
|
||||
|
||||
// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
|
||||
// assuming the step is strictly positive. Update the bounds and the step
|
||||
// of the loop to go from 0 to the number of iterations, if necessary.
|
||||
@ -1823,16 +1815,16 @@ static LogicalResult generateCopy(
|
||||
|
||||
/// Construct the memref region to just include the entire memref. Returns false
|
||||
/// dynamic shaped memref's for now. `numParamLoopIVs` is the number of
|
||||
/// enclosing loop IVs of opInst (starting from the outermost) that the region
|
||||
/// enclosing loop IVs of `op` (starting from the outermost) that the region
|
||||
/// is parametric on.
|
||||
static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs,
|
||||
static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs,
|
||||
MemRefRegion *region) {
|
||||
unsigned rank;
|
||||
if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
|
||||
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
|
||||
rank = loadOp.getMemRefType().getRank();
|
||||
region->memref = loadOp.getMemRef();
|
||||
region->setWrite(false);
|
||||
} else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
|
||||
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
|
||||
rank = storeOp.getMemRefType().getRank();
|
||||
region->memref = storeOp.getMemRef();
|
||||
region->setWrite(true);
|
||||
@ -1849,7 +1841,7 @@ static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs,
|
||||
// Just get the first numSymbols IVs, which the memref region is parametric
|
||||
// on.
|
||||
SmallVector<AffineForOp, 4> ivs;
|
||||
getLoopIVs(*opInst, &ivs);
|
||||
getLoopIVs(*op, &ivs);
|
||||
ivs.resize(numParamLoopIVs);
|
||||
SmallVector<Value, 4> symbols;
|
||||
extractForInductionVars(ivs, &symbols);
|
||||
|
Loading…
x
Reference in New Issue
Block a user