Add a new memory allocation rewrite pass.

This pass can reclassify memory allocations (fir.alloca, fir.allocmem)
based on heuristics and settings. The intention is to allow better
performance and workarounds for conditions such as environments with
limited stack space.

Currently, implements two conversions from stack to heap allocation.
  1. If a stack allocation is an array larger than some threshold value
     make it a heap allocation.
  2. If a stack allocation is an array with a runtime evaluated size make
     it a heap allocation.

Add a lit test for both suboptions.

Reviewed By: PeteSteinfeld, vdonaldson

Differential Revision: https://reviews.llvm.org/D115763
This commit is contained in:
Eric Schweitz 2021-12-14 09:28:53 -08:00
parent cf9e61a9bb
commit 3d092e31c1
5 changed files with 240 additions and 0 deletions

View File

@ -34,6 +34,7 @@ std::unique_ptr<mlir::Pass> createCharacterConversionPass();
std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
/// Support for inlining on FIR.
bool canLegallyInline(mlir::Operation *op, mlir::Region *reg,

View File

@ -152,4 +152,22 @@ def MemRefDataFlowOpt : FunctionPass<"fir-memref-dataflow-opt"> {
];
}
def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::FuncOp"> {
let summary = "Convert stack to heap allocations and vice versa.";
let description = [{
Convert stack allocations to heap allocations and vice versa based on
estimated size, lifetime, usage patterns, the call tree, etc.
}];
let dependentDialects = [ "fir::FIROpsDialect" ];
let options = [
Option<"dynamicArrayOnHeap", "dynamic-array-on-heap",
"bool", /*default=*/"false",
"Allocate all arrays with runtime determined size on heap.">,
Option<"maxStackArraySize", "maximum-array-alloc-size",
"std::size_t", /*default=*/"~static_cast<std::size_t>(0)",
"Set maximum number of elements of an array allocated on the stack.">
];
let constructor = "::fir::createMemoryAllocationPass()";
}
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES

View File

@ -6,6 +6,7 @@ add_flang_library(FIRTransforms
ArrayValueCopy.cpp
Inliner.cpp
ExternalNameConversion.cpp
MemoryAllocation.cpp
MemRefDataFlowOpt.cpp
RewriteLoop.cpp

View File

@ -0,0 +1,186 @@
//===- MemoryAllocation.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "PassDetail.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/Diagnostics.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/TypeSwitch.h"
#define DEBUG_TYPE "flang-memory-allocation-opt"
// Number of elements in an array does not determine where it is allocated.
static constexpr std::size_t UnlimitedArraySize = ~static_cast<std::size_t>(0);
namespace {
struct MemoryAllocationOptions {
// Always move dynamic array allocations to the heap. This may result in more
// heap fragmentation, so may impact performance negatively.
bool dynamicArrayOnHeap = false;
// Number of elements in array threshold for moving to heap. In environments
// with limited stack size, moving large arrays to the heap can avoid running
// out of stack space.
std::size_t maxStackArraySize = UnlimitedArraySize;
};
class ReturnAnalysis {
public:
ReturnAnalysis(mlir::Operation *op) {
if (auto func = mlir::dyn_cast<mlir::FuncOp>(op))
for (mlir::Block &block : func)
for (mlir::Operation &i : block)
if (mlir::isa<mlir::ReturnOp>(i)) {
returnMap[op].push_back(&i);
break;
}
}
llvm::SmallVector<mlir::Operation *> getReturns(mlir::Operation *func) const {
auto iter = returnMap.find(func);
if (iter != returnMap.end())
return iter->second;
return {};
}
private:
llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *>>
returnMap;
};
} // namespace
/// Return `true` if this allocation is to remain on the stack (`fir.alloca`).
/// Otherwise the allocation should be moved to the heap (`fir.allocmem`).
static inline bool keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry,
const MemoryAllocationOptions &options) {
// Limitation: only arrays allocated on the stack in the entry block are
// considered for now.
// TODO: Generalize the algorithm and placement of the freemem nodes.
if (alloca->getBlock() != entry)
return true;
if (auto seqTy = alloca.getInType().dyn_cast<fir::SequenceType>()) {
if (fir::hasDynamicSize(seqTy)) {
// Move all arrays with runtime determined size to the heap.
if (options.dynamicArrayOnHeap)
return false;
} else {
std::int64_t numberOfElements = 1;
for (std::int64_t i : seqTy.getShape()) {
numberOfElements *= i;
// If the count is suspicious, then don't change anything here.
if (numberOfElements <= 0)
return true;
}
// If the number of elements exceeds the threshold, move the allocation to
// the heap.
if (static_cast<std::size_t>(numberOfElements) >
options.maxStackArraySize) {
LLVM_DEBUG(llvm::dbgs()
<< "memory allocation opt: found " << alloca << '\n');
return false;
}
}
}
return true;
}
namespace {
class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
public:
using OpRewritePattern::OpRewritePattern;
AllocaOpConversion(mlir::MLIRContext *ctx,
llvm::ArrayRef<mlir::Operation *> rets)
: OpRewritePattern(ctx), returnOps(rets) {}
mlir::LogicalResult
matchAndRewrite(fir::AllocaOp alloca,
mlir::PatternRewriter &rewriter) const override {
auto loc = alloca.getLoc();
mlir::Type varTy = alloca.getInType();
auto unpackName =
[](llvm::Optional<llvm::StringRef> opt) -> llvm::StringRef {
if (opt)
return *opt;
return {};
};
auto uniqName = unpackName(alloca.uniq_name());
auto bindcName = unpackName(alloca.bindc_name());
auto heap = rewriter.create<fir::AllocMemOp>(
loc, varTy, uniqName, bindcName, alloca.typeparams(), alloca.shape());
auto insPt = rewriter.saveInsertionPoint();
for (mlir::Operation *retOp : returnOps) {
rewriter.setInsertionPoint(retOp);
[[maybe_unused]] auto free = rewriter.create<fir::FreeMemOp>(loc, heap);
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free
<< " for " << heap << '\n');
}
rewriter.restoreInsertionPoint(insPt);
rewriter.replaceOpWithNewOp<fir::ConvertOp>(
alloca, fir::ReferenceType::get(varTy), heap);
LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca
<< " with " << heap << '\n');
return mlir::success();
}
private:
llvm::ArrayRef<mlir::Operation *> returnOps;
};
/// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based
/// on heuristics and settings. The intention is to allow better performance and
/// workarounds for conditions such as environments with limited stack space.
///
/// Currently, implements two conversions from stack to heap allocation.
/// 1. If a stack allocation is an array larger than some threshold value
/// make it a heap allocation.
/// 2. If a stack allocation is an array with a runtime evaluated size make
/// it a heap allocation.
class MemoryAllocationOpt
: public fir::MemoryAllocationOptBase<MemoryAllocationOpt> {
public:
void runOnOperation() override {
auto *context = &getContext();
auto func = getOperation();
mlir::OwningRewritePatternList patterns(context);
mlir::ConversionTarget target(*context);
MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(),
maxStackArraySize.getValue()};
// If func is a declaration, skip it.
if (func.empty())
return;
const auto &analysis = getAnalysis<ReturnAnalysis>();
target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithmeticDialect,
mlir::StandardOpsDialect>();
target.addDynamicallyLegalOp<fir::AllocaOp>([&](fir::AllocaOp alloca) {
return keepStackAllocation(alloca, &func.front(), options);
});
patterns.insert<AllocaOpConversion>(context, analysis.getReturns(func));
if (mlir::failed(
mlir::applyPartialConversion(func, target, std::move(patterns)))) {
mlir::emitError(func.getLoc(),
"error in memory allocation optimization\n");
signalPassFailure();
}
}
};
} // namespace
std::unique_ptr<mlir::Pass> fir::createMemoryAllocationPass() {
return std::make_unique<MemoryAllocationOpt>();
}

View File

@ -0,0 +1,34 @@
// RUN: fir-opt --memory-allocation-opt="dynamic-array-on-heap=true maximum-array-alloc-size=1024" %s | FileCheck %s
// Test for size of array being too big.
// CHECK-LABEL: func @_QPs1(
// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
// CHECK: fir.call @_QPs3(
// CHECK: fir.freemem %[[mem]]
// CHECK-NEXT: return
func @_QPs1() {
%0 = fir.alloca !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
fir.call @_QPs3(%0) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
return
}
// Test for dynamic array.
// CHECK-LABEL: func @_QPs2(
// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<?xi32>, %{{[0-9]+}} {bindc_name = "array", uniq_name = "_QFs2Earray"}
// CHECK: fir.call @_QPs3(
// CHECK: fir.freemem %[[mem]]
// CHECK-NEXT: return
func @_QPs2(%arg0: !fir.ref<i32>) {
%0 = fir.load %arg0 : !fir.ref<i32>
%1 = fir.convert %0 : (i32) -> index
%2 = fir.alloca !fir.array<?xi32>, %1 {bindc_name = "array", uniq_name = "_QFs2Earray"}
%3 = fir.convert %2 : (!fir.ref<!fir.array<?xi32>>) -> !fir.ref<!fir.array<1000123xi32>>
fir.call @_QPs3(%3) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
return
}
func private @_QPs3(!fir.ref<!fir.array<1000123xi32>>)