Add a new memory allocation rewrite pass.

This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based on heuristics and settings. The intention is to allow better performance and workarounds for conditions such as environments with limited stack space. Currently, it implements two conversions from stack to heap allocation: 1. If a stack allocation is an array larger than some threshold value, make it a heap allocation. 2. If a stack allocation is an array with a runtime-evaluated size, make it a heap allocation. Add a lit test for both suboptions. Reviewed By: PeteSteinfeld, vdonaldson Differential Revision: https://reviews.llvm.org/D115763
2025-02-03 07:38:57 +00:00 · 2021-12-14 09:28:53 -08:00 · 2021-12-14 09:28:53 -08:00 · 3d092e31c1
commit 3d092e31c1
parent cf9e61a9bb
5 changed files with 240 additions and 0 deletions
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@ -34,6 +34,7 @@ std::unique_ptr<mlir::Pass> createCharacterConversionPass();
 std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
+std::unique_ptr<mlir::Pass> createMemoryAllocationPass();

 /// Support for inlining on FIR.
 bool canLegallyInline(mlir::Operation *op, mlir::Region *reg,
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@ -152,4 +152,22 @@ def MemRefDataFlowOpt : FunctionPass<"fir-memref-dataflow-opt"> {
  ];
 }

+// Function-level pass that reclassifies FIR memory allocations. The
+// implementation in MemoryAllocation.cpp currently performs only stack-to-heap
+// conversions. With the defaults below nothing is converted: dynamic arrays
+// stay on the stack and the element-count limit is the all-ones value.
+def MemoryAllocationOpt : Pass<"memory-allocation-opt", "mlir::FuncOp"> {
+  let summary = "Convert stack to heap allocations and vice versa.";
+  let description = [{
+    Convert stack allocations to heap allocations and vice versa based on
+    estimated size, lifetime, usage patterns, the call tree, etc.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let options = [
+    Option<"dynamicArrayOnHeap", "dynamic-array-on-heap",
+           "bool", /*default=*/"false",
+           "Allocate all arrays with runtime determined size on heap.">,
+    Option<"maxStackArraySize", "maximum-array-alloc-size",
+           "std::size_t", /*default=*/"~static_cast<std::size_t>(0)",
+           "Set maximum number of elements of an array allocated on the stack.">
+  ];
+  let constructor = "::fir::createMemoryAllocationPass()";
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@ -6,6 +6,7 @@ add_flang_library(FIRTransforms
  ArrayValueCopy.cpp
  Inliner.cpp
  ExternalNameConversion.cpp
+  MemoryAllocation.cpp
  MemRefDataFlowOpt.cpp
  RewriteLoop.cpp

--- a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
+++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
@ -0,0 +1,186 @@
+//===- MemoryAllocation.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+#define DEBUG_TYPE "flang-memory-allocation-opt"
+
+// Sentinel meaning "no limit": no element count compares greater than this
+// value, so with this threshold the size of an array never moves it to the
+// heap.
+static constexpr std::size_t UnlimitedArraySize = ~static_cast<std::size_t>(0);
+
+namespace {
+/// Settings that steer where array allocations are placed.
+struct MemoryAllocationOptions {
+  /// When set, every array whose size is only known at run time is allocated
+  /// on the heap. This may lead to more heap fragmentation and can therefore
+  /// hurt performance.
+  bool dynamicArrayOnHeap{false};
+
+  /// Largest element count an array may have and still stay on the stack.
+  /// Where stack space is scarce, sending big arrays to the heap avoids
+  /// exhausting the stack.
+  std::size_t maxStackArraySize{UnlimitedArraySize};
+};
+
+/// Analysis that records the `mlir::ReturnOp` operations of a function.
+class ReturnAnalysis {
+public:
+  /// Scan \p op, if it is an `mlir::FuncOp`, and remember the first return
+  /// found in each of its blocks. Any other operation records nothing.
+  ReturnAnalysis(mlir::Operation *op) {
+    auto funcOp = mlir::dyn_cast<mlir::FuncOp>(op);
+    if (!funcOp)
+      return;
+    for (mlir::Block &blk : funcOp) {
+      for (mlir::Operation &candidate : blk) {
+        if (!mlir::isa<mlir::ReturnOp>(candidate))
+          continue;
+        returnMap[op].push_back(&candidate);
+        // At most one return is recorded per block.
+        break;
+      }
+    }
+  }
+
+  /// Give back a copy of the returns recorded for \p func, or an empty list
+  /// when nothing was recorded for it.
+  llvm::SmallVector<mlir::Operation *> getReturns(mlir::Operation *func) const {
+    // lookup() yields a default-constructed (empty) vector for unknown keys.
+    return returnMap.lookup(func);
+  }
+
+private:
+  using ReturnList = llvm::SmallVector<mlir::Operation *>;
+
+  /// Maps the analyzed operation to the return operations found inside it.
+  llvm::DenseMap<mlir::Operation *, ReturnList> returnMap;
+};
+} // namespace
+
+/// Decide whether an allocation is to remain on the stack (`fir.alloca`).
+///
+/// \param alloca  the stack allocation under consideration.
+/// \param entry   block the allocation must live in to be considered; the
+///                pass supplies the entry block of the enclosing function.
+/// \param options thresholds and switches controlling the decision.
+/// \returns `true` to keep the stack allocation, `false` if it should be moved
+///          to the heap (`fir.allocmem`).
+static inline bool keepStackAllocation(fir::AllocaOp alloca, mlir::Block *entry,
+                                       const MemoryAllocationOptions &options) {
+  // Limitation: only arrays allocated on the stack in the entry block are
+  // considered for now.
+  // TODO: Generalize the algorithm and placement of the freemem nodes.
+  if (alloca->getBlock() != entry)
+    return true;
+
+  // Anything that is not an array is left alone.
+  auto seqTy = alloca.getInType().dyn_cast<fir::SequenceType>();
+  if (!seqTy)
+    return true;
+
+  // Arrays with a runtime determined size move to the heap only on request.
+  if (fir::hasDynamicSize(seqTy))
+    return !options.dynamicArrayOnHeap;
+
+  // Accumulate the element count in unsigned 64-bit arithmetic with an
+  // explicit overflow test. Multiplying signed values could overflow (which is
+  // undefined behavior) before any sanity check gets a chance to run.
+  constexpr std::uint64_t maxCount = ~static_cast<std::uint64_t>(0);
+  std::uint64_t numberOfElements = 1;
+  for (std::int64_t extent : seqTy.getShape()) {
+    // If the count is suspicious (zero or negative extent), then don't change
+    // anything here.
+    if (extent <= 0)
+      return true;
+    const auto dim = static_cast<std::uint64_t>(extent);
+    // A product that does not fit in 64 bits is just as suspicious.
+    if (numberOfElements > maxCount / dim)
+      return true;
+    numberOfElements *= dim;
+  }
+
+  // If the number of elements exceeds the threshold, move the allocation to
+  // the heap. The comparison is done without narrowing the count, so it is
+  // also exact when std::size_t is narrower than 64 bits.
+  if (numberOfElements > options.maxStackArraySize) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "memory allocation opt: found " << alloca << '\n');
+    return false;
+  }
+  return true;
+}
+
+namespace {
+/// Rewrite pattern that turns a `fir.alloca` into a `fir.allocmem`, inserts a
+/// `fir.freemem` in front of every recorded return operation, and replaces the
+/// original op with a `fir.convert` of the heap value to a reference type.
+class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  /// \param ctx  context the pattern is created in.
+  /// \param rets operations in front of which the heap memory is released.
+  ///             The list is copied, so the caller's storage does not have to
+  ///             outlive the pattern.
+  AllocaOpConversion(mlir::MLIRContext *ctx,
+                     llvm::ArrayRef<mlir::Operation *> rets)
+      : OpRewritePattern(ctx), returnOps(rets.begin(), rets.end()) {}
+
+  /// Replace \p alloca by an equivalent heap allocation.
+  /// \returns success unconditionally; which ops get rewritten is decided by
+  ///          the legality callback of the conversion target.
+  mlir::LogicalResult
+  matchAndRewrite(fir::AllocaOp alloca,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto loc = alloca.getLoc();
+    mlir::Type varTy = alloca.getInType();
+    // The name attributes are optional; an absent one becomes an empty string.
+    auto unpackName =
+        [](llvm::Optional<llvm::StringRef> opt) -> llvm::StringRef {
+      if (opt)
+        return *opt;
+      return {};
+    };
+    auto uniqName = unpackName(alloca.uniq_name());
+    auto bindcName = unpackName(alloca.bindc_name());
+    auto heap = rewriter.create<fir::AllocMemOp>(
+        loc, varTy, uniqName, bindcName, alloca.typeparams(), alloca.shape());
+    // Emit the matching frees, then come back to where the alloca was.
+    auto insPt = rewriter.saveInsertionPoint();
+    for (mlir::Operation *retOp : returnOps) {
+      rewriter.setInsertionPoint(retOp);
+      [[maybe_unused]] auto free = rewriter.create<fir::FreeMemOp>(loc, heap);
+      LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: add free " << free
+                              << " for " << heap << '\n');
+    }
+    rewriter.restoreInsertionPoint(insPt);
+    // Print before replacing so the debug output never touches an operation
+    // that has already been handed to the rewriter for replacement.
+    LLVM_DEBUG(llvm::dbgs() << "memory allocation opt: replaced " << alloca
+                            << " with " << heap << '\n');
+    // Users expect a reference, so convert the heap value back to one.
+    rewriter.replaceOpWithNewOp<fir::ConvertOp>(
+        alloca, fir::ReferenceType::get(varTy), heap);
+    return mlir::success();
+  }
+
+private:
+  // Owned copy of the return operations. A non-owning ArrayRef would dangle
+  // when the pattern is constructed from a temporary list.
+  llvm::SmallVector<mlir::Operation *> returnOps;
+};
+
+/// This pass can reclassify memory allocations (fir.alloca, fir.allocmem) based
+/// on heuristics and settings. The intention is to allow better performance and
+/// workarounds for conditions such as environments with limited stack space.
+///
+/// Currently, it implements two conversions from stack to heap allocation.
+///   1. If a stack allocation is an array larger than some threshold value,
+///      make it a heap allocation.
+///   2. If a stack allocation is an array with a runtime evaluated size, make
+///      it a heap allocation.
+class MemoryAllocationOpt
+    : public fir::MemoryAllocationOptBase<MemoryAllocationOpt> {
+public:
+  /// Run the conversion on the current function. Signals pass failure if the
+  /// partial conversion fails.
+  void runOnOperation() override {
+    auto func = getOperation();
+
+    // If func is a declaration, skip it (before doing any setup work).
+    if (func.empty())
+      return;
+
+    auto *context = &getContext();
+    mlir::OwningRewritePatternList patterns(context);
+    mlir::ConversionTarget target(*context);
+    MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(),
+                                       maxStackArraySize.getValue()};
+
+    const auto &analysis = getAnalysis<ReturnAnalysis>();
+    // getReturns() hands back a fresh vector. Keep it in a local that lives
+    // until the conversion has finished, because the pattern receives it as an
+    // ArrayRef and may only hold a non-owning view of it.
+    llvm::SmallVector<mlir::Operation *> returnOps = analysis.getReturns(func);
+
+    target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithmeticDialect,
+                           mlir::StandardOpsDialect>();
+    // An alloca is legal (left untouched) exactly when it should stay on the
+    // stack; all others are rewritten by AllocaOpConversion.
+    target.addDynamicallyLegalOp<fir::AllocaOp>([&](fir::AllocaOp alloca) {
+      return keepStackAllocation(alloca, &func.front(), options);
+    });
+
+    patterns.insert<AllocaOpConversion>(context, returnOps);
+    if (mlir::failed(
+            mlir::applyPartialConversion(func, target, std::move(patterns)))) {
+      mlir::emitError(func.getLoc(),
+                      "error in memory allocation optimization\n");
+      signalPassFailure();
+    }
+  }
+};
+} // namespace
+
+/// Factory for the memory allocation pass; the result owns a new
+/// MemoryAllocationOpt instance.
+std::unique_ptr<mlir::Pass> fir::createMemoryAllocationPass() {
+  std::unique_ptr<mlir::Pass> pass = std::make_unique<MemoryAllocationOpt>();
+  return pass;
+}
--- a/flang/test/Fir/memory-allocation-opt.fir
+++ b/flang/test/Fir/memory-allocation-opt.fir
@ -0,0 +1,34 @@
+// RUN: fir-opt --memory-allocation-opt="dynamic-array-on-heap=true maximum-array-alloc-size=1024" %s | FileCheck %s
+
+// Test for size of array being too big.
+
+// CHECK-LABEL: func @_QPs1(
+// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
+// CHECK: fir.call @_QPs3(
+// CHECK: fir.freemem %[[mem]]
+// CHECK-NEXT: return
+
+func @_QPs1() {
+  // Constant-size array: 1000123 elements is above the 1024-element limit
+  // given on the RUN line.
+  %0 = fir.alloca !fir.array<1000123xi32> {bindc_name = "array", uniq_name = "_QFs1Earray"}
+  fir.call @_QPs3(%0) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
+  return
+}
+
+// Test for dynamic array.
+
+// CHECK-LABEL: func @_QPs2(
+// CHECK: %[[mem:.*]] = fir.allocmem !fir.array<?xi32>, %{{[0-9]+}} {bindc_name = "array", uniq_name = "_QFs2Earray"}
+// CHECK: fir.call @_QPs3(
+// CHECK: fir.freemem %[[mem]]
+// CHECK-NEXT: return
+
+func @_QPs2(%arg0: !fir.ref<i32>) {
+  // The extent of the array is loaded from %arg0, so its size is only known
+  // at run time.
+  %0 = fir.load %arg0 : !fir.ref<i32>
+  %1 = fir.convert %0 : (i32) -> index
+  %2 = fir.alloca !fir.array<?xi32>, %1 {bindc_name = "array", uniq_name = "_QFs2Earray"}
+  %3 = fir.convert %2 : (!fir.ref<!fir.array<?xi32>>) -> !fir.ref<!fir.array<1000123xi32>>
+  fir.call @_QPs3(%3) : (!fir.ref<!fir.array<1000123xi32>>) -> ()
+  return
+}
+func private @_QPs3(!fir.ref<!fir.array<1000123xi32>>)
+