[mlir][tensor] Add transform to make tensor.pad loop-independent

Add a transform to make `tensor.pad` and `tensor.empty` ops independent of SCF loop IVs. Such ops can then be hoisted.

E.g.:
```
scf.for %iv = %lb to %ub step %step {
  %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)>(%iv)[%ub]
  %p = tensor.pad %t low[5] high[%high] ...
  ...
}
```
Is transformed to:
```
%high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
%p_hoistable = tensor.pad %t low[5] high[%high_new]
%dim = tensor.dim %t, %c0
%size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>(%iv)[%ub, %dim]
%slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
```
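
The rewrite is driven through the transform dialect. A minimal invocation, mirroring the tests added in this commit, looks like this:
```
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
  // Match all tensor.pad ops in the payload and make them independent of the
  // IV of the single enclosing scf.for loop.
  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
      : (!pdl.operation) -> !pdl.operation
  %1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
}
```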

Differential Revision: https://reviews.llvm.org/D143910
Matthias Springer 2023-04-28 10:34:03 +09:00
parent fbf42f1fe2
commit 77124386fe
16 changed files with 611 additions and 8 deletions


@ -15,9 +15,11 @@
#define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
namespace mlir {
class AffineMap;
class Location;
class OpBuilder;
class OpFoldResult;
@ -85,6 +87,18 @@ FailureOr<OpFoldResult> reifyShapedValueDimBound(
ValueBoundsConstraintSet::StopConditionFn stopCondition = nullptr,
bool closedUB = false);
/// Materialize an already computed bound with Affine dialect ops.
///
/// * `ValueBoundsOpInterface::computeBound` computes bounds but does not
/// create IR. It is dialect independent.
/// * `materializeComputedBound` materializes computed bounds with Affine
/// dialect ops.
/// * `reifyIndexValueBound`/`reifyShapedValueDimBound` are a combination of
/// the two functions mentioned above.
OpFoldResult materializeComputedBound(
OpBuilder &b, Location loc, AffineMap boundMap,
ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
} // namespace affine
} // namespace mlir


@ -1,2 +1,3 @@
add_subdirectory(IR)
add_subdirectory(Transforms)
add_subdirectory(TransformOps)


@ -0,0 +1,6 @@
set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)


@ -11,10 +11,13 @@
#include "mlir/Dialect/PDL/IR/PDLTypes.h"
#include "mlir/Dialect/Transform/IR/TransformOps.h"
#include "mlir/Dialect/Transform/IR/TransformTypes.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
namespace mlir {
class DialectRegistry;
namespace tensor {
/// A specialized TrackingListener for transform ops that operate on tensor IR.
@ -29,7 +32,12 @@ protected:
ValueRange newValues) const override;
};
void registerTransformDialectExtension(DialectRegistry &registry);
} // namespace tensor
} // namespace mlir
#define GET_OP_CLASSES
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H


@ -0,0 +1,64 @@
//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef TENSOR_TRANSFORM_OPS
#define TENSOR_TRANSFORM_OPS
include "mlir/Dialect/PDL/IR/PDLTypes.td"
include "mlir/Dialect/Transform/IR/TransformDialect.td"
include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
include "mlir/Dialect/Transform/IR/TransformTypes.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/OpBase.td"
def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
def MakeLoopIndependentOp
: Op<Transform_Dialect, "tensor.make_loop_independent",
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
TransformOpInterface, TransformEachOpTrait]> {
let description = [{
Rewrite the targeted ops such that their index-typed operands no longer
depend on any loop induction variable of the `num_loops` enclosing `scf.for`
loops. I.e., compute an upper bound that is independent of any such loop IV
for every tensor dimension. The transformed op can then be hoisted out of
the `num_loops` enclosing loops. To preserve the original semantics, a
`tensor.extract_slice` is inserted inside the loop.
Currently supported operations are:
- tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
followed by a tensor.extract_slice.
- tensor.pad: Replaced by an upper bound padding, followed by a
tensor.extract_slice.
#### Return modes
This operation fails if at least one induction variable could not be
eliminated. In case the targeted op is already independent of induction
variables, this transform succeeds and returns the unmodified target op.
Otherwise, the returned handle points to a subset of the produced ops:
- tensor.empty: The returned handle points to the tensor.extract_slice op.
- tensor.pad: The returned handle points to the tensor.extract_slice op.
This transform op consumes the target handle and produces a result handle.
}];
let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
let results = (outs PDL_Operation:$transformed);
let assemblyFormat = "$target attr-dict";
let extraClassDeclaration = [{
::mlir::DiagnosedSilenceableFailure applyToOne(
::mlir::Operation *target,
::mlir::transform::ApplyToEachResultList &results,
::mlir::transform::TransformState &state);
}];
}
#endif // TENSOR_TRANSFORM_OPS
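
To illustrate the `num_loops` attribute described above, here is a sketch adapted from the two-loop test case added in this commit, where a pad op is made independent of two nested loop IVs at once:
```
func.func @two_loops(%lb: index, %ub: index, %step: index,
                     %t: tensor<?xf32>, %f: f32) {
  scf.for %i = %lb to %ub step %step {
    scf.for %j = %lb to %ub step %step {
      // The low padding depends on both IVs; the transform replaces it with
      // a loop-independent upper bound (2 * %ub - 2 in this test) and
      // compensates with a tensor.extract_slice inside the loops.
      %low = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%i, %j)
      %p = tensor.pad %t low[%low] high[5] {
      ^bb0(%arg1: index):
        tensor.yield %f : f32
      } : tensor<?xf32> to tensor<?xf32>
      "dummy.some_use"(%p) : (tensor<?xf32>) -> ()
    }
  }
  return
}

transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
      : (!pdl.operation) -> !pdl.operation
  %1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
}
```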


@ -61,6 +61,45 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns);
/// respectively.
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
//===----------------------------------------------------------------------===//
// Transform helpers
//===----------------------------------------------------------------------===//
/// Build a new tensor::PadOp with low/high padding that is independent of all
/// given independencies. If the op is already independent of all
/// independencies, the same PadOp result is returned.
///
/// Failure indicates that no suitable upper bound for low/high padding could be
/// found.
///
/// Example:
/// scf.for %iv = %lb to %ub step %step {
/// %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)>(%iv)[%ub]
/// %p = tensor.pad %t low[5] high[%high] ...
/// ...
/// }
///
/// The function builds IR such as:
/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
/// %dim = tensor.dim %t, %c0
/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
/// (%iv)[%ub, %dim]
/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
///
/// The slice is returned.
FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
ValueRange independencies);
/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
/// given independencies. If the op is already independent of all
/// independencies, the same EmptyOp result is returned.
///
/// Failure indicates that no suitable upper bound for the dynamic sizes could be
/// found.
FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
ValueRange independencies);
} // namespace tensor
} // namespace mlir
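
The `tensor.pad` overload above documents its rewrite with example IR; the `tensor.empty` overload behaves analogously. A rough before/after sketch, adapted from the test case added in this commit (names are illustrative):
```
// Before: the dynamic size of the tensor.empty depends on the loop IV %iv.
scf.for %iv = %lb to %ub step %step {
  %sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)>(%iv)[%ub]
  %empty = tensor.empty(%sz) : tensor<?xf32>
  "dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
}

// After (loop body): the new tensor.empty is sized with the loop-independent
// upper bound %ub - %lb and is now hoistable; the extract_slice restores the
// original, IV-dependent size.
%empty_sz = affine.apply affine_map<()[s0, s1] -> (s0 - s1)>()[%ub, %lb]
%empty_ub = tensor.empty(%empty_sz) : tensor<?xf32>
%slice_sz = affine.apply affine_map<(d0)[s0] -> (-d0 + s0)>(%iv)[%ub]
%slice = tensor.extract_slice %empty_ub[0] [%slice_sz] [1]
    : tensor<?xf32> to tensor<?xf32>
```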


@ -71,6 +71,7 @@
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
#include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
@ -132,6 +133,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
linalg::registerTransformDialectExtension(registry);
memref::registerTransformDialectExtension(registry);
scf::registerTransformDialectExtension(registry);
tensor::registerTransformDialectExtension(registry);
vector::registerTransformDialectExtension(registry);
// Register all external models.


@ -114,12 +114,25 @@ public:
/// Compute a bound in terms of the values/dimensions in `dependencies`. The
/// computed bound consists of only constant terms and dependent values (or
/// dimension sizes thereof).
static LogicalResult computeBound(AffineMap &resultMap,
ValueDimList &mapOperands,
presburger::BoundType type, Value value,
std::optional<int64_t> dim,
ValueDimList dependencies,
bool closedUB = false);
static LogicalResult
computeDependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
presburger::BoundType type, Value value,
std::optional<int64_t> dim, ValueDimList dependencies,
bool closedUB = false);
/// Compute a bound that is independent of all values in `independencies`.
///
/// Independencies are the opposite of dependencies. The computed bound does
/// not contain any SSA values that are part of `independencies`. E.g., this
/// function can be used to make ops hoistable from loops. To that end, ops
/// must be made independent of loop induction variables (in the case of "for"
/// loops). Loop induction variables are the independencies; they may not
/// appear in the computed bound.
static LogicalResult
computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
presburger::BoundType type, Value value,
std::optional<int64_t> dim, ValueRange independencies,
bool closedUB = false);
/// Compute a constant bound for the given index-typed value or shape
/// dimension size.


@ -19,7 +19,7 @@ using namespace mlir::affine;
static FailureOr<OpFoldResult>
reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
Value value, std::optional<int64_t> dim,
function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
ValueBoundsConstraintSet::StopConditionFn stopCondition,
bool closedUB) {
// Compute bound.
AffineMap boundMap;
@ -28,6 +28,13 @@ reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
return failure();
// Reify bound.
return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
}
OpFoldResult affine::materializeComputedBound(
OpBuilder &b, Location loc, AffineMap boundMap,
ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
// Materialize tensor.dim/memref.dim ops.
SmallVector<Value> operands;
for (auto valueDim : mapOperands) {


@ -4,9 +4,15 @@ add_mlir_dialect_library(MLIRTensorTransformOps
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
DEPENDS
MLIRTensorTransformOpsIncGen
LINK_LIBS PUBLIC
MLIRAffineDialect
MLIRIR
MLIRPDLDialect
MLIRSCFDialect
MLIRTensorDialect
MLIRTensorTransforms
MLIRTransformDialect
)


@ -8,8 +8,12 @@
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
#include "llvm/ADT/TypeSwitch.h"
using namespace mlir;
@ -49,3 +53,80 @@ tensor::TrackingListener::findReplacementOp(Operation *op,
return nullptr;
}
//===----------------------------------------------------------------------===//
// MakeLoopIndependentOp
//===----------------------------------------------------------------------===//
DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
Operation *target, transform::ApplyToEachResultList &results,
transform::TransformState &state) {
// Gather IVs.
SmallVector<Value> ivs;
Operation *nextOp = target;
for (uint64_t i = 0, e = getNumLoops(); i < e; ++i) {
nextOp = nextOp->getParentOfType<scf::ForOp>();
if (!nextOp) {
DiagnosedSilenceableFailure diag = emitSilenceableError()
<< "could not find " << i
<< "-th enclosing loop";
diag.attachNote(target->getLoc()) << "target op";
return diag;
}
ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
}
// Rewrite IR.
IRRewriter rewriter(target->getContext());
FailureOr<Value> replacement = failure();
if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
} else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
} else {
DiagnosedSilenceableFailure diag = emitSilenceableError()
<< "unsupported target op";
diag.attachNote(target->getLoc()) << "target op";
return diag;
}
if (failed(replacement)) {
DiagnosedSilenceableFailure diag =
emitSilenceableError() << "could not make target op loop-independent";
diag.attachNote(target->getLoc()) << "target op";
return diag;
}
rewriter.replaceOp(target, *replacement);
results.push_back(replacement->getDefiningOp());
return DiagnosedSilenceableFailure::success();
}
//===----------------------------------------------------------------------===//
// Transform op registration
//===----------------------------------------------------------------------===//
namespace {
class TensorTransformDialectExtension
: public transform::TransformDialectExtension<
TensorTransformDialectExtension> {
public:
using Base::Base;
void init() {
declareGeneratedDialect<affine::AffineDialect>();
declareGeneratedDialect<tensor::TensorDialect>();
registerTransformOps<
#define GET_OP_LIST
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
>();
}
};
} // namespace
#define GET_OP_CLASSES
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
void mlir::tensor::registerTransformDialectExtension(
DialectRegistry &registry) {
registry.addExtensions<TensorTransformDialectExtension>();
}


@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
ExtractSliceFromReshapeUtils.cpp
FoldIntoPackAndUnpackPatterns.cpp
FoldTensorSubsetOps.cpp
IndependenceTransforms.cpp
MergeConsecutiveInsertExtractSlicePatterns.cpp
ReshapePatterns.cpp
SwapExtractSliceWithProducerPatterns.cpp
@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
LINK_LIBS PUBLIC
MLIRAffineDialect
MLIRAffineTransforms
MLIRAffineUtils
MLIRArithDialect
MLIRBufferizationDialect
@ -30,4 +32,5 @@ add_mlir_dialect_library(MLIRTensorTransforms
MLIRTilingInterface
MLIRTransforms
MLIRVectorDialect
MLIRValueBoundsOpInterface
)


@ -0,0 +1,136 @@
//===- IndependenceTransforms.cpp - Make ops independent of values --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Transforms/Transforms.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
using namespace mlir;
using namespace mlir::tensor;
/// Make the given OpFoldResult independent of all independencies.
static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
OpFoldResult ofr,
ValueRange independencies) {
if (ofr.is<Attribute>())
return ofr;
Value value = ofr.get<Value>();
AffineMap boundMap;
ValueDimList mapOperands;
if (failed(ValueBoundsConstraintSet::computeIndependentBound(
boundMap, mapOperands, presburger::BoundType::UB, value,
/*dim=*/std::nullopt, independencies, /*closedUB=*/true)))
return failure();
return mlir::affine::materializeComputedBound(b, loc, boundMap, mapOperands);
}
FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
ValueRange independencies) {
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(padOp);
Location loc = padOp.getLoc();
// Non-constant padding not supported.
Value constantPadding = padOp.getConstantPaddingValue();
if (!constantPadding)
return failure();
SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
for (OpFoldResult ofr : padOp.getMixedLowPad()) {
auto ub = makeIndependent(b, loc, ofr, independencies);
if (failed(ub))
return failure();
newMixedLow.push_back(*ub);
}
for (OpFoldResult ofr : padOp.getMixedHighPad()) {
auto ub = makeIndependent(b, loc, ofr, independencies);
if (failed(ub))
return failure();
newMixedHigh.push_back(*ub);
}
// Return existing tensor::PadOp if nothing has changed.
if (llvm::equal(padOp.getMixedLowPad(), newMixedLow) &&
llvm::equal(padOp.getMixedHighPad(), newMixedHigh))
return padOp.getResult();
// Create a new tensor::PadOp.
auto newPadOp = b.create<PadOp>(
loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});
// Create a tensor::ExtractSliceOp.
// Reify the result sizes of the old tensor::PadOp.
ReifiedRankedShapedTypeDims reifiedSizes;
ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
return failure();
SmallVector<OpFoldResult> offsets, sizes, strides;
for (int64_t i = 0, e = padOp.getResultType().getRank(); i < e; ++i) {
// offset = ub(low_padding) - low_padding
OpFoldResult prevLow = padOp.getMixedLowPad()[i];
if (prevLow.is<Attribute>()) {
offsets.push_back(b.getIndexAttr(0));
} else {
offsets.push_back(
b.create<affine::AffineApplyOp>(
loc, b.getAffineDimExpr(0) - b.getAffineDimExpr(1),
std::initializer_list<Value>{newMixedLow[i].get<Value>(),
prevLow.get<Value>()})
.getResult());
}
// size = reified result size
if (!padOp.getResultType().isDynamicDim(i)) {
sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
} else {
sizes.push_back(reifiedSizes[0][i]);
}
// stride = 1
strides.push_back(b.getIndexAttr(1));
}
return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
.getResult();
}
FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
tensor::EmptyOp emptyOp,
ValueRange independencies) {
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(emptyOp);
Location loc = emptyOp.getLoc();
SmallVector<OpFoldResult> newSizes;
for (OpFoldResult ofr : emptyOp.getMixedSizes()) {
auto ub = makeIndependent(b, loc, ofr, independencies);
if (failed(ub))
return failure();
newSizes.push_back(*ub);
}
// Return existing tensor::EmptyOp if nothing has changed.
if (llvm::equal(emptyOp.getMixedSizes(), newSizes))
return emptyOp.getResult();
// Create a new tensor::EmptyOp.
Value newEmptyOp =
b.create<EmptyOp>(loc, newSizes, emptyOp.getType().getElementType());
// Create a tensor::ExtractSliceOp.
SmallVector<OpFoldResult> offsets(newSizes.size(), b.getIndexAttr(0));
SmallVector<OpFoldResult> strides(newSizes.size(), b.getIndexAttr(1));
return b
.create<ExtractSliceOp>(loc, newEmptyOp, offsets, emptyOp.getMixedSizes(),
strides)
.getResult();
}


@ -356,7 +356,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
return success();
}
LogicalResult ValueBoundsConstraintSet::computeBound(
LogicalResult ValueBoundsConstraintSet::computeDependentBound(
AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
Value value, std::optional<int64_t> dim, ValueDimList dependencies,
bool closedUB) {
@ -368,6 +368,40 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
closedUB);
}
LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
Value value, std::optional<int64_t> dim, ValueRange independencies,
bool closedUB) {
// Return "true" if the given value is independent of all values in
// `independencies`. I.e., neither the value itself nor any value in the
// backward slice (reverse use-def chain) is contained in `independencies`.
auto isIndependent = [&](Value v) {
SmallVector<Value> worklist;
DenseSet<Value> visited;
worklist.push_back(v);
while (!worklist.empty()) {
Value next = worklist.pop_back_val();
if (visited.contains(next))
continue;
visited.insert(next);
if (llvm::is_contained(independencies, next))
return false;
// TODO: DominanceInfo could be used to stop the traversal early.
Operation *op = next.getDefiningOp();
if (!op)
continue;
worklist.append(op->getOperands().begin(), op->getOperands().end());
}
return true;
};
// Reify bounds in terms of any independent values.
return computeBound(
resultMap, mapOperands, type, value, dim,
[&](Value v, std::optional<int64_t> d) { return isIndependent(v); },
closedUB);
}
FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
presburger::BoundType type, Value value, std::optional<int64_t> dim,
StopConditionFn stopCondition, bool closedUB) {


@ -0,0 +1,151 @@
// RUN: mlir-opt %s -allow-unregistered-dialect \
// RUN: -test-transform-dialect-interpreter -canonicalize \
// RUN: -split-input-file -verify-diagnostics | FileCheck %s
// This is a test case where "high" padding depends on the IV.
// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
// CHECK-LABEL: func @make_pad_loop_independent_1(
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
%t: tensor<?xf32>, %f: f32) {
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
scf.for %i = %lb to %ub step %step {
// CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
// CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
// CHECK: %[[dim:.*]] = tensor.dim %[[t]]
// CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
%high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
%p = tensor.pad %t low[5] high[%high] {
^bb0(%arg1: index):
tensor.yield %f : f32
} : tensor<?xf32> to tensor<?xf32>
// CHECK: "dummy.some_use"(%[[replacement]])
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
}
// -----
// This is a test case where "low" padding depends on the IV.
// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
// CHECK-LABEL: func @make_pad_loop_independent_1(
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
%t: tensor<?xf32>, %f: f32) {
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
scf.for %i = %lb to %ub step %step {
// CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
// CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
// CHECK: %[[dim:.*]] = tensor.dim %[[t]]
// CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
// CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
%low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
%p = tensor.pad %t low[%low] high[5] {
^bb0(%arg1: index):
tensor.yield %f : f32
} : tensor<?xf32> to tensor<?xf32>
// CHECK: "dummy.some_use"(%[[replacement]])
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
}
// -----
// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
// CHECK-LABEL: func @two_loops(
func.func @two_loops(%lb: index, %ub: index, %step: index,
%t: tensor<?xf32>, %f: f32) {
scf.for %i = %lb to %ub step %step {
scf.for %j = %lb to %ub step %step {
// CHECK: affine.apply #map()[%{{.*}}]
%low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
%p = tensor.pad %t low[%low] high[5] {
^bb0(%arg1: index):
tensor.yield %f : f32
} : tensor<?xf32> to tensor<?xf32>
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
}
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
}
// -----
func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
%t: tensor<?xf32>, %f: f32) {
scf.for %i = %lb to %ub step %step {
scf.for %j = %lb to %ub step %step {
%low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
// expected-note@below {{target op}}
%p = tensor.pad %t low[%low] high[5] {
^bb0(%arg1: index):
tensor.yield %f : f32
} : tensor<?xf32> to tensor<?xf32>
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
}
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
// expected-error@below {{could not find 2-th enclosing loop}}
%1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
}
// -----
// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
// CHECK-LABEL: func @make_empty_loop_independent(
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
scf.for %i = %lb to %ub step %step {
// CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
// CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
// CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
%sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
%empty = tensor.empty(%sz) : tensor<?xf32>
// CHECK: "dummy.some_use"(%[[replacement]])
"dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
}
return
}
transform.sequence failures(propagate) {
^bb1(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
}


@ -5808,6 +5808,7 @@ cc_library(
includes = ["include"],
deps = [
":AffineDialect",
":AffineTransforms",
":AffineUtils",
":ArithDialect",
":ArithUtils",
@ -5824,20 +5825,57 @@ cc_library(
":TensorPassIncGen",
":TilingInterface",
":Transforms",
":ValueBoundsOpInterface",
":VectorDialect",
"//llvm:Support",
],
)
td_library(
name = "TensorTransformOpsTdFiles",
srcs = [
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
],
includes = ["include"],
deps = [
":PDLDialect",
":TransformDialectTdFiles",
],
)
gentbl_cc_library(
name = "TensorTransformOpsIncGen",
strip_include_prefix = "include",
tbl_outs = [
(
["-gen-op-decls"],
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
),
(
["-gen-op-defs"],
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
),
],
tblgen = ":mlir-tblgen",
td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
deps = [
":TensorTransformOpsTdFiles",
],
)
cc_library(
name = "TensorTransformOps",
srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
includes = ["include"],
deps = [
":AffineDialect",
":IR",
":PDLDialect",
":SCFDialect",
":TensorDialect",
":TensorTransformOpsIncGen",
":TensorTransforms",
":TransformDialect",
"//llvm:Support",
],