mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 14:20:17 +00:00
[mlir][tensor] Add transform to make tensor.pad loop-independent
Add a transform to make `tensor.pad` and `tensor.empty` ops independent of SCF loop IVs. Such ops can then be hoisted. E.g.: ``` scf.for %iv = %lb to %ub step %step { %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub] %p = tensor.pad %t low[5] high[%high] ... ... } ``` Is transformed to: ``` %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub] %p_hoistable = tensor.pad %t low[5] high[%high_new] %dim = tensor.dim %t, %c0 %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>(%iv)[%ub, %dim] %slice = tensor.extract_slice %p_hoistable [0] [%size] [1] ``` Differential Revision: https://reviews.llvm.org/D143910
This commit is contained in:
parent
fbf42f1fe2
commit
77124386fe
@ -15,9 +15,11 @@
|
||||
#define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
|
||||
|
||||
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
|
||||
#include "mlir/Support/LLVM.h"
|
||||
#include "mlir/Support/LogicalResult.h"
|
||||
|
||||
namespace mlir {
|
||||
class AffineMap;
|
||||
class Location;
|
||||
class OpBuilder;
|
||||
class OpFoldResult;
|
||||
@ -85,6 +87,18 @@ FailureOr<OpFoldResult> reifyShapedValueDimBound(
|
||||
ValueBoundsConstraintSet::StopConditionFn stopCondition = nullptr,
|
||||
bool closedUB = false);
|
||||
|
||||
/// Materialize an already computed bound with Affine dialect ops.
|
||||
///
|
||||
/// * `ValueBoundsOpInterface::computeBound` computes bounds but does not
|
||||
/// create IR. It is dialect independent.
|
||||
/// * `materializeComputedBound` materializes computed bounds with Affine
|
||||
/// dialect ops.
|
||||
/// * `reifyIndexValueBound`/`reifyShapedValueDimBound` are a combination of
|
||||
/// the two functions mentioned above.
|
||||
OpFoldResult materializeComputedBound(
|
||||
OpBuilder &b, Location loc, AffineMap boundMap,
|
||||
ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands);
|
||||
|
||||
} // namespace affine
|
||||
} // namespace mlir
|
||||
|
||||
|
@ -1,2 +1,3 @@
|
||||
add_subdirectory(IR)
|
||||
add_subdirectory(Transforms)
|
||||
add_subdirectory(TransformOps)
|
||||
|
@ -0,0 +1,6 @@
|
||||
set(LLVM_TARGET_DEFINITIONS TensorTransformOps.td)
|
||||
mlir_tablegen(TensorTransformOps.h.inc -gen-op-decls)
|
||||
mlir_tablegen(TensorTransformOps.cpp.inc -gen-op-defs)
|
||||
add_public_tablegen_target(MLIRTensorTransformOpsIncGen)
|
||||
|
||||
add_mlir_doc(TensorTransformOps TensorTransformOps Dialects/ -gen-op-doc)
|
@ -11,10 +11,13 @@
|
||||
|
||||
#include "mlir/Dialect/PDL/IR/PDLTypes.h"
|
||||
#include "mlir/Dialect/Transform/IR/TransformOps.h"
|
||||
#include "mlir/Dialect/Transform/IR/TransformTypes.h"
|
||||
#include "mlir/IR/OpImplementation.h"
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
|
||||
namespace mlir {
|
||||
class DialectRegistry;
|
||||
|
||||
namespace tensor {
|
||||
|
||||
/// A specialized TrackingListener for transform ops that operate on tensor IR.
|
||||
@ -29,7 +32,12 @@ protected:
|
||||
ValueRange newValues) const override;
|
||||
};
|
||||
|
||||
void registerTransformDialectExtension(DialectRegistry ®istry);
|
||||
|
||||
} // namespace tensor
|
||||
} // namespace mlir
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc"
|
||||
|
||||
#endif // MLIR_DIALECT_TENSOR_TRANSFORMOPS_TENSORTRANSFORMOPS_H
|
||||
|
@ -0,0 +1,64 @@
|
||||
//===- TensorTransformOps.td - Tensor transformation ops ---*- tablegen -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef TENSOR_TRANSFORM_OPS
|
||||
#define TENSOR_TRANSFORM_OPS
|
||||
|
||||
include "mlir/Dialect/PDL/IR/PDLTypes.td"
|
||||
include "mlir/Dialect/Transform/IR/TransformDialect.td"
|
||||
include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
|
||||
include "mlir/Dialect/Transform/IR/TransformTypes.td"
|
||||
include "mlir/Interfaces/SideEffectInterfaces.td"
|
||||
include "mlir/IR/OpBase.td"
|
||||
|
||||
def Transform_TensorPadOp : Transform_ConcreteOpType<"tensor.pad">;
|
||||
|
||||
def MakeLoopIndependentOp
|
||||
: Op<Transform_Dialect, "tensor.make_loop_independent",
|
||||
[FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface,
|
||||
TransformOpInterface, TransformEachOpTrait]> {
|
||||
let description = [{
|
||||
Rewrite the targeted ops such that their index-typed operands no longer
|
||||
depend on any loop induction variable of the `num_loops` enclosing `scf.for`
|
||||
loops. I.e., compute an upper bound that is independent of any such loop IV
|
||||
for every tensor dimension. The transformed op could then be hoisted from
|
||||
the `num_loops` enclosing loops. To preserve the original semantics, place a
|
||||
`tensor.extract_slice` inside the loop.
|
||||
|
||||
Currently supported operations are:
|
||||
- tensor.empty: Replaced with a new tensor.empty with upper bound sizes,
|
||||
followed by a tensor.extract_slice.
|
||||
- tensor.pad: Replaced by an upper bound padding, followed by a
|
||||
tensor.extract_slice.
|
||||
|
||||
#### Return modes
|
||||
|
||||
This operation fails if at least one induction variable could not be
|
||||
eliminated. In case the targeted op is already independent of induction
|
||||
variables, this transform succeeds and returns the unmodified target op.
|
||||
|
||||
Otherwise, the returned handle points to a subset of the produced ops:
|
||||
- tensor.empty: The returned handle points to the tensor.extract_slice op.
|
||||
- tensor.pad: The returned handle points to the tensor.extract_slice op.
|
||||
|
||||
This transform op consumes the target handle and produces a result handle.
|
||||
}];
|
||||
|
||||
let arguments = (ins PDL_Operation:$target, I64Attr:$num_loops);
|
||||
let results = (outs PDL_Operation:$transformed);
|
||||
let assemblyFormat = "$target attr-dict";
|
||||
|
||||
let extraClassDeclaration = [{
|
||||
::mlir::DiagnosedSilenceableFailure applyToOne(
|
||||
::mlir::Operation *target,
|
||||
::mlir::transform::ApplyToEachResultList &results,
|
||||
::mlir::transform::TransformState &state);
|
||||
}];
|
||||
}
|
||||
|
||||
#endif // TENSOR_TRANSFORM_OPS
|
@ -61,6 +61,45 @@ void populateFoldTensorEmptyPatterns(RewritePatternSet &patterns);
|
||||
/// respectively.
|
||||
void populateFoldIntoPackAndUnpackPatterns(RewritePatternSet &patterns);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Transform helpers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Build a new tensor::PadOp with low/high padding that is independent of all
|
||||
/// given independencies. If the op is already independent of all
|
||||
/// independencies, the same PadOp result is returned.
|
||||
///
|
||||
/// Failure indicates that no suitable upper bound for low/high padding could be
|
||||
/// found.
|
||||
///
|
||||
/// Example:
|
||||
/// scf.for %iv = %lb to %ub step %step {
|
||||
/// %high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%iv)[%ub]
|
||||
/// %p = tensor.pad %t low[5] high[%high] ...
|
||||
/// ...
|
||||
/// }
|
||||
///
|
||||
/// The function builds IR such as:
|
||||
/// %high_new = affine.apply affine_map<()[s0, s1] -> (-s0 + s1)> ()[%lb, %ub]
|
||||
/// %p_hoistable = tensor.pad %t low[5] high[%high_new]
|
||||
/// %dim = tensor.dim %t, %c0
|
||||
/// %size = affine.apply affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
|
||||
/// (%iv)[%ub, %dim]
|
||||
/// %slice = tensor.extract_slice %p_hoistable [0] [%size] [1]
|
||||
///
|
||||
/// The slice is returned.
|
||||
FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
|
||||
ValueRange independencies);
|
||||
|
||||
/// Build a new tensor::EmptyOp whose dynamic sizes are independent of all
|
||||
/// given independencies. If the op is already independent of all
|
||||
/// independencies, the same EmptyOp result is returned.
|
||||
///
|
||||
/// Failure indicates that no suitable upper bound for the dynamic sizes could be
|
||||
/// found.
|
||||
FailureOr<Value> buildIndependentOp(OpBuilder &b, tensor::EmptyOp emptyOp,
|
||||
ValueRange independencies);
|
||||
|
||||
} // namespace tensor
|
||||
} // namespace mlir
|
||||
|
||||
|
@ -71,6 +71,7 @@
|
||||
#include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
|
||||
#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
|
||||
#include "mlir/Dialect/Tensor/IR/ValueBoundsOpInterfaceImpl.h"
|
||||
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
|
||||
#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
|
||||
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
|
||||
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
|
||||
@ -132,6 +133,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
|
||||
linalg::registerTransformDialectExtension(registry);
|
||||
memref::registerTransformDialectExtension(registry);
|
||||
scf::registerTransformDialectExtension(registry);
|
||||
tensor::registerTransformDialectExtension(registry);
|
||||
vector::registerTransformDialectExtension(registry);
|
||||
|
||||
// Register all external models.
|
||||
|
@ -114,12 +114,25 @@ public:
|
||||
/// Compute a bound in terms of the values/dimensions in `dependencies`. The
|
||||
/// computed bound consists of only constant terms and dependent values (or
|
||||
/// dimension sizes thereof).
|
||||
static LogicalResult computeBound(AffineMap &resultMap,
|
||||
ValueDimList &mapOperands,
|
||||
presburger::BoundType type, Value value,
|
||||
std::optional<int64_t> dim,
|
||||
ValueDimList dependencies,
|
||||
bool closedUB = false);
|
||||
static LogicalResult
|
||||
computeDependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
|
||||
presburger::BoundType type, Value value,
|
||||
std::optional<int64_t> dim, ValueDimList dependencies,
|
||||
bool closedUB = false);
|
||||
|
||||
/// Compute a bound that is independent of all values in `independencies`.
|
||||
///
|
||||
/// Independencies are the opposite of dependencies. The computed bound does
|
||||
/// not contain any SSA values that are part of `independencies`. E.g., this
|
||||
/// function can be used to make ops hoistable from loops. To that end, ops
|
||||
/// must be made independent of loop induction variables (in the case of "for"
|
||||
/// loops). Loop induction variables are the independencies; they may not
|
||||
/// appear in the computed bound.
|
||||
static LogicalResult
|
||||
computeIndependentBound(AffineMap &resultMap, ValueDimList &mapOperands,
|
||||
presburger::BoundType type, Value value,
|
||||
std::optional<int64_t> dim, ValueRange independencies,
|
||||
bool closedUB = false);
|
||||
|
||||
/// Compute a constant bound for the given index-typed value or shape
|
||||
/// dimension size.
|
||||
|
@ -19,7 +19,7 @@ using namespace mlir::affine;
|
||||
static FailureOr<OpFoldResult>
|
||||
reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
|
||||
Value value, std::optional<int64_t> dim,
|
||||
function_ref<bool(Value, std::optional<int64_t>)> stopCondition,
|
||||
ValueBoundsConstraintSet::StopConditionFn stopCondition,
|
||||
bool closedUB) {
|
||||
// Compute bound.
|
||||
AffineMap boundMap;
|
||||
@ -28,6 +28,13 @@ reifyValueBound(OpBuilder &b, Location loc, presburger::BoundType type,
|
||||
boundMap, mapOperands, type, value, dim, stopCondition, closedUB)))
|
||||
return failure();
|
||||
|
||||
// Reify bound.
|
||||
return affine::materializeComputedBound(b, loc, boundMap, mapOperands);
|
||||
}
|
||||
|
||||
OpFoldResult affine::materializeComputedBound(
|
||||
OpBuilder &b, Location loc, AffineMap boundMap,
|
||||
ArrayRef<std::pair<Value, std::optional<int64_t>>> mapOperands) {
|
||||
// Materialize tensor.dim/memref.dim ops.
|
||||
SmallVector<Value> operands;
|
||||
for (auto valueDim : mapOperands) {
|
||||
|
@ -4,9 +4,15 @@ add_mlir_dialect_library(MLIRTensorTransformOps
|
||||
ADDITIONAL_HEADER_DIRS
|
||||
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor/TransformOps
|
||||
|
||||
DEPENDS
|
||||
MLIRTensorTransformOpsIncGen
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRAffineDialect
|
||||
MLIRIR
|
||||
MLIRPDLDialect
|
||||
MLIRSCFDialect
|
||||
MLIRTensorDialect
|
||||
MLIRTensorTransforms
|
||||
MLIRTransformDialect
|
||||
)
|
||||
|
@ -8,8 +8,12 @@
|
||||
|
||||
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h"
|
||||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/SCF/IR/SCF.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
|
||||
#include "mlir/Dialect/Transform/IR/TransformDialect.h"
|
||||
#include "mlir/Dialect/Transform/IR/TransformInterfaces.h"
|
||||
#include "llvm/ADT/TypeSwitch.h"
|
||||
|
||||
using namespace mlir;
|
||||
@ -49,3 +53,80 @@ tensor::TrackingListener::findReplacementOp(Operation *op,
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MakeLoopIndependentOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Rewrites `target` (a tensor.pad or tensor.empty op) so that it no longer
/// depends on the induction variables of its `num_loops` closest enclosing
/// scf.for loops.
///
/// On success, the op defining the replacement value is appended to `results`.
/// A silenceable failure is produced if fewer than `num_loops` enclosing
/// scf.for loops exist, if the target op kind is unsupported, or if no
/// loop-independent replacement could be built.
DiagnosedSilenceableFailure transform::MakeLoopIndependentOp::applyToOne(
    Operation *target, transform::ApplyToEachResultList &results,
    transform::TransformState &state) {
  // Gather IVs, walking outwards from the target one scf.for at a time.
  SmallVector<Value> ivs;
  Operation *nextOp = target;
  for (uint64_t i = 0, e = getNumLoops(); i < e; ++i) {
    nextOp = nextOp->getParentOfType<scf::ForOp>();
    if (!nextOp) {
      DiagnosedSilenceableFailure diag = emitSilenceableError()
                                         << "could not find " << i
                                         << "-th enclosing loop";
      diag.attachNote(target->getLoc()) << "target op";
      return diag;
    }
    ivs.push_back(cast<scf::ForOp>(nextOp).getInductionVar());
  }

  // Rewrite IR.
  IRRewriter rewriter(target->getContext());
  FailureOr<Value> replacement = failure();
  if (auto padOp = dyn_cast<tensor::PadOp>(target)) {
    replacement = tensor::buildIndependentOp(rewriter, padOp, ivs);
  } else if (auto emptyOp = dyn_cast<tensor::EmptyOp>(target)) {
    replacement = tensor::buildIndependentOp(rewriter, emptyOp, ivs);
  } else {
    DiagnosedSilenceableFailure diag = emitSilenceableError()
                                       << "unsupported target op";
    diag.attachNote(target->getLoc()) << "target op";
    return diag;
  }
  if (failed(replacement)) {
    DiagnosedSilenceableFailure diag =
        emitSilenceableError() << "could not make target op loop-independent";
    diag.attachNote(target->getLoc()) << "target op";
    return diag;
  }

  // buildIndependentOp hands back the result of `target` itself when the op
  // was already independent of the IVs. Replacing an op with its own result
  // would erase it while it is still in use, so only replace when a different
  // op was produced.
  Operation *replacementOp = replacement->getDefiningOp();
  if (replacementOp != target)
    rewriter.replaceOp(target, *replacement);
  results.push_back(replacementOp);
  return DiagnosedSilenceableFailure::success();
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Transform op registration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace {
/// Transform dialect extension that registers the tensor transform ops.
class TensorTransformDialectExtension
    : public transform::TransformDialectExtension<
          TensorTransformDialectExtension> {
public:
  using Base::Base;

  void init() {
    // Declare the dialects listed as "generated" by this extension.
    declareGeneratedDialect<affine::AffineDialect>();
    declareGeneratedDialect<tensor::TensorDialect>();

    // The op list comes from the TableGen-generated .cpp.inc file.
    registerTransformOps<
#define GET_OP_LIST
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
        >();
  }
};
} // namespace
|
||||
|
||||
#define GET_OP_CLASSES
|
||||
#include "mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc"
|
||||
|
||||
void mlir::tensor::registerTransformDialectExtension(
|
||||
DialectRegistry ®istry) {
|
||||
registry.addExtensions<TensorTransformDialectExtension>();
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
|
||||
ExtractSliceFromReshapeUtils.cpp
|
||||
FoldIntoPackAndUnpackPatterns.cpp
|
||||
FoldTensorSubsetOps.cpp
|
||||
IndependenceTransforms.cpp
|
||||
MergeConsecutiveInsertExtractSlicePatterns.cpp
|
||||
ReshapePatterns.cpp
|
||||
SwapExtractSliceWithProducerPatterns.cpp
|
||||
@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
|
||||
|
||||
LINK_LIBS PUBLIC
|
||||
MLIRAffineDialect
|
||||
MLIRAffineTransforms
|
||||
MLIRAffineUtils
|
||||
MLIRArithDialect
|
||||
MLIRBufferizationDialect
|
||||
@ -30,4 +32,5 @@ add_mlir_dialect_library(MLIRTensorTransforms
|
||||
MLIRTilingInterface
|
||||
MLIRTransforms
|
||||
MLIRVectorDialect
|
||||
MLIRValueBoundsOpInterface
|
||||
)
|
||||
|
136
mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
Normal file
136
mlir/lib/Dialect/Tensor/Transforms/IndependenceTransforms.cpp
Normal file
@ -0,0 +1,136 @@
|
||||
//===- IndependenceTransforms.cpp - Make ops independent of values --------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
|
||||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Affine/Transforms/Transforms.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Utils/StaticValueUtils.h"
|
||||
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace mlir::tensor;
|
||||
|
||||
/// Make the given OpFoldResult independent of all independencies.
|
||||
static FailureOr<OpFoldResult> makeIndependent(OpBuilder &b, Location loc,
|
||||
OpFoldResult ofr,
|
||||
ValueRange independencies) {
|
||||
if (ofr.is<Attribute>())
|
||||
return ofr;
|
||||
Value value = ofr.get<Value>();
|
||||
AffineMap boundMap;
|
||||
ValueDimList mapOperands;
|
||||
if (failed(ValueBoundsConstraintSet::computeIndependentBound(
|
||||
boundMap, mapOperands, presburger::BoundType::UB, value,
|
||||
/*dim=*/std::nullopt, independencies, /*closedUB=*/true)))
|
||||
return failure();
|
||||
return mlir::affine::materializeComputedBound(b, loc, boundMap, mapOperands);
|
||||
}
|
||||
|
||||
/// Build a tensor.pad whose low/high padding does not depend on any value in
/// `independencies`, followed by a tensor.extract_slice that restores the
/// original result size. New ops are inserted right before `padOp`.
///
/// Returns the slice, or the original pad result if no padding changed.
/// Fails if the padding value is not constant, if no independent upper bound
/// exists for some padding amount, or if the result shape cannot be reified.
FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b, tensor::PadOp padOp,
                                            ValueRange independencies) {
  OpBuilder::InsertionGuard g(b);
  b.setInsertionPoint(padOp);
  Location loc = padOp.getLoc();

  // Non-constant padding not supported.
  Value constantPadding = padOp.getConstantPaddingValue();
  if (!constantPadding)
    return failure();

  SmallVector<OpFoldResult> oldMixedLow = padOp.getMixedLowPad();
  SmallVector<OpFoldResult> oldMixedHigh = padOp.getMixedHighPad();

  // Replace every padding amount by an upper bound that is independent of
  // `independencies`. Static amounts are passed through unchanged.
  SmallVector<OpFoldResult> newMixedLow, newMixedHigh;
  for (OpFoldResult ofr : oldMixedLow) {
    auto ub = makeIndependent(b, loc, ofr, independencies);
    if (failed(ub))
      return failure();
    newMixedLow.push_back(*ub);
  }
  for (OpFoldResult ofr : oldMixedHigh) {
    auto ub = makeIndependent(b, loc, ofr, independencies);
    if (failed(ub))
      return failure();
    newMixedHigh.push_back(*ub);
  }

  // Return existing tensor::PadOp if nothing has changed.
  if (llvm::equal(oldMixedLow, newMixedLow) &&
      llvm::equal(oldMixedHigh, newMixedHigh))
    return padOp.getResult();

  // Create a new tensor::PadOp.
  auto newPadOp = b.create<PadOp>(
      loc, padOp.getResultType(), padOp.getSource(), newMixedLow, newMixedHigh,
      constantPadding, padOp.getNofold(), /*attrs=*/ArrayRef<NamedAttribute>{});

  // Create a tensor::ExtractSliceOp.
  // Reify the result sizes of the old tensor::PadOp. The interface is used
  // unconditionally below, so use `cast` (asserts) instead of an unchecked
  // `dyn_cast`.
  ReifiedRankedShapedTypeDims reifiedSizes;
  auto reifyShapedTypeInterface =
      cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
  if (failed(reifyShapedTypeInterface.reifyResultShapes(b, reifiedSizes)))
    return failure();

  SmallVector<OpFoldResult> offsets, sizes, strides;
  for (int64_t i = 0, e = padOp.getResultType().getRank(); i < e; ++i) {
    // offset = ub(low_padding) - low_padding
    OpFoldResult prevLow = oldMixedLow[i];
    if (prevLow.is<Attribute>()) {
      // Static low padding was kept as-is, so the slice starts at 0.
      offsets.push_back(b.getIndexAttr(0));
    } else {
      // The materialized bound is an OpFoldResult and is not guaranteed to
      // hold a Value, so build the difference with the OpFoldResult-aware
      // helper instead of unwrapping it with get<Value>().
      AffineExpr difference = b.getAffineDimExpr(0) - b.getAffineDimExpr(1);
      offsets.push_back(affine::makeComposedFoldedAffineApply(
          b, loc, difference, {newMixedLow[i], prevLow}));
    }
    // size = reified result size
    if (!padOp.getResultType().isDynamicDim(i)) {
      sizes.push_back(b.getIndexAttr(padOp.getResultType().getDimSize(i)));
    } else {
      sizes.push_back(reifiedSizes[0][i]);
    }
    // stride = 1
    strides.push_back(b.getIndexAttr(1));
  }

  return b.create<ExtractSliceOp>(loc, newPadOp, offsets, sizes, strides)
      .getResult();
}
|
||||
|
||||
/// Build a tensor.empty whose sizes do not depend on any value in
/// `independencies`, followed by a tensor.extract_slice with the original
/// sizes. New ops are inserted right before `emptyOp`.
///
/// Returns the slice, or the original result if no size changed. Fails if no
/// independent upper bound could be computed for some size.
FailureOr<Value> tensor::buildIndependentOp(OpBuilder &b,
                                            tensor::EmptyOp emptyOp,
                                            ValueRange independencies) {
  OpBuilder::InsertionGuard guard(b);
  b.setInsertionPoint(emptyOp);
  Location loc = emptyOp.getLoc();

  SmallVector<OpFoldResult> originalSizes = emptyOp.getMixedSizes();

  // Bound every size independently of `independencies`.
  SmallVector<OpFoldResult> boundedSizes;
  for (OpFoldResult size : originalSizes) {
    FailureOr<OpFoldResult> bound =
        makeIndependent(b, loc, size, independencies);
    if (failed(bound))
      return failure();
    boundedSizes.push_back(*bound);
  }

  // Nothing changed: hand back the existing tensor::EmptyOp.
  if (llvm::equal(originalSizes, boundedSizes))
    return emptyOp.getResult();

  // Allocate with the bounded sizes ...
  Value boundedEmpty = b.create<EmptyOp>(loc, boundedSizes,
                                         emptyOp.getType().getElementType());

  // ... and slice out a tensor of the original sizes.
  SmallVector<OpFoldResult> zeroOffsets(boundedSizes.size(),
                                        b.getIndexAttr(0));
  SmallVector<OpFoldResult> unitStrides(boundedSizes.size(),
                                        b.getIndexAttr(1));
  auto slice = b.create<ExtractSliceOp>(loc, boundedEmpty, zeroOffsets,
                                        originalSizes, unitStrides);
  return slice.getResult();
}
|
@ -356,7 +356,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
|
||||
return success();
|
||||
}
|
||||
|
||||
LogicalResult ValueBoundsConstraintSet::computeBound(
|
||||
LogicalResult ValueBoundsConstraintSet::computeDependentBound(
|
||||
AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
|
||||
Value value, std::optional<int64_t> dim, ValueDimList dependencies,
|
||||
bool closedUB) {
|
||||
@ -368,6 +368,40 @@ LogicalResult ValueBoundsConstraintSet::computeBound(
|
||||
closedUB);
|
||||
}
|
||||
|
||||
/// Compute a bound for `value` (or its dimension `dim`) in terms of values
/// that are independent of everything in `independencies`.
LogicalResult ValueBoundsConstraintSet::computeIndependentBound(
    AffineMap &resultMap, ValueDimList &mapOperands, presburger::BoundType type,
    Value value, std::optional<int64_t> dim, ValueRange independencies,
    bool closedUB) {
  // A value is independent if neither it nor anything reachable through the
  // operands of its defining ops (reverse use-def chain) is listed in
  // `independencies`.
  auto reachesNoIndependency = [&](Value root) {
    DenseSet<Value> seen;
    SmallVector<Value> pending;
    pending.push_back(root);
    while (!pending.empty()) {
      Value current = pending.pop_back_val();
      // Skip values that were already inspected.
      if (!seen.insert(current).second)
        continue;
      if (llvm::is_contained(independencies, current))
        return false;
      // TODO: DominanceInfo could be used to stop the traversal early.
      // Values without a defining op have no operands to follow.
      if (Operation *def = current.getDefiningOp())
        pending.append(def->getOperands().begin(), def->getOperands().end());
    }
    return true;
  };

  // Reify bounds in terms of any independent values: traversal stops at the
  // first value that is independent, regardless of the dimension.
  auto stopAtIndependent = [&](Value v, std::optional<int64_t> /*d*/) {
    return reachesNoIndependency(v);
  };
  return computeBound(resultMap, mapOperands, type, value, dim,
                      stopAtIndependent, closedUB);
}
|
||||
|
||||
FailureOr<int64_t> ValueBoundsConstraintSet::computeConstantBound(
|
||||
presburger::BoundType type, Value value, std::optional<int64_t> dim,
|
||||
StopConditionFn stopCondition, bool closedUB) {
|
||||
|
151
mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
Normal file
151
mlir/test/Dialect/Tensor/transform-op-make-loop-independent.mlir
Normal file
@ -0,0 +1,151 @@
|
||||
// RUN: mlir-opt %s -allow-unregistered-dialect \
|
||||
// RUN: -test-transform-dialect-interpreter -canonicalize \
|
||||
// RUN: -split-input-file -verify-diagnostics | FileCheck %s
|
||||
|
||||
// This is a test case where "high" padding depends on the IV.
|
||||
|
||||
// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
|
||||
// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
|
||||
// CHECK-LABEL: func @make_pad_loop_independent_1(
|
||||
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
|
||||
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
|
||||
func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
|
||||
%t: tensor<?xf32>, %f: f32) {
|
||||
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
|
||||
scf.for %i = %lb to %ub step %step {
|
||||
// CHECK: %[[high:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
|
||||
// CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[5] high[%[[high]]]
|
||||
// CHECK: %[[dim:.*]] = tensor.dim %[[t]]
|
||||
// CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
|
||||
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][0] [%[[size]]] [1]
|
||||
%high = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
|
||||
%p = tensor.pad %t low[5] high[%high] {
|
||||
^bb0(%arg1: index):
|
||||
tensor.yield %f : f32
|
||||
} : tensor<?xf32> to tensor<?xf32>
|
||||
// CHECK: "dummy.some_use"(%[[replacement]])
|
||||
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !pdl.operation):
|
||||
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
|
||||
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// This is a test case where "low" padding depends on the IV.
|
||||
|
||||
// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
|
||||
// CHECK: #[[$map1:.*]] = affine_map<(d0)[s0, s1] -> (-d0 + s0 + s1 + 5)>
|
||||
// CHECK: #[[$map2:.*]] = affine_map<(d0)[s0] -> (d0 - s0)>
|
||||
// CHECK-LABEL: func @make_pad_loop_independent_1(
|
||||
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index,
|
||||
// CHECK-SAME: %[[t:.*]]: tensor<?xf32>
|
||||
func.func @make_pad_loop_independent_1(%lb: index, %ub: index, %step: index,
|
||||
%t: tensor<?xf32>, %f: f32) {
|
||||
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
|
||||
scf.for %i = %lb to %ub step %step {
|
||||
// CHECK: %[[low:.*]] = affine.apply #[[$map]]()[%[[ub]], %[[lb]]]
|
||||
// CHECK: %[[padded:.*]] = tensor.pad %[[t]] low[%[[low]]] high[5]
|
||||
// CHECK: %[[dim:.*]] = tensor.dim %[[t]]
|
||||
// CHECK: %[[size:.*]] = affine.apply #[[$map1]](%[[iv]])[%[[ub]], %[[dim]]]
|
||||
// CHECK: %[[offset:.*]] = affine.apply #[[$map2]](%[[iv]])[%[[lb]]]
|
||||
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[padded]][%[[offset]]] [%[[size]]] [1]
|
||||
%low = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
|
||||
%p = tensor.pad %t low[%low] high[5] {
|
||||
^bb0(%arg1: index):
|
||||
tensor.yield %f : f32
|
||||
} : tensor<?xf32> to tensor<?xf32>
|
||||
// CHECK: "dummy.some_use"(%[[replacement]])
|
||||
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !pdl.operation):
|
||||
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
|
||||
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2 - 2)>
|
||||
// CHECK-LABEL: func @two_loops(
|
||||
func.func @two_loops(%lb: index, %ub: index, %step: index,
|
||||
%t: tensor<?xf32>, %f: f32) {
|
||||
scf.for %i = %lb to %ub step %step {
|
||||
scf.for %j = %lb to %ub step %step {
|
||||
// CHECK: affine.apply #map()[%{{.*}}]
|
||||
%low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
|
||||
%p = tensor.pad %t low[%low] high[5] {
|
||||
^bb0(%arg1: index):
|
||||
tensor.yield %f : f32
|
||||
} : tensor<?xf32> to tensor<?xf32>
|
||||
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !pdl.operation):
|
||||
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
|
||||
%1 = transform.tensor.make_loop_independent %0 {num_loops = 2}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
func.func @not_enough_loops(%lb: index, %ub: index, %step: index,
|
||||
%t: tensor<?xf32>, %f: f32) {
|
||||
scf.for %i = %lb to %ub step %step {
|
||||
scf.for %j = %lb to %ub step %step {
|
||||
%low = affine.apply affine_map<(d0, d1)[] -> (d0 + d1)> (%i, %j)[]
|
||||
// expected-note@below {{target op}}
|
||||
%p = tensor.pad %t low[%low] high[5] {
|
||||
^bb0(%arg1: index):
|
||||
tensor.yield %f : f32
|
||||
} : tensor<?xf32> to tensor<?xf32>
|
||||
"dummy.some_use"(%p) : (tensor<?xf32>) -> ()
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !pdl.operation):
|
||||
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation
|
||||
// expected-error@below {{could not find 2-th enclosing loop}}
|
||||
%1 = transform.tensor.make_loop_independent %0 {num_loops = 3}
|
||||
}
|
||||
|
||||
// -----
|
||||
|
||||
// CHECK: #[[$map:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
|
||||
// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 - s1)>
|
||||
// CHECK-LABEL: func @make_empty_loop_independent(
|
||||
// CHECK-SAME: %[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
|
||||
func.func @make_empty_loop_independent(%lb: index, %ub: index, %step: index) {
|
||||
// CHECK: scf.for %[[iv:.*]] = %[[lb]] to %[[ub]]
|
||||
scf.for %i = %lb to %ub step %step {
|
||||
// CHECK: %[[slice_sz:.*]] = affine.apply #[[$map]](%[[iv]])[%[[ub]]]
|
||||
// CHECK: %[[empty_sz:.*]] = affine.apply #[[$map1]]()[%[[ub]], %[[lb]]]
|
||||
// CHECK: %[[empty:.*]] = tensor.empty(%[[empty_sz]]) : tensor<?xf32>
|
||||
// CHECK: %[[replacement:.*]] = tensor.extract_slice %[[empty]][0] [%[[slice_sz]]] [1]
|
||||
%sz = affine.apply affine_map<(d0)[s0] -> (s0 - d0)> (%i)[%ub]
|
||||
%empty = tensor.empty(%sz) : tensor<?xf32>
|
||||
// CHECK: "dummy.some_use"(%[[replacement]])
|
||||
"dummy.some_use"(%empty) : (tensor<?xf32>) -> ()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
transform.sequence failures(propagate) {
|
||||
^bb1(%arg1: !pdl.operation):
|
||||
%0 = transform.structured.match ops{["tensor.empty"]} in %arg1 : (!pdl.operation) -> !pdl.operation
|
||||
%1 = transform.tensor.make_loop_independent %0 {num_loops = 1}
|
||||
}
|
@ -5808,6 +5808,7 @@ cc_library(
|
||||
includes = ["include"],
|
||||
deps = [
|
||||
":AffineDialect",
|
||||
":AffineTransforms",
|
||||
":AffineUtils",
|
||||
":ArithDialect",
|
||||
":ArithUtils",
|
||||
@ -5824,20 +5825,57 @@ cc_library(
|
||||
":TensorPassIncGen",
|
||||
":TilingInterface",
|
||||
":Transforms",
|
||||
":ValueBoundsOpInterface",
|
||||
":VectorDialect",
|
||||
"//llvm:Support",
|
||||
],
|
||||
)
|
||||
|
||||
td_library(
|
||||
name = "TensorTransformOpsTdFiles",
|
||||
srcs = [
|
||||
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
|
||||
],
|
||||
includes = ["include"],
|
||||
deps = [
|
||||
":PDLDialect",
|
||||
":TransformDialectTdFiles",
|
||||
],
|
||||
)
|
||||
|
||||
gentbl_cc_library(
|
||||
name = "TensorTransformOpsIncGen",
|
||||
strip_include_prefix = "include",
|
||||
tbl_outs = [
|
||||
(
|
||||
["-gen-op-decls"],
|
||||
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.h.inc",
|
||||
),
|
||||
(
|
||||
["-gen-op-defs"],
|
||||
"include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.cpp.inc",
|
||||
),
|
||||
],
|
||||
tblgen = ":mlir-tblgen",
|
||||
td_file = "include/mlir/Dialect/Tensor/TransformOps/TensorTransformOps.td",
|
||||
deps = [
|
||||
":TensorTransformOpsTdFiles",
|
||||
],
|
||||
)
|
||||
|
||||
cc_library(
|
||||
name = "TensorTransformOps",
|
||||
srcs = glob(["lib/Dialect/Tensor/TransformOps/*.cpp"]),
|
||||
hdrs = glob(["include/mlir/Dialect/Tensor/TransformOps/*.h"]),
|
||||
includes = ["include"],
|
||||
deps = [
|
||||
":AffineDialect",
|
||||
":IR",
|
||||
":PDLDialect",
|
||||
":SCFDialect",
|
||||
":TensorDialect",
|
||||
":TensorTransformOpsIncGen",
|
||||
":TensorTransforms",
|
||||
":TransformDialect",
|
||||
"//llvm:Support",
|
||||
],
|
||||
|
Loading…
Reference in New Issue
Block a user