mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-10-07 10:54:01 +00:00
[mlir][arith] Rename operations: `maxf` → `maximumf`, `minf` → `minimumf` (#65800)
This patch is part of a larger initiative aimed at fixing floating-point `max` and `min` operations in MLIR: https://discourse.llvm.org/t/rfc-fix-floating-point-max-and-min-operations-in-mlir/72671. This commit addresses Task 1.2 of the mentioned RFC. By renaming these operations, we align their names with LLVM intrinsics that have corresponding semantics.
This commit is contained in:
parent
c5ccae4f18
commit
8a6e54c9b3
@ -803,11 +803,11 @@ createReductionDecl(fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
|
||||
Fortran::parser::Unwrap<Fortran::parser::Name>(procDesignator)}) {
|
||||
if (name->source == "max") {
|
||||
reductionOp =
|
||||
getReductionOperation<mlir::arith::MaxFOp, mlir::arith::MaxSIOp>(
|
||||
getReductionOperation<mlir::arith::MaximumFOp, mlir::arith::MaxSIOp>(
|
||||
builder, type, loc, op1, op2);
|
||||
} else if (name->source == "min") {
|
||||
reductionOp =
|
||||
getReductionOperation<mlir::arith::MinFOp, mlir::arith::MinSIOp>(
|
||||
getReductionOperation<mlir::arith::MinimumFOp, mlir::arith::MinSIOp>(
|
||||
builder, type, loc, op1, op2);
|
||||
} else if (name->source == "ior") {
|
||||
assert((type.isIntOrIndex()) && "only integer is expected");
|
||||
|
@ -6,7 +6,7 @@
|
||||
!CHECK: omp.yield(%[[MINIMUM_VAL_F]] : f32)
|
||||
!CHECK: combiner
|
||||
!CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32):
|
||||
!CHECK: %[[COMB_VAL_F:.*]] = arith.maxf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
|
||||
!CHECK: %[[COMB_VAL_F:.*]] = arith.maximumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
|
||||
!CHECK: omp.yield(%[[COMB_VAL_F]] : f32)
|
||||
|
||||
!CHECK: omp.reduction.declare @[[MAX_DECLARE_I:.*]] : i32 init {
|
||||
|
@ -6,7 +6,7 @@
|
||||
!CHECK: omp.yield(%[[MAXIMUM_VAL_F]] : f32)
|
||||
!CHECK: combiner
|
||||
!CHECK: ^bb0(%[[ARG0_F:.*]]: f32, %[[ARG1_F:.*]]: f32):
|
||||
!CHECK: %[[COMB_VAL_F:.*]] = arith.minf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
|
||||
!CHECK: %[[COMB_VAL_F:.*]] = arith.minimumf %[[ARG0_F]], %[[ARG1_F]] {{.*}}: f32
|
||||
!CHECK: omp.yield(%[[COMB_VAL_F]] : f32)
|
||||
|
||||
!CHECK: omp.reduction.declare @[[MIN_DECLARE_I:.*]] : i32 init {
|
||||
|
@ -832,16 +832,16 @@ def Arith_SubFOp : Arith_FloatBinaryOp<"subf"> {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MaxFOp
|
||||
// MaximumFOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def Arith_MaxFOp : Arith_FloatBinaryOp<"maxf", [Commutative]> {
|
||||
def Arith_MaximumFOp : Arith_FloatBinaryOp<"maximumf", [Commutative]> {
|
||||
let summary = "floating-point maximum operation";
|
||||
let description = [{
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= ssa-id `=` `arith.maxf` ssa-use `,` ssa-use `:` type
|
||||
operation ::= ssa-id `=` `arith.maximumf` ssa-use `,` ssa-use `:` type
|
||||
```
|
||||
|
||||
Returns the maximum of the two arguments, treating -0.0 as less than +0.0.
|
||||
@ -851,7 +851,7 @@ def Arith_MaxFOp : Arith_FloatBinaryOp<"maxf", [Commutative]> {
|
||||
|
||||
```mlir
|
||||
// Scalar floating-point maximum.
|
||||
%a = arith.maxf %b, %c : f64
|
||||
%a = arith.maximumf %b, %c : f64
|
||||
```
|
||||
}];
|
||||
let hasFolder = 1;
|
||||
@ -876,16 +876,16 @@ def Arith_MaxUIOp : Arith_TotalIntBinaryOp<"maxui", [Commutative]> {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MinFOp
|
||||
// MinimumFOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def Arith_MinFOp : Arith_FloatBinaryOp<"minf", [Commutative]> {
|
||||
def Arith_MinimumFOp : Arith_FloatBinaryOp<"minimumf", [Commutative]> {
|
||||
let summary = "floating-point minimum operation";
|
||||
let description = [{
|
||||
Syntax:
|
||||
|
||||
```
|
||||
operation ::= ssa-id `=` `arith.minf` ssa-use `,` ssa-use `:` type
|
||||
operation ::= ssa-id `=` `arith.minimumf` ssa-use `,` ssa-use `:` type
|
||||
```
|
||||
|
||||
Returns the minimum of the two arguments, treating -0.0 as less than +0.0.
|
||||
@ -895,7 +895,7 @@ def Arith_MinFOp : Arith_FloatBinaryOp<"minf", [Commutative]> {
|
||||
|
||||
```mlir
|
||||
// Scalar floating-point minimum.
|
||||
%a = arith.minf %b, %c : f64
|
||||
%a = arith.minimumf %b, %c : f64
|
||||
```
|
||||
}];
|
||||
let hasFolder = 1;
|
||||
|
@ -54,15 +54,15 @@ using FPToSIOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::FPToSIOp, LLVM::FPToSIOp>;
|
||||
using FPToUIOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::FPToUIOp, LLVM::FPToUIOp>;
|
||||
using MaxFOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MaxFOp, LLVM::MaximumOp,
|
||||
using MaximumFOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MaximumFOp, LLVM::MaximumOp,
|
||||
arith::AttrConvertFastMathToLLVM>;
|
||||
using MaxSIOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MaxSIOp, LLVM::SMaxOp>;
|
||||
using MaxUIOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MaxUIOp, LLVM::UMaxOp>;
|
||||
using MinFOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MinFOp, LLVM::MinimumOp,
|
||||
using MinimumFOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MinimumFOp, LLVM::MinimumOp,
|
||||
arith::AttrConvertFastMathToLLVM>;
|
||||
using MinSIOpLowering =
|
||||
VectorConvertToLLVMPattern<arith::MinSIOp, LLVM::SMinOp>;
|
||||
@ -495,10 +495,10 @@ void mlir::arith::populateArithToLLVMConversionPatterns(
|
||||
FPToUIOpLowering,
|
||||
IndexCastOpSILowering,
|
||||
IndexCastOpUILowering,
|
||||
MaxFOpLowering,
|
||||
MaximumFOpLowering,
|
||||
MaxSIOpLowering,
|
||||
MaxUIOpLowering,
|
||||
MinFOpLowering,
|
||||
MinimumFOpLowering,
|
||||
MinSIOpLowering,
|
||||
MinUIOpLowering,
|
||||
MulFOpLowering,
|
||||
|
@ -1039,12 +1039,13 @@ public:
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MaxFOp
|
||||
// MinimumFOp, MaximumFOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// Converts arith.maxf to spirv.GL.FMax or spirv.CL.fmax.
|
||||
/// Converts arith.maximumf/minimumf to spirv.GL.FMax/FMin or
|
||||
/// spirv.CL.fmax/fmin.
|
||||
template <typename Op, typename SPIRVOp>
|
||||
class MinMaxFOpPattern final : public OpConversionPattern<Op> {
|
||||
class MinimumMaximumFOpPattern final : public OpConversionPattern<Op> {
|
||||
public:
|
||||
using OpConversionPattern<Op>::OpConversionPattern;
|
||||
LogicalResult
|
||||
@ -1055,7 +1056,7 @@ public:
|
||||
if (!dstType)
|
||||
return getTypeConversionFailure(rewriter, op);
|
||||
|
||||
// arith.maxf/minf:
|
||||
// arith.maximumf/minimumf:
|
||||
// "if one of the arguments is NaN, then the result is also NaN."
|
||||
// spirv.GL.FMax/FMin
|
||||
// "which operand is the result is undefined if one of the operands
|
||||
@ -1135,15 +1136,15 @@ void mlir::arith::populateArithToSPIRVPatterns(
|
||||
MulIExtendedOpPattern<arith::MulUIExtendedOp, spirv::UMulExtendedOp>,
|
||||
SelectOpPattern,
|
||||
|
||||
MinMaxFOpPattern<arith::MaxFOp, spirv::GLFMaxOp>,
|
||||
MinMaxFOpPattern<arith::MinFOp, spirv::GLFMinOp>,
|
||||
MinimumMaximumFOpPattern<arith::MaximumFOp, spirv::GLFMaxOp>,
|
||||
MinimumMaximumFOpPattern<arith::MinimumFOp, spirv::GLFMinOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MaxSIOp, spirv::GLSMaxOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MaxUIOp, spirv::GLUMaxOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MinSIOp, spirv::GLSMinOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MinUIOp, spirv::GLUMinOp>,
|
||||
|
||||
MinMaxFOpPattern<arith::MaxFOp, spirv::CLFMaxOp>,
|
||||
MinMaxFOpPattern<arith::MinFOp, spirv::CLFMinOp>,
|
||||
MinimumMaximumFOpPattern<arith::MaximumFOp, spirv::CLFMaxOp>,
|
||||
MinimumMaximumFOpPattern<arith::MinimumFOp, spirv::CLFMinOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MaxSIOp, spirv::CLSMaxOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MaxUIOp, spirv::CLUMaxOp>,
|
||||
spirv::ElementwiseOpPattern<arith::MinSIOp, spirv::CLSMinOp>,
|
||||
|
@ -344,7 +344,7 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
|
||||
|
||||
// tosa::MaximumOp
|
||||
if (isa<tosa::MaximumOp>(op) && isa<FloatType>(elementTy)) {
|
||||
return rewriter.create<arith::MaxFOp>(loc, args[0], args[1]);
|
||||
return rewriter.create<arith::MaximumFOp>(loc, args[0], args[1]);
|
||||
}
|
||||
|
||||
if (isa<tosa::MaximumOp>(op) && elementTy.isSignlessInteger()) {
|
||||
@ -355,7 +355,7 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
|
||||
|
||||
// tosa::MinimumOp
|
||||
if (isa<tosa::MinimumOp>(op) && isa<FloatType>(elementTy)) {
|
||||
return rewriter.create<arith::MinFOp>(loc, args[0], args[1]);
|
||||
return rewriter.create<arith::MinimumFOp>(loc, args[0], args[1]);
|
||||
}
|
||||
|
||||
if (isa<tosa::MinimumOp>(op) && elementTy.isSignlessInteger()) {
|
||||
@ -931,7 +931,7 @@ static Value createLinalgBodyCalculationForReduceOp(Operation *op,
|
||||
}
|
||||
|
||||
if (isa<tosa::ReduceMinOp>(op) && isa<FloatType>(elementTy)) {
|
||||
return rewriter.create<arith::MinFOp>(loc, args[0], args[1]);
|
||||
return rewriter.create<arith::MinimumFOp>(loc, args[0], args[1]);
|
||||
}
|
||||
|
||||
if (isa<tosa::ReduceMinOp>(op) && isa<IntegerType>(elementTy)) {
|
||||
@ -941,7 +941,7 @@ static Value createLinalgBodyCalculationForReduceOp(Operation *op,
|
||||
}
|
||||
|
||||
if (isa<tosa::ReduceMaxOp>(op) && isa<FloatType>(elementTy)) {
|
||||
return rewriter.create<arith::MaxFOp>(loc, args[0], args[1]);
|
||||
return rewriter.create<arith::MaximumFOp>(loc, args[0], args[1]);
|
||||
}
|
||||
|
||||
if (isa<tosa::ReduceMaxOp>(op) && isa<IntegerType>(elementTy)) {
|
||||
|
@ -226,9 +226,9 @@ convertElementwiseOpToMMA(Operation *op) {
|
||||
return gpu::MMAElementwiseOp::MULF;
|
||||
if (isa<arith::SubFOp>(op))
|
||||
return gpu::MMAElementwiseOp::SUBF;
|
||||
if (isa<arith::MaxFOp>(op))
|
||||
if (isa<arith::MaximumFOp>(op))
|
||||
return gpu::MMAElementwiseOp::MAXF;
|
||||
if (isa<arith::MinFOp>(op))
|
||||
if (isa<arith::MinimumFOp>(op))
|
||||
return gpu::MMAElementwiseOp::MINF;
|
||||
if (isa<arith::DivFOp>(op))
|
||||
return gpu::MMAElementwiseOp::DIVF;
|
||||
|
@ -163,10 +163,10 @@ void mlir::amdgpu::populateAmdgpuEmulateAtomicsPatterns(
|
||||
target.addIllegalOp<RawBufferAtomicFmaxOp>();
|
||||
}
|
||||
}
|
||||
patterns
|
||||
.add<RawBufferAtomicByCasPattern<RawBufferAtomicFaddOp, arith::AddFOp>,
|
||||
RawBufferAtomicByCasPattern<RawBufferAtomicFmaxOp, arith::MaxFOp>>(
|
||||
patterns.getContext());
|
||||
patterns.add<
|
||||
RawBufferAtomicByCasPattern<RawBufferAtomicFaddOp, arith::AddFOp>,
|
||||
RawBufferAtomicByCasPattern<RawBufferAtomicFmaxOp, arith::MaximumFOp>>(
|
||||
patterns.getContext());
|
||||
}
|
||||
|
||||
void AmdgpuEmulateAtomicsPass::runOnOperation() {
|
||||
|
@ -60,8 +60,8 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
|
||||
.Case([](arith::AndIOp) { return arith::AtomicRMWKind::andi; })
|
||||
.Case([](arith::OrIOp) { return arith::AtomicRMWKind::ori; })
|
||||
.Case([](arith::MulIOp) { return arith::AtomicRMWKind::muli; })
|
||||
.Case([](arith::MinFOp) { return arith::AtomicRMWKind::minf; })
|
||||
.Case([](arith::MaxFOp) { return arith::AtomicRMWKind::maxf; })
|
||||
.Case([](arith::MinimumFOp) { return arith::AtomicRMWKind::minf; })
|
||||
.Case([](arith::MaximumFOp) { return arith::AtomicRMWKind::maxf; })
|
||||
.Case([](arith::MinSIOp) { return arith::AtomicRMWKind::mins; })
|
||||
.Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
|
||||
.Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
|
||||
|
@ -923,10 +923,10 @@ OpFoldResult arith::SubFOp::fold(FoldAdaptor adaptor) {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MaxFOp
|
||||
// MaximumFOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult arith::MaxFOp::fold(FoldAdaptor adaptor) {
|
||||
OpFoldResult arith::MaximumFOp::fold(FoldAdaptor adaptor) {
|
||||
// maxf(x,x) -> x
|
||||
if (getLhs() == getRhs())
|
||||
return getRhs();
|
||||
@ -991,10 +991,10 @@ OpFoldResult MaxUIOp::fold(FoldAdaptor adaptor) {
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MinFOp
|
||||
// MinimumFOp
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
OpFoldResult arith::MinFOp::fold(FoldAdaptor adaptor) {
|
||||
OpFoldResult arith::MinimumFOp::fold(FoldAdaptor adaptor) {
|
||||
// minf(x,x) -> x
|
||||
if (getLhs() == getRhs())
|
||||
return getRhs();
|
||||
@ -2426,8 +2426,8 @@ std::optional<TypedAttr> mlir::arith::getNeutralElement(Operation *op) {
|
||||
// Floating-point operations.
|
||||
.Case([](arith::AddFOp op) { return AtomicRMWKind::addf; })
|
||||
.Case([](arith::MulFOp op) { return AtomicRMWKind::mulf; })
|
||||
.Case([](arith::MaxFOp op) { return AtomicRMWKind::maxf; })
|
||||
.Case([](arith::MinFOp op) { return AtomicRMWKind::minf; })
|
||||
.Case([](arith::MaximumFOp op) { return AtomicRMWKind::maxf; })
|
||||
.Case([](arith::MinimumFOp op) { return AtomicRMWKind::minf; })
|
||||
// Integer operations.
|
||||
.Case([](arith::AddIOp op) { return AtomicRMWKind::addi; })
|
||||
.Case([](arith::OrIOp op) { return AtomicRMWKind::ori; })
|
||||
@ -2483,9 +2483,9 @@ Value mlir::arith::getReductionOp(AtomicRMWKind op, OpBuilder &builder,
|
||||
case AtomicRMWKind::muli:
|
||||
return builder.create<arith::MulIOp>(loc, lhs, rhs);
|
||||
case AtomicRMWKind::maxf:
|
||||
return builder.create<arith::MaxFOp>(loc, lhs, rhs);
|
||||
return builder.create<arith::MaximumFOp>(loc, lhs, rhs);
|
||||
case AtomicRMWKind::minf:
|
||||
return builder.create<arith::MinFOp>(loc, lhs, rhs);
|
||||
return builder.create<arith::MinimumFOp>(loc, lhs, rhs);
|
||||
case AtomicRMWKind::maxs:
|
||||
return builder.create<arith::MaxSIOp>(loc, lhs, rhs);
|
||||
case AtomicRMWKind::mins:
|
||||
|
@ -161,7 +161,7 @@ struct FloorDivSIOpConverter : public OpRewritePattern<arith::FloorDivSIOp> {
|
||||
};
|
||||
|
||||
template <typename OpTy, arith::CmpFPredicate pred>
|
||||
struct MaxMinFOpConverter : public OpRewritePattern<OpTy> {
|
||||
struct MaximumMinimumFOpConverter : public OpRewritePattern<OpTy> {
|
||||
public:
|
||||
using OpRewritePattern<OpTy>::OpRewritePattern;
|
||||
|
||||
@ -321,8 +321,8 @@ struct ArithExpandOpsPass
|
||||
arith::CeilDivSIOp,
|
||||
arith::CeilDivUIOp,
|
||||
arith::FloorDivSIOp,
|
||||
arith::MaxFOp,
|
||||
arith::MinFOp
|
||||
arith::MaximumFOp,
|
||||
arith::MinimumFOp
|
||||
>();
|
||||
|
||||
if (includeBf16) {
|
||||
@ -367,8 +367,8 @@ void mlir::arith::populateArithExpandOpsPatterns(RewritePatternSet &patterns) {
|
||||
populateCeilFloorDivExpandOpsPatterns(patterns);
|
||||
// clang-format off
|
||||
patterns.add<
|
||||
MaxMinFOpConverter<MaxFOp, arith::CmpFPredicate::UGT>,
|
||||
MaxMinFOpConverter<MinFOp, arith::CmpFPredicate::ULT>
|
||||
MaximumMinimumFOpConverter<MaximumFOp, arith::CmpFPredicate::UGT>,
|
||||
MaximumMinimumFOpConverter<MinimumFOp, arith::CmpFPredicate::ULT>
|
||||
>(patterns.getContext());
|
||||
// clang-format on
|
||||
}
|
||||
|
@ -449,22 +449,22 @@ public:
|
||||
case BinaryFn::max_signed:
|
||||
assert(!allComplex);
|
||||
if (allFloatingPoint)
|
||||
return builder.create<arith::MaxFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MaximumFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MaxSIOp>(arg0.getLoc(), arg0, arg1);
|
||||
case BinaryFn::min_signed:
|
||||
assert(!allComplex);
|
||||
if (allFloatingPoint)
|
||||
return builder.create<arith::MinFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MinimumFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MinSIOp>(arg0.getLoc(), arg0, arg1);
|
||||
case BinaryFn::max_unsigned:
|
||||
assert(!allComplex);
|
||||
if (allFloatingPoint)
|
||||
return builder.create<arith::MaxFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MaximumFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MaxUIOp>(arg0.getLoc(), arg0, arg1);
|
||||
case BinaryFn::min_unsigned:
|
||||
assert(!allComplex);
|
||||
if (allFloatingPoint)
|
||||
return builder.create<arith::MinFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MinimumFOp>(arg0.getLoc(), arg0, arg1);
|
||||
return builder.create<arith::MinUIOp>(arg0.getLoc(), arg0, arg1);
|
||||
}
|
||||
llvm_unreachable("unsupported binary function");
|
||||
@ -2555,8 +2555,8 @@ FailureOr<SmallVector<Value>> SoftmaxOp::decomposeOperation(OpBuilder &b) {
|
||||
Value neutralForMaxFInit =
|
||||
b.create<linalg::FillOp>(loc, Value{neutralForMaxF}, outputReduce)
|
||||
.result();
|
||||
Value max =
|
||||
reduce<arith::MaxFOp>(b, loc, input, neutralForMaxFInit, reductionDim);
|
||||
Value max = reduce<arith::MaximumFOp>(b, loc, input, neutralForMaxFInit,
|
||||
reductionDim);
|
||||
|
||||
// Step 2: Subtract max from input and exponentiate.
|
||||
Value numerator = buildSubAndExpOp(b, loc, input, max, output, reductionDim);
|
||||
|
@ -505,10 +505,10 @@ mlir::linalg::getCombinerOpKind(Operation *combinerOp) {
|
||||
.Case<arith::AndIOp>([&](auto op) { return CombiningKind::AND; })
|
||||
.Case<arith::MaxSIOp>([&](auto op) { return CombiningKind::MAXSI; })
|
||||
.Case<arith::MaxUIOp>([&](auto op) { return CombiningKind::MAXUI; })
|
||||
.Case<arith::MaxFOp>([&](auto op) { return CombiningKind::MAXF; })
|
||||
.Case<arith::MaximumFOp>([&](auto op) { return CombiningKind::MAXF; })
|
||||
.Case<arith::MinSIOp>([&](auto op) { return CombiningKind::MINSI; })
|
||||
.Case<arith::MinUIOp>([&](auto op) { return CombiningKind::MINUI; })
|
||||
.Case<arith::MinFOp>([&](auto op) { return CombiningKind::MINF; })
|
||||
.Case<arith::MinimumFOp>([&](auto op) { return CombiningKind::MINF; })
|
||||
.Case<arith::MulIOp, arith::MulFOp>(
|
||||
[&](auto op) { return CombiningKind::MUL; })
|
||||
.Case<arith::OrIOp>([&](auto op) { return CombiningKind::OR; })
|
||||
|
@ -556,8 +556,8 @@ public:
|
||||
auto red = cast<linalg::YieldOp>(op.getRegion().front().getTerminator())
|
||||
.getOperand(0)
|
||||
.getDefiningOp();
|
||||
if (!isa<arith::AndIOp, arith::MulIOp, arith::MulFOp, arith::MinFOp,
|
||||
arith::MinSIOp, arith::MinUIOp, arith::MaxFOp, arith::MaxSIOp,
|
||||
if (!isa<arith::AndIOp, arith::MulIOp, arith::MulFOp, arith::MinimumFOp,
|
||||
arith::MinSIOp, arith::MinUIOp, arith::MaximumFOp, arith::MaxSIOp,
|
||||
arith::MaxUIOp>(red))
|
||||
return failure();
|
||||
Value s0 = op.getBlock()->getArgument(0);
|
||||
|
@ -33,8 +33,8 @@ mlir::tosa::condenseValues(const SmallVector<Value> &values) {
|
||||
|
||||
Value mlir::tosa::clampFloatHelper(Location loc, Value arg, Value min,
|
||||
Value max, OpBuilder &rewriter) {
|
||||
Value minValue = rewriter.create<arith::MinFOp>(loc, arg, max);
|
||||
return rewriter.create<arith::MaxFOp>(loc, minValue, min);
|
||||
Value minValue = rewriter.create<arith::MinimumFOp>(loc, arg, max);
|
||||
return rewriter.create<arith::MaximumFOp>(loc, minValue, min);
|
||||
}
|
||||
|
||||
Value mlir::tosa::clampIntHelper(Location loc, Value arg, Value min, Value max,
|
||||
|
@ -5949,12 +5949,12 @@ Value mlir::vector::makeArithReduction(OpBuilder &b, Location loc,
|
||||
case CombiningKind::MAXF:
|
||||
assert(llvm::isa<FloatType>(t1) && llvm::isa<FloatType>(tAcc) &&
|
||||
"expected float values");
|
||||
result = b.createOrFold<arith::MaxFOp>(loc, v1, acc);
|
||||
result = b.createOrFold<arith::MaximumFOp>(loc, v1, acc);
|
||||
break;
|
||||
case CombiningKind::MINF:
|
||||
assert(llvm::isa<FloatType>(t1) && llvm::isa<FloatType>(tAcc) &&
|
||||
"expected float values");
|
||||
result = b.createOrFold<arith::MinFOp>(loc, v1, acc);
|
||||
result = b.createOrFold<arith::MinimumFOp>(loc, v1, acc);
|
||||
break;
|
||||
case CombiningKind::MAXSI:
|
||||
assert(t1.isIntOrIndex() && tAcc.isIntOrIndex() && "expected int values");
|
||||
|
@ -87,10 +87,10 @@ static Value genOperator(Location loc, Value x, Value y,
|
||||
combinedResult = rewriter.create<arith::XOrIOp>(loc, x, y);
|
||||
break;
|
||||
case CombiningKind::MINF:
|
||||
combinedResult = rewriter.create<arith::MinFOp>(loc, x, y);
|
||||
combinedResult = rewriter.create<arith::MinimumFOp>(loc, x, y);
|
||||
break;
|
||||
case CombiningKind::MAXF:
|
||||
combinedResult = rewriter.create<arith::MaxFOp>(loc, x, y);
|
||||
combinedResult = rewriter.create<arith::MaximumFOp>(loc, x, y);
|
||||
break;
|
||||
}
|
||||
return combinedResult;
|
||||
|
@ -527,28 +527,28 @@ class _BodyBuilder:
|
||||
|
||||
def _binary_max_signed(self, lhs: Value, rhs: Value) -> Value:
|
||||
if _is_floating_point_type(lhs.type):
|
||||
return arith.MaxFOp(lhs, rhs).result
|
||||
return arith.MaximumFOp(lhs, rhs).result
|
||||
if _is_integer_type(lhs.type) or _is_index_type(lhs.type):
|
||||
return arith.MaxSIOp(lhs, rhs).result
|
||||
raise NotImplementedError("Unsupported 'max' operands: {lhs}, {rhs}")
|
||||
|
||||
def _binary_max_unsigned(self, lhs: Value, rhs: Value) -> Value:
|
||||
if _is_floating_point_type(lhs.type):
|
||||
return arith.MaxFOp(lhs, rhs).result
|
||||
return arith.MaximumFOp(lhs, rhs).result
|
||||
if _is_integer_type(lhs.type) or _is_index_type(lhs.type):
|
||||
return arith.MaxUIOp(lhs, rhs).result
|
||||
raise NotImplementedError("Unsupported 'max_unsigned' operands: {lhs}, {rhs}")
|
||||
|
||||
def _binary_min_signed(self, lhs: Value, rhs: Value) -> Value:
|
||||
if _is_floating_point_type(lhs.type):
|
||||
return arith.MinFOp(lhs, rhs).result
|
||||
return arith.MinimumFOp(lhs, rhs).result
|
||||
if _is_integer_type(lhs.type) or _is_index_type(lhs.type):
|
||||
return arith.MinSIOp(lhs, rhs).result
|
||||
raise NotImplementedError("Unsupported 'min' operands: {lhs}, {rhs}")
|
||||
|
||||
def _binary_min_unsigned(self, lhs: Value, rhs: Value) -> Value:
|
||||
if _is_floating_point_type(lhs.type):
|
||||
return arith.MinFOp(lhs, rhs).result
|
||||
return arith.MinimumFOp(lhs, rhs).result
|
||||
if _is_integer_type(lhs.type) or _is_index_type(lhs.type):
|
||||
return arith.MinUIOp(lhs, rhs).result
|
||||
raise NotImplementedError("Unsupported 'min_unsigned' operands: {lhs}, {rhs}")
|
||||
|
@ -523,9 +523,9 @@ func.func @minmaxi(%arg0 : i32, %arg1 : i32) -> i32 {
|
||||
// CHECK-LABEL: @minmaxf
|
||||
func.func @minmaxf(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||
// CHECK: = llvm.intr.minimum(%arg0, %arg1) : (f32, f32) -> f32
|
||||
%0 = arith.minf %arg0, %arg1 : f32
|
||||
%0 = arith.minimumf %arg0, %arg1 : f32
|
||||
// CHECK: = llvm.intr.maximum(%arg0, %arg1) : (f32, f32) -> f32
|
||||
%1 = arith.maxf %arg0, %arg1 : f32
|
||||
%1 = arith.maximumf %arg0, %arg1 : f32
|
||||
return %0 : f32
|
||||
}
|
||||
|
||||
@ -555,9 +555,9 @@ func.func @ops_supporting_fastmath(%arg0: f32, %arg1: f32, %arg2: i32) {
|
||||
// CHECK: llvm.fdiv %arg0, %arg1 {fastmathFlags = #llvm.fastmath<fast>} : f32
|
||||
%1 = arith.divf %arg0, %arg1 fastmath<fast> : f32
|
||||
// CHECK: llvm.intr.maximum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<fast>} : (f32, f32) -> f32
|
||||
%2 = arith.maxf %arg0, %arg1 fastmath<fast> : f32
|
||||
%2 = arith.maximumf %arg0, %arg1 fastmath<fast> : f32
|
||||
// CHECK: llvm.intr.minimum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath<fast>} : (f32, f32) -> f32
|
||||
%3 = arith.minf %arg0, %arg1 fastmath<fast> : f32
|
||||
%3 = arith.minimumf %arg0, %arg1 fastmath<fast> : f32
|
||||
// CHECK: llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath<fast>} : f32
|
||||
%4 = arith.mulf %arg0, %arg1 fastmath<fast> : f32
|
||||
// CHECK: llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath<fast>} : f32
|
||||
|
@ -1132,7 +1132,7 @@ func.func @float32_minf_scalar(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||
// CHECK: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32
|
||||
// CHECK: %[[SELECT1:.+]] = spirv.Select %[[LHS_NAN]], %[[LHS]], %[[MIN]]
|
||||
// CHECK: %[[SELECT2:.+]] = spirv.Select %[[RHS_NAN]], %[[RHS]], %[[SELECT1]]
|
||||
%0 = arith.minf %arg0, %arg1 : f32
|
||||
%0 = arith.minimumf %arg0, %arg1 : f32
|
||||
// CHECK: return %[[SELECT2]]
|
||||
return %0: f32
|
||||
}
|
||||
@ -1145,7 +1145,7 @@ func.func @float32_maxf_scalar(%arg0 : vector<2xf32>, %arg1 : vector<2xf32>) ->
|
||||
// CHECK: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : vector<2xf32>
|
||||
// CHECK: %[[SELECT1:.+]] = spirv.Select %[[LHS_NAN]], %[[LHS]], %[[MAX]]
|
||||
// CHECK: %[[SELECT2:.+]] = spirv.Select %[[RHS_NAN]], %[[RHS]], %[[SELECT1]]
|
||||
%0 = arith.maxf %arg0, %arg1 : vector<2xf32>
|
||||
%0 = arith.maximumf %arg0, %arg1 : vector<2xf32>
|
||||
// CHECK: return %[[SELECT2]]
|
||||
return %0: vector<2xf32>
|
||||
}
|
||||
@ -1278,7 +1278,7 @@ func.func @float32_minf_scalar(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||
// CHECK: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32
|
||||
// CHECK: %[[SELECT1:.+]] = spirv.Select %[[LHS_NAN]], %[[LHS]], %[[MIN]]
|
||||
// CHECK: %[[SELECT2:.+]] = spirv.Select %[[RHS_NAN]], %[[RHS]], %[[SELECT1]]
|
||||
%0 = arith.minf %arg0, %arg1 : f32
|
||||
%0 = arith.minimumf %arg0, %arg1 : f32
|
||||
// CHECK: return %[[SELECT2]]
|
||||
return %0: f32
|
||||
}
|
||||
@ -1291,7 +1291,7 @@ func.func @float32_maxf_scalar(%arg0 : vector<2xf32>, %arg1 : vector<2xf32>) ->
|
||||
// CHECK: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : vector<2xf32>
|
||||
// CHECK: %[[SELECT1:.+]] = spirv.Select %[[LHS_NAN]], %[[LHS]], %[[MAX]]
|
||||
// CHECK: %[[SELECT2:.+]] = spirv.Select %[[RHS_NAN]], %[[RHS]], %[[SELECT1]]
|
||||
%0 = arith.maxf %arg0, %arg1 : vector<2xf32>
|
||||
%0 = arith.maximumf %arg0, %arg1 : vector<2xf32>
|
||||
// CHECK: return %[[SELECT2]]
|
||||
return %0: vector<2xf32>
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ module attributes {
|
||||
// CHECK-SAME: %[[LHS:.+]]: f32, %[[RHS:.+]]: f32
|
||||
func.func @minf(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||
// CHECK: %[[F:.+]] = spirv.GL.FMin %[[LHS]], %[[RHS]]
|
||||
%0 = arith.minf %arg0, %arg1 : f32
|
||||
%0 = arith.minimumf %arg0, %arg1 : f32
|
||||
// CHECK: return %[[F]]
|
||||
return %0: f32
|
||||
}
|
||||
@ -43,7 +43,7 @@ func.func @minf(%arg0 : f32, %arg1 : f32) -> f32 {
|
||||
// CHECK-SAME: %[[LHS:.+]]: vector<4xf32>, %[[RHS:.+]]: vector<4xf32>
|
||||
func.func @maxf(%arg0 : vector<4xf32>, %arg1 : vector<4xf32>) -> vector<4xf32> {
|
||||
// CHECK: %[[F:.+]] = spirv.GL.FMax %[[LHS]], %[[RHS]]
|
||||
%0 = arith.maxf %arg0, %arg1 : vector<4xf32>
|
||||
%0 = arith.maximumf %arg0, %arg1 : vector<4xf32>
|
||||
// CHECK: return %[[F]]
|
||||
return %0: vector<4xf32>
|
||||
}
|
||||
|
@ -486,11 +486,11 @@ func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
|
||||
%13 = tosa.select %10, %0, %1 : (tensor<1xi1>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
|
||||
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.maximumf
|
||||
%14 = tosa.maximum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
|
||||
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.minimumf
|
||||
%15 = tosa.minimum %0, %1 : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
|
||||
|
||||
// CHECK: linalg.generic
|
||||
@ -502,8 +502,8 @@ func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
|
||||
%17 = tosa.floor %0 : (tensor<1xf32>) -> tensor<1xf32>
|
||||
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.minimumf
|
||||
// CHECK: arith.maximumf
|
||||
%18 = tosa.clamp %0 {min_int = 1 : i64, max_int = 5 : i64, min_fp = 1.0 : f32, max_fp = 5.0 : f32} : (tensor<1xf32>) -> tensor<1xf32>
|
||||
|
||||
// CHECK: linalg.generic
|
||||
@ -517,8 +517,8 @@ func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
|
||||
// CHECK: arith.constant -2.14748365E+9
|
||||
// CHECK: arith.constant 2.14748365E+9
|
||||
// CHECK: math.roundeven
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.minimumf
|
||||
// CHECK: arith.maximumf
|
||||
// CHECK: arith.fptosi
|
||||
%20 = tosa.cast %0 : (tensor<1xf32>) -> tensor<1xi32>
|
||||
|
||||
@ -555,8 +555,8 @@ func.func @test_simple_f16(%arg0: tensor<1xf16>) -> () {
|
||||
// CHECK: arith.constant -1.280000e+02
|
||||
// CHECK: arith.constant 1.270000e+02
|
||||
// CHECK: math.roundeven
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.minimumf
|
||||
// CHECK: arith.maximumf
|
||||
// CHECK: arith.fptosi
|
||||
%1 = "tosa.cast"(%arg0) : (tensor<1xf16>) -> tensor<1xi8>
|
||||
return
|
||||
@ -757,8 +757,8 @@ func.func @test_clamp_f16(%arg0: tensor<1xf16>) -> () {
|
||||
// CHECK: ^bb0(%[[ARG1:.+]]: f16,
|
||||
// CHECK-DAG: %[[C0:.+]] = arith.constant 0.0
|
||||
// CHECK-DAG: %[[C6:.+]] = arith.constant 6.0
|
||||
// CHECK-DAG: %[[MIN:.+]] = arith.minf %[[ARG1]], %[[C6]]
|
||||
// CHECK-DAG: %[[MAX:.+]] = arith.maxf %[[MIN]], %[[C0]]
|
||||
// CHECK-DAG: %[[MIN:.+]] = arith.minimumf %[[ARG1]], %[[C6]]
|
||||
// CHECK-DAG: %[[MAX:.+]] = arith.maximumf %[[MIN]], %[[C0]]
|
||||
%0 = tosa.clamp %arg0 {min_int = 0 : i64, max_int = 0 : i64, min_fp = 0.0 : f32, max_fp = 6.0 : f32} : (tensor<1xf16>) -> tensor<1xf16>
|
||||
|
||||
return
|
||||
@ -932,13 +932,13 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () {
|
||||
// CHECK: arith.constant 3.40282347E+38 : f32
|
||||
// CHECK: linalg.fill
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.minimumf
|
||||
%3 = tosa.reduce_min %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
|
||||
|
||||
// CHECK: arith.constant -3.40282347E+38 : f32
|
||||
// CHECK: linalg.fill
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.maximumf
|
||||
%4 = tosa.reduce_max %arg0 {axis = 0 : i32} : (tensor<5x4xf32>) -> tensor<1x4xf32>
|
||||
return
|
||||
}
|
||||
@ -1022,7 +1022,7 @@ func.func @reduce_float_dyn_multiple(%arg0: tensor<?x?xf32>) -> () {
|
||||
// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]]
|
||||
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<?x?xf32>) outs(%[[FILL]] : tensor<?xf32>)
|
||||
// CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32)
|
||||
// CHECK: %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32
|
||||
// CHECK: %[[MAX:.+]] = arith.maximumf %[[ARG1]], %[[ARG2]] : f32
|
||||
// CHECK: linalg.yield %[[MAX]] : f32
|
||||
// CHECK: tensor.expand_shape %[[GENERIC]] {{\[}}[0, 1]] : tensor<?xf32> into tensor<?x1xf32>
|
||||
%0 = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<?x?xf32>) -> tensor<?x1xf32>
|
||||
|
@ -451,7 +451,7 @@ func.func @masked_float_max_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v
|
||||
// CHECK-LABEL: func.func @masked_float_max_outerprod(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = arith.maxf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = arith.maximumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
|
||||
|
||||
// -----
|
||||
@ -464,7 +464,7 @@ func.func @masked_float_min_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: v
|
||||
// CHECK-LABEL: func.func @masked_float_min_outerprod(
|
||||
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> {
|
||||
// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = arith.minf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
|
||||
// CHECK: %[[VAL_9:.*]] = arith.minimumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32>
|
||||
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32>
|
||||
|
||||
// -----
|
||||
|
@ -11,7 +11,7 @@ func.func @atomic_fmax(%val: f32, %buffer: memref<?xf32>, %idx: i32) {
|
||||
// GFX9: [[ld:%.+]] = amdgpu.raw_buffer_load {foo, indexOffset = 4 : i32} [[buffer]][[[idx]]]
|
||||
// GFX9: cf.br [[loop:\^.+]]([[ld]] : f32)
|
||||
// GFX9: [[loop]]([[arg:%.+]]: f32):
|
||||
// GFX9: [[operated:%.+]] = arith.maxf [[val]], [[arg]]
|
||||
// GFX9: [[operated:%.+]] = arith.maximumf [[val]], [[arg]]
|
||||
// GFX9: [[atomicRes:%.+]] = amdgpu.raw_buffer_atomic_cmpswap {foo, indexOffset = 4 : i32} [[operated]], [[arg]] -> [[buffer]][[[idx]]]
|
||||
// GFX9: [[argCast:%.+]] = arith.bitcast [[arg]] : f32 to i32
|
||||
// GFX9: [[resCast:%.+]] = arith.bitcast [[atomicRes]] : f32 to i32
|
||||
|
@ -34,7 +34,7 @@ func.func @vecdim_reduction_minf(%in: memref<256x512xf32>, %out: memref<256xf32>
|
||||
affine.for %i = 0 to 256 {
|
||||
%final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
|
||||
%ld = affine.load %in[%i, %j] : memref<256x512xf32>
|
||||
%min = arith.minf %red_iter, %ld : f32
|
||||
%min = arith.minimumf %red_iter, %ld : f32
|
||||
affine.yield %min : f32
|
||||
}
|
||||
affine.store %final_red, %out[%i] : memref<256xf32>
|
||||
@ -47,7 +47,7 @@ func.func @vecdim_reduction_minf(%in: memref<256x512xf32>, %out: memref<256xf32>
|
||||
// CHECK: %[[vmax:.*]] = arith.constant dense<0x7F800000> : vector<128xf32>
|
||||
// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmax]]) -> (vector<128xf32>) {
|
||||
// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
|
||||
// CHECK: %[[min:.*]] = arith.minf %[[red_iter]], %[[ld]] : vector<128xf32>
|
||||
// CHECK: %[[min:.*]] = arith.minimumf %[[red_iter]], %[[ld]] : vector<128xf32>
|
||||
// CHECK: affine.yield %[[min]] : vector<128xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[final_min:.*]] = vector.reduction <minf>, %[[vred:.*]] : vector<128xf32> into f32
|
||||
@ -61,7 +61,7 @@ func.func @vecdim_reduction_maxf(%in: memref<256x512xf32>, %out: memref<256xf32>
|
||||
affine.for %i = 0 to 256 {
|
||||
%final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
|
||||
%ld = affine.load %in[%i, %j] : memref<256x512xf32>
|
||||
%max = arith.maxf %red_iter, %ld : f32
|
||||
%max = arith.maximumf %red_iter, %ld : f32
|
||||
affine.yield %max : f32
|
||||
}
|
||||
affine.store %final_red, %out[%i] : memref<256xf32>
|
||||
@ -74,7 +74,7 @@ func.func @vecdim_reduction_maxf(%in: memref<256x512xf32>, %out: memref<256xf32>
|
||||
// CHECK: %[[vmin:.*]] = arith.constant dense<0xFF800000> : vector<128xf32>
|
||||
// CHECK: %[[vred:.*]] = affine.for %{{.*}} = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vmin]]) -> (vector<128xf32>) {
|
||||
// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
|
||||
// CHECK: %[[max:.*]] = arith.maxf %[[red_iter]], %[[ld]] : vector<128xf32>
|
||||
// CHECK: %[[max:.*]] = arith.maximumf %[[red_iter]], %[[ld]] : vector<128xf32>
|
||||
// CHECK: affine.yield %[[max]] : vector<128xf32>
|
||||
// CHECK: }
|
||||
// CHECK: %[[final_max:.*]] = vector.reduction <maxf>, %[[vred:.*]] : vector<128xf32> into f32
|
||||
|
@ -1638,13 +1638,13 @@ func.func @test_minui2(%arg0 : i8) -> (i8, i8, i8, i8) {
|
||||
// CHECK-LABEL: @test_minf(
|
||||
func.func @test_minf(%arg0 : f32) -> (f32, f32, f32) {
|
||||
// CHECK-DAG: %[[C0:.+]] = arith.constant 0.0
|
||||
// CHECK-NEXT: %[[X:.+]] = arith.minf %arg0, %[[C0]]
|
||||
// CHECK-NEXT: %[[X:.+]] = arith.minimumf %arg0, %[[C0]]
|
||||
// CHECK-NEXT: return %[[X]], %arg0, %arg0
|
||||
%c0 = arith.constant 0.0 : f32
|
||||
%inf = arith.constant 0x7F800000 : f32
|
||||
%0 = arith.minf %c0, %arg0 : f32
|
||||
%1 = arith.minf %arg0, %arg0 : f32
|
||||
%2 = arith.minf %inf, %arg0 : f32
|
||||
%0 = arith.minimumf %c0, %arg0 : f32
|
||||
%1 = arith.minimumf %arg0, %arg0 : f32
|
||||
%2 = arith.minimumf %inf, %arg0 : f32
|
||||
return %0, %1, %2 : f32, f32, f32
|
||||
}
|
||||
|
||||
@ -1653,13 +1653,13 @@ func.func @test_minf(%arg0 : f32) -> (f32, f32, f32) {
|
||||
// CHECK-LABEL: @test_maxf(
|
||||
func.func @test_maxf(%arg0 : f32) -> (f32, f32, f32) {
|
||||
// CHECK-DAG: %[[C0:.+]] = arith.constant
|
||||
// CHECK-NEXT: %[[X:.+]] = arith.maxf %arg0, %[[C0]]
|
||||
// CHECK-NEXT: %[[X:.+]] = arith.maximumf %arg0, %[[C0]]
|
||||
// CHECK-NEXT: return %[[X]], %arg0, %arg0
|
||||
%c0 = arith.constant 0.0 : f32
|
||||
%-inf = arith.constant 0xFF800000 : f32
|
||||
%0 = arith.maxf %c0, %arg0 : f32
|
||||
%1 = arith.maxf %arg0, %arg0 : f32
|
||||
%2 = arith.maxf %-inf, %arg0 : f32
|
||||
%0 = arith.maximumf %c0, %arg0 : f32
|
||||
%1 = arith.maximumf %arg0, %arg0 : f32
|
||||
%2 = arith.maximumf %-inf, %arg0 : f32
|
||||
return %0, %1, %2 : f32, f32, f32
|
||||
}
|
||||
|
||||
|
@ -178,7 +178,7 @@ func.func @ceildivui_index(%arg0: index, %arg1: index) -> (index) {
|
||||
|
||||
// CHECK-LABEL: func @maxf
|
||||
func.func @maxf(%a: f32, %b: f32) -> f32 {
|
||||
%result = arith.maxf %a, %b : f32
|
||||
%result = arith.maximumf %a, %b : f32
|
||||
return %result : f32
|
||||
}
|
||||
// CHECK-SAME: %[[LHS:.*]]: f32, %[[RHS:.*]]: f32)
|
||||
@ -192,7 +192,7 @@ func.func @maxf(%a: f32, %b: f32) -> f32 {
|
||||
|
||||
// CHECK-LABEL: func @maxf_vector
|
||||
func.func @maxf_vector(%a: vector<4xf16>, %b: vector<4xf16>) -> vector<4xf16> {
|
||||
%result = arith.maxf %a, %b : vector<4xf16>
|
||||
%result = arith.maximumf %a, %b : vector<4xf16>
|
||||
return %result : vector<4xf16>
|
||||
}
|
||||
// CHECK-SAME: %[[LHS:.*]]: vector<4xf16>, %[[RHS:.*]]: vector<4xf16>)
|
||||
@ -206,7 +206,7 @@ func.func @maxf_vector(%a: vector<4xf16>, %b: vector<4xf16>) -> vector<4xf16> {
|
||||
|
||||
// CHECK-LABEL: func @minf
|
||||
func.func @minf(%a: f32, %b: f32) -> f32 {
|
||||
%result = arith.minf %a, %b : f32
|
||||
%result = arith.minimumf %a, %b : f32
|
||||
return %result : f32
|
||||
}
|
||||
|
||||
|
@ -1071,9 +1071,9 @@ func.func @maximum(%v1: vector<4xf32>, %v2: vector<4xf32>,
|
||||
%sv1: vector<[4]xf32>, %sv2: vector<[4]xf32>,
|
||||
%f1: f32, %f2: f32,
|
||||
%i1: i32, %i2: i32) {
|
||||
%max_vector = arith.maxf %v1, %v2 : vector<4xf32>
|
||||
%max_scalable_vector = arith.maxf %sv1, %sv2 : vector<[4]xf32>
|
||||
%max_float = arith.maxf %f1, %f2 : f32
|
||||
%max_vector = arith.maximumf %v1, %v2 : vector<4xf32>
|
||||
%max_scalable_vector = arith.maximumf %sv1, %sv2 : vector<[4]xf32>
|
||||
%max_float = arith.maximumf %f1, %f2 : f32
|
||||
%max_signed = arith.maxsi %i1, %i2 : i32
|
||||
%max_unsigned = arith.maxui %i1, %i2 : i32
|
||||
return
|
||||
@ -1084,9 +1084,9 @@ func.func @minimum(%v1: vector<4xf32>, %v2: vector<4xf32>,
|
||||
%sv1: vector<[4]xf32>, %sv2: vector<[4]xf32>,
|
||||
%f1: f32, %f2: f32,
|
||||
%i1: i32, %i2: i32) {
|
||||
%min_vector = arith.minf %v1, %v2 : vector<4xf32>
|
||||
%min_scalable_vector = arith.minf %sv1, %sv2 : vector<[4]xf32>
|
||||
%min_float = arith.minf %f1, %f2 : f32
|
||||
%min_vector = arith.minimumf %v1, %v2 : vector<4xf32>
|
||||
%min_scalable_vector = arith.minimumf %sv1, %sv2 : vector<[4]xf32>
|
||||
%min_float = arith.minimumf %f1, %f2 : f32
|
||||
%min_signed = arith.minsi %i1, %i2 : i32
|
||||
%min_unsigned = arith.minui %i1, %i2 : i32
|
||||
return
|
||||
|
@ -940,7 +940,7 @@ func.func @no_fusion_missing_reduction_shape(%arg0: tensor<f32>, %arg1: index) -
|
||||
iterator_types = ["parallel", "reduction"]
|
||||
} ins(%5 : tensor<?x?xf32>) outs(%7 : tensor<?xf32>) {
|
||||
^bb0(%arg2: f32, %arg3: f32):
|
||||
%9 = arith.maxf %arg2, %arg3 : f32
|
||||
%9 = arith.maximumf %arg2, %arg3 : f32
|
||||
linalg.yield %9 : f32
|
||||
} -> tensor<?xf32>
|
||||
return %8 : tensor<?xf32>
|
||||
|
@ -560,5 +560,5 @@ func.func @generalize_max(%lhs: memref<7x14x21xf32>, %rhs: memref<7x14x21xf32>,
|
||||
// CHECK-SAME: outs(%[[OUT]] : memref<7x14x21xf32>)
|
||||
|
||||
// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32)
|
||||
// CHECK-NEXT: %[[max:.+]] = arith.maxf %[[BBARG0]], %[[BBARG1]] : f32
|
||||
// CHECK-NEXT: %[[max:.+]] = arith.maximumf %[[BBARG0]], %[[BBARG1]] : f32
|
||||
// CHECK-NEXT: linalg.yield %[[max]] : f32
|
||||
|
@ -125,7 +125,7 @@ func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape
|
||||
|
||||
// CHECK-LABEL: @generalize_pooling_nhwc_max_f32
|
||||
// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
|
||||
// CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: %[[MAX:.+]] = arith.maximumf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: linalg.yield %[[MAX]] : f32
|
||||
// CHECK-NEXT: -> tensor<1x2x4x1xf32>
|
||||
|
||||
@ -139,7 +139,7 @@ func.func @generalize_pooling_nwc_max_f32(%input : tensor<1x16x1xf32>, %shape: t
|
||||
|
||||
// CHECK-LABEL: @generalize_pooling_nwc_max_f32
|
||||
// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
|
||||
// CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: %[[MAX:.+]] = arith.maximumf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: linalg.yield %[[MAX]] : f32
|
||||
// CHECK-NEXT: -> tensor<1x4x1xf32>
|
||||
|
||||
@ -201,7 +201,7 @@ func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape
|
||||
|
||||
// CHECK-LABEL: @generalize_pooling_nhwc_min_f32
|
||||
// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
|
||||
// CHECK-NEXT: %[[MIN:.+]] = arith.minf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: %[[MIN:.+]] = arith.minimumf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: linalg.yield %[[MIN]] : f32
|
||||
// CHECK-NEXT: -> tensor<1x2x4x1xf32>
|
||||
|
||||
@ -215,7 +215,7 @@ func.func @generalize_pooling_nwc_min_f32(%input : tensor<1x16x1xf32>, %shape: t
|
||||
|
||||
// CHECK-LABEL: @generalize_pooling_nwc_min_f32
|
||||
// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32)
|
||||
// CHECK-NEXT: %[[MIN:.+]] = arith.minf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: %[[MIN:.+]] = arith.minimumf %[[OUT_ARG]], %[[IN_ARG]] : f32
|
||||
// CHECK-NEXT: linalg.yield %[[MIN]] : f32
|
||||
// CHECK-NEXT: -> tensor<1x4x1xf32>
|
||||
|
||||
|
@ -96,7 +96,7 @@ func.func @elementwise_no_conflict_4(%arg0: tensor<8x32x32x32xf32>, %arg1: tenso
|
||||
// They are different SSA values, but %6 and %extract_slice are equivalent.
|
||||
%7 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} ins(%6 : tensor<32x32xf32>) outs(%extracted_slice : tensor<32x32xf32>) {
|
||||
^bb0(%in: f32, %out: f32):
|
||||
%8 = arith.maxf %in, %cst_1 : f32
|
||||
%8 = arith.maximumf %in, %cst_1 : f32
|
||||
linalg.yield %8 : f32
|
||||
} -> tensor<32x32xf32>
|
||||
scf.forall.in_parallel {
|
||||
|
@ -215,7 +215,7 @@ func.func @softmax(%arg0: tensor<2x16x32xf32>, %dst: tensor<2x16x32xf32>) -> ten
|
||||
// CHECK: %[[D3:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP1]]], iterator_types = ["parallel",
|
||||
// CHECK-SAME: "parallel", "reduction"]} ins(%[[ARG0]] : tensor<2x16x32xf32>) outs(%[[D2]] : tensor<2x16xf32>) {
|
||||
// CHECK: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32):
|
||||
// CHECK: %[[D8:.+]] = arith.maxf %[[IN]], %[[OUT]] : f32
|
||||
// CHECK: %[[D8:.+]] = arith.maximumf %[[IN]], %[[OUT]] : f32
|
||||
// CHECK: linalg.yield %[[D8]] : f32
|
||||
// CHECK: } -> tensor<2x16xf32>
|
||||
// CHECK: %[[D4:.+]] = linalg.generic {indexing_maps = [#[[$MAP]], #[[$MAP1]], #[[$MAP]]], iterator_types =
|
||||
|
@ -447,7 +447,7 @@ module {
|
||||
indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]
|
||||
} ins(%in : tensor<?x?xf32>) outs(%out_1 : tensor<?xf32>) {
|
||||
^bb0(%a: f32, %b: f32):
|
||||
%d = arith.maxf %a, %b : f32
|
||||
%d = arith.maximumf %a, %b : f32
|
||||
linalg.yield %d : f32
|
||||
} -> tensor<?xf32>
|
||||
%d0 = tensor.dim %out_1, %c0 : tensor<?xf32>
|
||||
@ -580,7 +580,7 @@ module {
|
||||
%4 = linalg.fill ins(%cst_1 : f32) outs(%1 : tensor<16x128xf32>) -> tensor<16x128xf32>
|
||||
%5 = linalg.generic {producer, indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%cst : tensor<16x128x128xf32>) outs(%4 : tensor<16x128xf32>) {
|
||||
^bb0(%in: f32, %out: f32):
|
||||
%8 = arith.maxf %in, %out : f32
|
||||
%8 = arith.maximumf %in, %out : f32
|
||||
linalg.yield %8 : f32
|
||||
} -> tensor<16x128xf32>
|
||||
%c16 = arith.constant 16 : index
|
||||
|
@ -102,7 +102,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
} ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
|
||||
^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
|
||||
%3 = arith.addf %arg0, %arg1 : f32
|
||||
%4 = arith.maxf %3, %arg2 : f32
|
||||
%4 = arith.maximumf %3, %arg2 : f32
|
||||
linalg.yield %4 : f32
|
||||
} -> tensor<5x2xf32>
|
||||
return %0 : tensor<5x2xf32>
|
||||
@ -122,12 +122,12 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
|
||||
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
|
||||
// CHECK: arith.addf
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.maximumf
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2x4xf32>
|
||||
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.maximumf
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2xf32>
|
||||
// CHECK: return %[[R]] : tensor<5x2xf32>
|
||||
@ -158,7 +158,7 @@ func.func @generic_split_3d_ninf(%input: tensor<32x2xf32>, %input_2: tensor<5x32
|
||||
} ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
|
||||
^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
|
||||
%3 = arith.addf %arg0, %arg1 : f32
|
||||
%4 = arith.maxf %3, %arg2 fastmath<nnan,ninf> : f32
|
||||
%4 = arith.maximumf %3, %arg2 fastmath<nnan,ninf> : f32
|
||||
linalg.yield %4 : f32
|
||||
} -> tensor<5x2xf32>
|
||||
return %0 : tensor<5x2xf32>
|
||||
@ -178,12 +178,12 @@ func.func @generic_split_3d_ninf(%input: tensor<32x2xf32>, %input_2: tensor<5x32
|
||||
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
|
||||
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
|
||||
// CHECK: arith.addf
|
||||
// CHECK: arith.maxf {{.*}} fastmath<nnan,ninf>
|
||||
// CHECK: arith.maximumf {{.*}} fastmath<nnan,ninf>
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2x4xf32>
|
||||
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
|
||||
// CHECK: arith.maxf {{.*}} fastmath<nnan,ninf>
|
||||
// CHECK: arith.maximumf {{.*}} fastmath<nnan,ninf>
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2xf32>
|
||||
// CHECK: return %[[R]] : tensor<5x2xf32>
|
||||
@ -299,7 +299,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
} ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
|
||||
^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
|
||||
%3 = arith.addf %arg0, %arg1 : f32
|
||||
%4 = arith.minf %3, %arg2 : f32
|
||||
%4 = arith.minimumf %3, %arg2 : f32
|
||||
linalg.yield %4 : f32
|
||||
} -> tensor<5x2xf32>
|
||||
return %0 : tensor<5x2xf32>
|
||||
@ -319,12 +319,12 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
|
||||
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
|
||||
// CHECK: arith.addf
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.minimumf
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2x4xf32>
|
||||
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
|
||||
// CHECK: arith.minf
|
||||
// CHECK: arith.minimumf
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2xf32>
|
||||
// CHECK: return %[[R]] : tensor<5x2xf32>
|
||||
@ -355,7 +355,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
} ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
|
||||
^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
|
||||
%3 = arith.addf %arg0, %arg1 : f32
|
||||
%4 = arith.minf %3, %arg2 fastmath<ninf> : f32
|
||||
%4 = arith.minimumf %3, %arg2 fastmath<ninf> : f32
|
||||
linalg.yield %4 : f32
|
||||
} -> tensor<5x2xf32>
|
||||
return %0 : tensor<5x2xf32>
|
||||
@ -375,12 +375,12 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32>
|
||||
// CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
|
||||
// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
|
||||
// CHECK: arith.addf
|
||||
// CHECK: arith.minf {{.*}} fastmath<ninf>
|
||||
// CHECK: arith.minimumf {{.*}} fastmath<ninf>
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2x4xf32>
|
||||
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
|
||||
// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
|
||||
// CHECK: arith.minf {{.*}} fastmath<ninf>
|
||||
// CHECK: arith.minimumf {{.*}} fastmath<ninf>
|
||||
// CHECK: linalg.yield
|
||||
// CHECK: } -> tensor<5x2xf32>
|
||||
// CHECK: return %[[R]] : tensor<5x2xf32>
|
||||
|
@ -32,7 +32,7 @@ module {
|
||||
ins(%C, %6 : tensor<?xf32>, tensor<?x?xf32>)
|
||||
outs(%D : tensor<?x?xf32>) {
|
||||
^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
|
||||
%16 = arith.maxf %arg3, %cst : f32
|
||||
%16 = arith.maximumf %arg3, %cst : f32
|
||||
%17 = arith.cmpf ogt, %arg2, %cst : f32
|
||||
%18 = arith.select %17, %cst, %16 : f32
|
||||
linalg.yield %18 : f32
|
||||
@ -91,7 +91,7 @@ module {
|
||||
ins(%C, %6 : tensor<?xf32>, tensor<?x?xf32>)
|
||||
outs(%D : tensor<?x?xf32>) {
|
||||
^bb0(%arg2: f32, %arg3: f32, %arg4: f32):
|
||||
%16 = arith.maxf %arg3, %cst : f32
|
||||
%16 = arith.maximumf %arg3, %cst : f32
|
||||
%17 = arith.cmpf ogt, %arg2, %cst : f32
|
||||
%18 = arith.select %17, %cst, %16 : f32
|
||||
linalg.yield %18 : f32
|
||||
|
@ -1182,7 +1182,7 @@ func.func @red_max_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
|
||||
iterator_types = ["parallel", "reduction"]}
|
||||
ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
|
||||
^bb0(%in0: f32, %out0: f32):
|
||||
%max = arith.maxf %in0, %out0 : f32
|
||||
%max = arith.maximumf %in0, %out0 : f32
|
||||
linalg.yield %max : f32
|
||||
} -> tensor<4xf32>
|
||||
return %red : tensor<4xf32>
|
||||
@ -1213,7 +1213,7 @@ func.func @red_min_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> {
|
||||
iterator_types = ["parallel", "reduction"]}
|
||||
ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
|
||||
^bb0(%in0: f32, %out0: f32):
|
||||
%min = arith.minf %out0, %in0 : f32
|
||||
%min = arith.minimumf %out0, %in0 : f32
|
||||
linalg.yield %min : f32
|
||||
} -> tensor<4xf32>
|
||||
return %red : tensor<4xf32>
|
||||
|
@ -700,8 +700,8 @@ func.func @pooling_nwc_max_memref_1_2_1_3(%input: memref<4x4x3xf32>, %filter: me
|
||||
// CHECK: %[[V3:.+]] = vector.extract_strided_slice %[[V0]] {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xf32> to vector<4x1x3xf32>
|
||||
// CHECK: %[[V4:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xf32> to vector<4x1x3xf32>
|
||||
// CHECK: %[[V5:.+]] = vector.extract_strided_slice %[[V1]] {offsets = [0, 1, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x2x3xf32> to vector<4x1x3xf32>
|
||||
// CHECK: %[[V6:.+]] = arith.maxf %[[V2]], %[[V4]] : vector<4x1x3xf32>
|
||||
// CHECK: %[[V7:.+]] = arith.maxf %[[V3]], %[[V5]] : vector<4x1x3xf32>
|
||||
// CHECK: %[[V6:.+]] = arith.maximumf %[[V2]], %[[V4]] : vector<4x1x3xf32>
|
||||
// CHECK: %[[V7:.+]] = arith.maximumf %[[V3]], %[[V5]] : vector<4x1x3xf32>
|
||||
// CHECK: %[[V8:.+]] = vector.insert_strided_slice %[[V6]], %[[V1]] {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
|
||||
// CHECK: %[[V9:.+]] = vector.insert_strided_slice %[[V7]], %[[V8]] {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x3xf32> into vector<4x2x3xf32>
|
||||
// CHECK: vector.transfer_write %[[V9]], %[[OUTPUT]][%[[Vc0]], %[[Vc0]], %[[Vc0]]] {in_bounds = [true, true, true]} : vector<4x2x3xf32>, memref<4x2x3xf32>
|
||||
|
@ -16,7 +16,7 @@
|
||||
// CHECK: arith.addf
|
||||
// CHECK: linalg.generic
|
||||
// CHECK: math.exp
|
||||
// CHECK: arith.maxf
|
||||
// CHECK: arith.maximumf
|
||||
// CHECK-NOT: linalg.generic
|
||||
// CHECK: return
|
||||
func.func @sparse_fusion(%argA: tensor<100xf64, #SV>) -> tensor<100xf64> {
|
||||
@ -51,7 +51,7 @@ func.func @sparse_fusion(%argA: tensor<100xf64, #SV>) -> tensor<100xf64> {
|
||||
%l2 = linalg.generic #trait
|
||||
ins(%l1: tensor<100xf64>) outs(%t2: tensor<100xf64>) {
|
||||
^bb0(%in2: f64, %out2: f64):
|
||||
%b2 = arith.maxf %in2, %c100 : f64
|
||||
%b2 = arith.maximumf %in2, %c100 : f64
|
||||
linalg.yield %b2 : f64
|
||||
} -> tensor<100xf64>
|
||||
|
||||
|
@ -42,7 +42,7 @@ func.func @dense_op_without_sp_dep(%169: tensor<2x10x8xf32>,
|
||||
%180 = arith.mulf %in_60, %in_60 : f32
|
||||
%181 = arith.mulf %in_59, %cst_13 : f32
|
||||
%182 = arith.subf %181, %180 : f32
|
||||
%183 = arith.maxf %182, %cst_13 : f32
|
||||
%183 = arith.maximumf %182, %cst_13 : f32
|
||||
%184 = arith.addf %183, %cst_13 : f32
|
||||
%185 = math.rsqrt %184 : f32 // data dependent on sparse value.
|
||||
%186 = arith.mulf %185, %in_61 : f32
|
||||
@ -80,7 +80,7 @@ func.func @dense_op_with_sp_dep(%169: tensor<2x10x8xf32>,
|
||||
%180 = arith.mulf %in_60, %in_60 : f32
|
||||
%181 = arith.mulf %in_59, %cst_13 : f32
|
||||
%182 = arith.subf %181, %180 : f32
|
||||
%183 = arith.maxf %182, %cst_13 : f32
|
||||
%183 = arith.maximumf %182, %cst_13 : f32
|
||||
%184 = arith.addf %183, %cst_13 : f32
|
||||
%185 = math.rsqrt %184 : f32
|
||||
%186 = arith.mulf %185, %in_61 : f32
|
||||
|
@ -1995,7 +1995,7 @@ func.func @dont_reduce_one_element_vector(%a : vector<4xf32>) -> f32 {
|
||||
// CHECK-LABEL: func @reduce_one_element_vector_maxf
|
||||
// CHECK-SAME: (%[[V:.+]]: vector<1xf32>, %[[B:.+]]: f32)
|
||||
// CHECK: %[[A:.+]] = vector.extract %[[V]][0] : vector<1xf32>
|
||||
// CHECK: %[[S:.+]] = arith.maxf %[[A]], %[[B]] : f32
|
||||
// CHECK: %[[S:.+]] = arith.maximumf %[[A]], %[[B]] : f32
|
||||
// CHECK: return %[[S]]
|
||||
func.func @reduce_one_element_vector_maxf(%a : vector<1xf32>, %b: f32) -> f32 {
|
||||
%s = vector.reduction <maxf>, %a, %b : vector<1xf32> into f32
|
||||
|
@ -27,13 +27,13 @@ func.func @vector_multi_reduction_min(%arg0: vector<2x4xf32>, %acc: vector<2xf32
|
||||
// CHECK-SAME: %[[INPUT:.+]]: vector<2x4xf32>, %[[ACC:.*]]: vector<2xf32>
|
||||
// CHECK: %[[TRANSPOSED:.+]] = vector.transpose %[[INPUT]], [1, 0] : vector<2x4xf32> to vector<4x2xf32>
|
||||
// CHECK: %[[V0:.+]] = vector.extract %[[TRANSPOSED]][0] : vector<4x2xf32>
|
||||
// CHECK: %[[RV0:.+]] = arith.minf %[[V0]], %[[ACC]] : vector<2xf32>
|
||||
// CHECK: %[[RV0:.+]] = arith.minimumf %[[V0]], %[[ACC]] : vector<2xf32>
|
||||
// CHECK: %[[V1:.+]] = vector.extract %[[TRANSPOSED]][1] : vector<4x2xf32>
|
||||
// CHECK: %[[RV01:.+]] = arith.minf %[[V1]], %[[RV0]] : vector<2xf32>
|
||||
// CHECK: %[[RV01:.+]] = arith.minimumf %[[V1]], %[[RV0]] : vector<2xf32>
|
||||
// CHECK: %[[V2:.+]] = vector.extract %[[TRANSPOSED]][2] : vector<4x2xf32>
|
||||
// CHECK: %[[RV012:.+]] = arith.minf %[[V2]], %[[RV01]] : vector<2xf32>
|
||||
// CHECK: %[[RV012:.+]] = arith.minimumf %[[V2]], %[[RV01]] : vector<2xf32>
|
||||
// CHECK: %[[V3:.+]] = vector.extract %[[TRANSPOSED]][3] : vector<4x2xf32>
|
||||
// CHECK: %[[RESULT_VEC:.+]] = arith.minf %[[V3]], %[[RV012]] : vector<2xf32>
|
||||
// CHECK: %[[RESULT_VEC:.+]] = arith.minimumf %[[V3]], %[[RV012]] : vector<2xf32>
|
||||
// CHECK: return %[[RESULT_VEC]] : vector<2xf32>
|
||||
|
||||
func.func @vector_multi_reduction_max(%arg0: vector<2x4xf32>, %acc: vector<2xf32>) -> vector<2xf32> {
|
||||
@ -45,13 +45,13 @@ func.func @vector_multi_reduction_max(%arg0: vector<2x4xf32>, %acc: vector<2xf32
|
||||
// CHECK-SAME: %[[INPUT:.+]]: vector<2x4xf32>, %[[ACC:.*]]: vector<2xf32>
|
||||
// CHECK: %[[TRANSPOSED:.+]] = vector.transpose %[[INPUT]], [1, 0] : vector<2x4xf32> to vector<4x2xf32>
|
||||
// CHECK: %[[V0:.+]] = vector.extract %[[TRANSPOSED]][0] : vector<4x2xf32>
|
||||
// CHECK: %[[RV0:.+]] = arith.maxf %[[V0]], %[[ACC]] : vector<2xf32>
|
||||
// CHECK: %[[RV0:.+]] = arith.maximumf %[[V0]], %[[ACC]] : vector<2xf32>
|
||||
// CHECK: %[[V1:.+]] = vector.extract %[[TRANSPOSED]][1] : vector<4x2xf32>
|
||||
// CHECK: %[[RV01:.+]] = arith.maxf %[[V1]], %[[RV0]] : vector<2xf32>
|
||||
// CHECK: %[[RV01:.+]] = arith.maximumf %[[V1]], %[[RV0]] : vector<2xf32>
|
||||
// CHECK: %[[V2:.+]] = vector.extract %[[TRANSPOSED]][2] : vector<4x2xf32>
|
||||
// CHECK: %[[RV012:.+]] = arith.maxf %[[V2]], %[[RV01]] : vector<2xf32>
|
||||
// CHECK: %[[RV012:.+]] = arith.maximumf %[[V2]], %[[RV01]] : vector<2xf32>
|
||||
// CHECK: %[[V3:.+]] = vector.extract %[[TRANSPOSED]][3] : vector<4x2xf32>
|
||||
// CHECK: %[[RESULT_VEC:.+]] = arith.maxf %[[V3]], %[[RV012]] : vector<2xf32>
|
||||
// CHECK: %[[RESULT_VEC:.+]] = arith.maximumf %[[V3]], %[[RV012]] : vector<2xf32>
|
||||
// CHECK: return %[[RESULT_VEC]] : vector<2xf32>
|
||||
|
||||
func.func @vector_multi_reduction_and(%arg0: vector<2x4xi32>, %acc: vector<2xi32>) -> vector<2xi32> {
|
||||
|
@ -157,7 +157,7 @@ func.func @pool_strides_and_dilation(%arg0 : memref<?x?x?x?xf32>, %arg1 : memref
|
||||
// CHECK-DAG: %[[J:.+]] = affine.apply #[[MAP1]](%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]])
|
||||
// CHECK-DAG: %[[T8:.+]] = memref.load %[[ARG0]][%[[IV0]], %[[I]], %[[J]], %[[IV3]]]
|
||||
// CHECK-DAG: %[[T9:.+]] = memref.load %[[ARG2]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
|
||||
// CHECK: %[[T10:.+]] = arith.maxf %[[T9]], %[[T8]]
|
||||
// CHECK: %[[T10:.+]] = arith.maximumf %[[T9]], %[[T8]]
|
||||
// CHECK: memref.store %[[T10]], %[[ARG2]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
|
||||
|
||||
// -----
|
||||
|
@ -408,7 +408,7 @@ func.func @reduction_sequence(%arg0: tensor<30x3xf32>) -> tensor<30x3xf32> {
|
||||
iterator_types = ["parallel", "reduction"]}
|
||||
ins(%arg0 : tensor<30x3xf32>) outs(%1 : tensor<30xf32>) {
|
||||
^bb0(%arg1: f32, %arg2: f32):
|
||||
%8 = arith.maxf %arg2, %arg1 : f32
|
||||
%8 = arith.maximumf %arg2, %arg1 : f32
|
||||
linalg.yield %8 : f32
|
||||
} -> tensor<30xf32>
|
||||
%3 = tensor.empty() : tensor<30x3xf32>
|
||||
|
@ -81,7 +81,7 @@ with Context() as ctx, Location.unknown():
|
||||
# CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]]
|
||||
# CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"]
|
||||
# CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: f32)
|
||||
# CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT]], %[[IN:.+]] : f32
|
||||
# CHECK-NEXT: %[[MAX:.+]] = arith.maximumf %[[OUT]], %[[IN:.+]] : f32
|
||||
# CHECK-NEXT: linalg.yield %[[MAX]] : f32
|
||||
# CHECK-NEXT: -> tensor<1x2x4x1xf32>
|
||||
@func.FuncOp.from_py_func(
|
||||
@ -132,7 +132,7 @@ with Context() as ctx, Location.unknown():
|
||||
)
|
||||
|
||||
# CHECK-LABEL: @test_f32f32_min_pooling
|
||||
# CHECK: = arith.minf
|
||||
# CHECK: = arith.minimumf
|
||||
@func.FuncOp.from_py_func(
|
||||
RankedTensorType.get((1, 4, 16, 1), f32),
|
||||
RankedTensorType.get((2, 2), f32),
|
||||
|
@ -64,10 +64,10 @@ module.exports = {
|
||||
// operation ::= `arith.divf` $lhs `,` $rhs (`fastmath` ``
|
||||
// $fastmath^)?
|
||||
// attr-dict `:` type($result)
|
||||
// operation ::= `arith.maxf` $lhs `,` $rhs (`fastmath` ``
|
||||
// operation ::= `arith.maximumf` $lhs `,` $rhs (`fastmath` ``
|
||||
// $fastmath^)?
|
||||
// attr-dict `:` type($result)
|
||||
// operation ::= `arith.minf` $lhs `,` $rhs (`fastmath` ``
|
||||
// operation ::= `arith.minimumf` $lhs `,` $rhs (`fastmath` ``
|
||||
// $fastmath^)?
|
||||
// attr-dict `:` type($result)
|
||||
// operation ::= `arith.mulf` $lhs `,` $rhs (`fastmath` ``
|
||||
@ -79,8 +79,8 @@ module.exports = {
|
||||
// operation ::= `arith.subf` $lhs `,` $rhs (`fastmath` ``
|
||||
// $fastmath^)?
|
||||
// attr-dict `:` type($result)
|
||||
seq(choice('arith.addf', 'arith.divf', 'arith.maxf',
|
||||
'arith.minf', 'arith.mulf', 'arith.remf',
|
||||
seq(choice('arith.addf', 'arith.divf', 'arith.maximumf',
|
||||
'arith.minimumf', 'arith.mulf', 'arith.remf',
|
||||
'arith.subf'),
|
||||
field('lhs', $.value_use), ',',
|
||||
field('rhs', $.value_use),
|
||||
|
@ -99,8 +99,8 @@
|
||||
"arith.addui_extended"
|
||||
"arith.addf"
|
||||
"arith.divf"
|
||||
"arith.maxf"
|
||||
"arith.minf"
|
||||
"arith.maximumf"
|
||||
"arith.minimumf"
|
||||
"arith.mulf"
|
||||
"arith.remf"
|
||||
"arith.subf"
|
||||
|
@ -1239,7 +1239,7 @@ inf value_use
|
||||
func.func @test_maxf(%arg0 : f32) -> f32 {
|
||||
%c0 = arith.constant 0.0 : f32
|
||||
%-inf = arith.constant 0xFF800000 : f32
|
||||
%0 = arith.maxf %-inf, %arg0 : f32
|
||||
%0 = arith.maximumf %-inf, %arg0 : f32
|
||||
return %0 : f32
|
||||
}
|
||||
--------------------------------------------------------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user