[mlir][Linalg] Implement padding for linalg.conv and lowering to loops.

Summary:
To enable this, two changes are needed:
1) Add an optional attribute `padding` to linalg.conv.
2) In the lowering to loops, compute whether the access indices are out of bounds. If so,
use the padding value `0`; otherwise, use the value loaded from the input.

In this patch, padding is only supported when lowering directly to loops; other
transformations (e.g., tiling, fusion, promotion, vectorization) reject convolutions
with a padding attribute for now.
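For illustration only, a minimal scalar C++ sketch (not part of the patch; the
function name, element type, and 1-D shape are made up for the example) of the
semantics the padded lowering implements:

```cpp
#include <cstdint>
#include <vector>

// Zero-padded 1-D convolution. The input index is
//   stride * x + dilation * z - pad_low
// and any access outside [0, input.size()) contributes the padding value 0
// rather than reading memory.
std::vector<float> paddedConv1D(const std::vector<float> &input,
                                const std::vector<float> &filter,
                                int64_t stride, int64_t dilation,
                                int64_t padLow, int64_t padHigh) {
  int64_t inSize = static_cast<int64_t>(input.size());
  int64_t fSize = static_cast<int64_t>(filter.size());
  int64_t effFilter = dilation * (fSize - 1) + 1;
  int64_t outSize = (inSize + padLow + padHigh - effFilter) / stride + 1;
  std::vector<float> output(outSize, 0.0f);
  for (int64_t x = 0; x < outSize; ++x) {
    for (int64_t z = 0; z < fSize; ++z) {
      int64_t idx = stride * x + dilation * z - padLow;
      float in = (idx < 0 || idx >= inSize) ? 0.0f : input[idx];
      output[x] += filter[z] * in;
    }
  }
  return output;
}
```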

Differential Revision: https://reviews.llvm.org/D75722
Hanhan Wang 2020-03-13 14:33:27 -07:00
parent b2bb8b6cd6
commit 92f7e8133a
10 changed files with 181 additions and 12 deletions


@@ -62,7 +62,7 @@ SmallVector<AffineExpr, 4> makeAffineDimExprs(unsigned num, unsigned &startIdx,
/// Builds the indexing expressions for a ConvOp `op`. Returns the vector of
/// AffineMaps representing:
/// `stride[i] * xs[i] + dilation[i] * zs[i]`
/// `stride[i] * xs[i] + dilation[i] * zs[i] - pad_low[i]`
SmallVector<AffineExpr, 4> weightedConvInputIndex(ConvOp op,
ArrayRef<AffineExpr> xs,
ArrayRef<AffineExpr> zs);


@@ -265,13 +265,18 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> {
```
}];
// TODO(ntv) padding.
// Following the TF source of truth above, strides and dilations are integer
// attributes of the same rank as the number of window dimensions.
// Following the TF source of truth above, strides, dilations and padding are
// integer attributes of the same rank as the number of window dimensions.
// The padding attribute specifies the amount of zero padding to be applied to
// the base area. The attribute is an n-d array of (low, high) padding. Each pair has
// the low padding as the first element and the high padding as the second
// element. Using padding is equivalent to inserting those same zero values
// into the input before doing the convolution.
let arguments = (ins AnyStridedMemRef:$filter, AnyStridedMemRef:$input,
AnyStridedMemRef:$output,
OptionalAttr<I64ArrayAttr>:$strides,
OptionalAttr<I64ArrayAttr>:$dilations);
OptionalAttr<I64ArrayAttr>:$dilations,
OptionalAttr<I64ElementsAttr>:$padding);
let extraClassDeclaration = libraryCallName # [{
// TODO(ntv) extend to support more than 1 dimensions and potentially
@@ -314,9 +319,17 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> {
.cast<IntegerAttr>().getValue().getSExtValue();
}
// F(z0, ..., zN-1, q, k) * I(b, x0 + z0, ..., xN-1 + zN-1, q) ->
// O(b, x0, ..., xN-1, k)
// for N equal to `nWindow`.
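// Returns the low padding of the `i`-th window dimension, i.e., the first
// element of the (low, high) pair, or 0 when there is no padding attribute.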
int64_t getLowPad(unsigned i) {
assert(i < getNumWindowLoops());
if (!padding().hasValue()) return 0;
return padding().getValue().getValue<int64_t>({i, 0});
}
// F(z0, ..., zN-1, q, k) *
// I(b, x0 + z0 - pad_low_0, ..., xN-1 + zN-1 - pad_low_N-1, q)
// -> O(b, x0, ..., xN-1, k)
// for N equal to `nWindow`. If there is no padding attribute, it will be
// ignored.
llvm::Optional<SmallVector<AffineMap, 8>> referenceIndexingMaps() {
MLIRContext *context = getContext();
auto nWin = getNumWindowLoops();
@@ -346,7 +359,9 @@ def ConvOp : LinalgStructured_Op<"conv", [NInputs<2>, NOutputs<1>]> {
// filter[z[0], ..., z[N-1], q, k]
AffineMap::get(idx, 0, concat(concat(zs, qs), ks)),
// input[b,
// x[0]*s[0] + d[0]*z[0], ..., x[N-1]*s[N-1] + d[N-1]*z[N-1],
// x[0]*s[0] + d[0]*z[0] - pad_low[0],
// ...
// x[N-1]*s[N-1] + d[N-1]*z[N-1] - pad_low[N-1],
// q]
AffineMap::get(idx, 0, concat(concat(bs, ws), qs)),
// output[b, x[0], ..., x[N-1], k]


@@ -900,8 +900,12 @@ mlir::linalg::weightedConvInputIndex(ConvOp op, ArrayRef<AffineExpr> xs,
assert(xs.size() == zs.size());
SmallVector<AffineExpr, 4> res;
res.reserve(xs.size());
for (unsigned i = 0, e = xs.size(); i < e; ++i)
res.push_back(op.getStride(i) * xs[i] + op.getDilation(i) * zs[i]);
for (unsigned i = 0, e = xs.size(); i < e; ++i) {
// TODO(ntv): add a level of indirection to linalg.generic.
auto expr =
op.getStride(i) * xs[i] + op.getDilation(i) * zs[i] - op.getLowPad(i);
res.push_back(expr);
}
return res;
}
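For a concrete picture of the expression built per window dimension, a small
standalone sketch (not from the patch; the stride, dilation, and low-padding
values are arbitrary) using the MLIR AffineExpr API:

```cpp
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/MLIRContext.h"

using namespace mlir;

// Builds stride * x + dilation * z - pad_low for a single window dimension,
// here with stride = 2, dilation = 1, pad_low = 1. d0 plays the role of
// xs[i] (output spatial index) and d1 of zs[i] (window index).
AffineExpr buildPaddedInputIndexExpr(MLIRContext *ctx) {
  AffineExpr x = getAffineDimExpr(/*position=*/0, ctx);
  AffineExpr z = getAffineDimExpr(/*position=*/1, ctx);
  return x * 2 + z * 1 - 1;
}
```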


@@ -152,6 +152,18 @@ static LinalgOp fuse(Value producedView, LinalgOp producer, LinalgOp consumer,
"expected linalg op with buffer semantics");
assert(consumer.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
if (auto convOp = dyn_cast<linalg::ConvOp>(producer.getOperation())) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
if (auto convOp = dyn_cast<linalg::ConvOp>(consumer.getOperation())) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
auto subView = dyn_cast_or_null<SubViewOp>(
consumer.getInput(consumerIdx).getDefiningOp());
auto slice =


@@ -177,6 +177,51 @@ public:
template <typename IndexedValueType>
class LinalgScopedEmitter<IndexedValueType, ConvOp> {
public:
/// Returns the input value of convOp. If the indices in `imIdx` are out of
/// bounds, returns 0 instead.
static ValueHandle getConvOpInput(ConvOp convOp, IndexedValueType im,
ArrayRef<ValueHandle> imIdx) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (!convOp.padding())
return im(imIdx);
ValueHandle zeroIndex = std_constant_index(0);
// Seed the accumulated out-of-bounds predicate with `false` (i1 0); each
// window dimension below ORs in its own lower/upper bound checks.
SmallVector<ValueHandle, 8> conds = {
std_constant_int(/*value=*/0, /*width=*/1)};
SmallVector<ValueHandle, 8> clampedImIdx;
for (auto iter : llvm::enumerate(imIdx)) {
int idx = iter.index();
auto dim = iter.value();
// Only need to iterate over the window dimensions.
if (idx == 0 || idx == static_cast<int>(imIdx.size()) - 1) {
clampedImIdx.push_back(dim);
continue;
}
using edsc::op::operator<;
using edsc::op::operator>=;
using edsc::op::operator||;
conds.push_back(conds.back() || (dim < zeroIndex));
ValueHandle bound = std_dim(convOp.input(), idx);
conds.push_back(conds.back() || (dim >= bound));
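// conds.back() now holds the out-of-bounds predicate accumulated over all
// window dimensions visited so far.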
// When padding is involved, the indices can only be shifted towards negative
// values, so clamping with a max op is enough.
auto *context = ScopedContext::getContext();
auto maxMap = AffineMap::get(/*dimCount=*/1, 0,
{getAffineDimExpr(/*position=*/0, context),
getAffineConstantExpr(0, context)});
clampedImIdx.push_back(
affine_max(dim.getType(), maxMap, ValueRange{dim}));
}
auto b = ScopedContext::getBuilder();
Type type = convOp.input().getType().cast<MemRefType>().getElementType();
ValueHandle zero = std_constant(type, b.getZeroAttr(type));
ValueHandle readInput = im(clampedImIdx);
return std_select(conds.back(), zero, readInput);
}
static void emitScalarImplementation(ArrayRef<Value> allIvs, ConvOp convOp) {
assert(convOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
@@ -192,8 +237,10 @@ public:
SmallVector<ValueHandle, 8> oIdx(
makeCanonicalAffineApplies(b, loc, maps[2], allIvs));
IndexedValueType F(convOp.filter()), I(convOp.input()), O(convOp.output());
// Emit scalar form.
O(oIdx) += F(fIdx) * I(imIdx);
ValueHandle paddedInput = getConvOpInput(convOp, I, imIdx);
O(oIdx) += F(fIdx) * paddedInput;
}
};


@@ -193,6 +193,12 @@ SmallVector<Value, 0> mlir::linalg::vectorizeLinalgOp(PatternRewriter &rewriter,
auto linalgOp = cast<linalg::LinalgOp>(op);
assert(linalgOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");
if (auto convOp = dyn_cast<linalg::ConvOp>(op)) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
edsc::ScopedContext scope(rewriter, op->getLoc());
if (auto fillOp = dyn_cast<linalg::FillOp>(op)) {
@@ -295,6 +301,12 @@ mlir::linalg::promoteSubviewsLinalgOp(PatternRewriter &rewriter,
assert(succeeded(promoteSubviewsLinalgOpPrecondition(op)) &&
"DRR failure case must be a precondition");
if (auto convOp = dyn_cast<linalg::ConvOp>(op)) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
LinalgOp linOp = cast<LinalgOp>(op);
assert(linOp.hasBufferSemantics() &&
"expected linalg op with buffer semantics");


@@ -160,6 +160,12 @@ LinalgOp mlir::linalg::promoteSubViewOperands(OpBuilder &b, LinalgOp op,
OperationFolder *folder) {
assert(op.hasBufferSemantics() && "expected linalg op with buffer semantics");
if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
// 1. Promote the specified views and use them in the new op.
ScopedContext scope(b, op.getLoc());
auto promotedBufferAndViews = promoteSubViews(


@@ -342,6 +342,12 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(OpBuilder &b, LinalgOp op,
tileSizes.size() &&
"expected matching number of tile sizes and loops");
if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
// If permutation is empty, use the identity. Build the permutation map
// otherwise.
auto invPermutationMap = AffineMap::getMultiDimIdentityMap(
@@ -421,6 +427,12 @@ tileLinalgOpImpl(OpBuilder &b, LinalgOp op, ArrayRef<int64_t> tileSizes,
if (tileSizes.empty())
return llvm::None;
if (auto convOp = dyn_cast<linalg::ConvOp>(op.getOperation())) {
// TODO(ntv): add a level of indirection to linalg.generic.
if (convOp.padding())
llvm_unreachable("Unexpected conv with padding");
}
// The following uses the convention that "tiling by zero" skips tiling a
// particular dimension. This convention is significantly simpler to handle
// instead of adjusting affine maps to account for missing dimensions.


@@ -7,6 +7,7 @@
// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
// CHECK-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)>
// CHECK-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)>
// CHECK-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)>
// CHECK-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)>
@@ -212,6 +213,44 @@ func @conv_view4(%arg0: memref<?x?x?x?xf32, offset: ?, strides: [?, ?, ?, 1]>, %
// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32, #[[strided4D]]>
func @conv_padding(%arg0: memref<?x?x?x?xf32>,
%arg1: memref<?x?x?x?xf32>,
%arg2: memref<?x?x?x?xf32>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1],
padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
strides = [1, 1]} :
memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
return
}
// CHECK-LABEL: func @conv_padding
// CHECK: %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>, %{{.*}}: memref<?x?x?x?xf32>) {
// CHECK: %[[ZERO:.*]] = constant 0.000000e+00 : f32
// CHECK: %[[Z0:.*]] = dim %arg0, 0 : memref<?x?x?x?xf32>
// CHECK: %[[Z1:.*]] = dim %arg0, 1 : memref<?x?x?x?xf32>
// CHECK: %[[Q:.*]] = dim %arg0, 2 : memref<?x?x?x?xf32>
// CHECK: %[[K:.*]] = dim %arg0, 3 : memref<?x?x?x?xf32>
// CHECK: %[[B:.*]] = dim %arg1, 0 : memref<?x?x?x?xf32>
// CHECK: %[[X0:.*]] = dim %arg2, 1 : memref<?x?x?x?xf32>
// CHECK: %[[X1:.*]] = dim %arg2, 2 : memref<?x?x?x?xf32>
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} {
// CHECK: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} {
// CHECK: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}})
// CHECK: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]])
// CHECK: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]])
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32
// CHECK: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
// CHECK: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref<?x?x?x?xf32>
func @foo(%0: f32, %1: f32, %2: f32) -> (f32, f32) {
%f0 = constant 0.0 : f32
return %f0, %f0 : f32, f32


@@ -222,6 +222,28 @@ func @conv_view6(%arg0: memref<?x?x?x?x?x?xf32, offset: ?, strides: [?, ?, ?, ?,
// -----
func @conv_padding(%arg0: memref<?x?x?x?xf32>,
%arg1: memref<?x?x?x?xf32>,
%arg2: memref<?x?x?x?xf32>) {
linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1],
padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>,
strides = [1, 1]} :
memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>
return
}
// CHECK-LABEL: func @conv_padding(
// CHECK: linalg.conv(%{{.*}}, %{{.*}}, %{{.*}}) {
// CHECK-SAME: dilations = [1, 1],
// CHECK-SAME: padding = dense<[
// CHECK-SAME: [0, 1], [1, 1]]> : tensor<2x2xi64>,
// CHECK-SAME: strides = [1, 1]} :
// CHECK-SAME: memref<?x?x?x?xf32>,
// CHECK-SAME: memref<?x?x?x?xf32>,
// CHECK-SAME: memref<?x?x?x?xf32>
// -----
// CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>