mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-30 17:21:10 +00:00
beb5ac8b25
The single source file reduction.cpp is a little large in terms of both source lines and generated text bytes, so split SUM, PRODUCT, FINDLOC, and MAXLOC/MAXVAL/MINLOC/MINVAL off into their own C++ source files that share a set of implementation function templates now in a common header. Differential Revision: https://reviews.llvm.org/D101111
324 lines
12 KiB
C++
324 lines
12 KiB
C++
//===-- runtime/reduction-templates.h -------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Generic function templates used by various reduction transformation
|
|
// intrinsic functions (SUM, PRODUCT, &c.)
|
|
//
|
|
// * Partial reductions (i.e., those with DIM= arguments that are not
|
|
// required to be 1 by the rank of the argument) return arrays that
|
|
// are dynamically allocated in a caller-supplied descriptor.
|
|
// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
|
|
// return integer vectors of some kind, not scalars; a caller-supplied
|
|
// descriptor is used
|
|
// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
|
|
// length results, dynamically allocated in a caller-supplied descriptor
|
|
|
|
#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
|
|
#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
|
|
|
|
#include "cpp-type.h"
|
|
#include "descriptor.h"
|
|
#include "terminator.h"
|
|
#include "tools.h"
|
|
|
|
namespace Fortran::runtime {
|
|
|
|
// Reductions are implemented with *accumulators*, which are instances of
|
|
// classes that incrementally build up the result (or an element thereof) during
|
|
// a traversal of the unmasked elements of an array. Each accumulator class
|
|
// supports a constructor (which captures a reference to the array), an
|
|
// AccumulateAt() member function that applies supplied subscripts to the
|
|
// array and does something with a scalar element, and a GetResult()
|
|
// member function that copies a final result into its destination.
|
|
|
|
// Total reduction of the array argument to a scalar (or to a vector in the
|
|
// cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
|
|
// cases where the argument has rank 1 and DIM=, if present, must be 1.
|
|
template <typename TYPE, typename ACCUMULATOR>
|
|
inline void DoTotalReduction(const Descriptor &x, int dim,
|
|
const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
|
|
Terminator &terminator) {
|
|
if (dim < 0 || dim > 1) {
|
|
terminator.Crash(
|
|
"%s: bad DIM=%d for argument with rank %d", intrinsic, dim, x.rank());
|
|
}
|
|
SubscriptValue xAt[maxRank];
|
|
x.GetLowerBounds(xAt);
|
|
if (mask) {
|
|
CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
|
|
SubscriptValue maskAt[maxRank];
|
|
mask->GetLowerBounds(maskAt);
|
|
if (mask->rank() > 0) {
|
|
for (auto elements{x.Elements()}; elements--;
|
|
x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
|
|
if (IsLogicalElementTrue(*mask, maskAt)) {
|
|
accumulator.template AccumulateAt<TYPE>(xAt);
|
|
}
|
|
}
|
|
return;
|
|
} else if (!IsLogicalElementTrue(*mask, maskAt)) {
|
|
// scalar MASK=.FALSE.: return identity value
|
|
return;
|
|
}
|
|
}
|
|
// No MASK=, or scalar MASK=.TRUE.
|
|
for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
|
|
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
|
|
break; // cut short, result is known
|
|
}
|
|
}
|
|
}
|
|
|
|
template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
|
|
inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
|
|
const char *source, int line, int dim, const Descriptor *mask,
|
|
ACCUMULATOR &&accumulator, const char *intrinsic) {
|
|
Terminator terminator{source, line};
|
|
RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
|
|
using CppType = CppTypeFor<CAT, KIND>;
|
|
DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
|
|
CppType result;
|
|
#ifdef _MSC_VER // work around MSVC spurious error
|
|
accumulator.GetResult(&result);
|
|
#else
|
|
accumulator.template GetResult(&result);
|
|
#endif
|
|
return result;
|
|
}
|
|
|
|
// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
|
|
// of the array is [2,3,5], the shape of the result is [2,5] and
|
|
// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
|
|
// lower bounds other than one. This utility subroutine creates an
|
|
// array of subscripts [j,_,k] for result subscripts [j,k] so that the
|
|
// elemets of array(j,:,k) can be reduced.
|
|
inline void GetExpandedSubscripts(SubscriptValue at[],
|
|
const Descriptor &descriptor, int zeroBasedDim,
|
|
const SubscriptValue from[]) {
|
|
descriptor.GetLowerBounds(at);
|
|
int rank{descriptor.rank()};
|
|
int j{0};
|
|
for (; j < zeroBasedDim; ++j) {
|
|
at[j] += from[j] - 1 /*lower bound*/;
|
|
}
|
|
for (++j; j < rank; ++j) {
|
|
at[j] += from[j - 1] - 1;
|
|
}
|
|
}
|
|
|
|
template <typename TYPE, typename ACCUMULATOR>
|
|
inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
|
|
SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
|
|
SubscriptValue xAt[maxRank];
|
|
GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
|
|
const auto &dim{x.GetDimension(zeroBasedDim)};
|
|
SubscriptValue at{dim.LowerBound()};
|
|
for (auto n{dim.Extent()}; n-- > 0; ++at) {
|
|
xAt[zeroBasedDim] = at;
|
|
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
|
|
break;
|
|
}
|
|
}
|
|
#ifdef _MSC_VER // work around MSVC spurious error
|
|
accumulator.GetResult(result, zeroBasedDim);
|
|
#else
|
|
accumulator.template GetResult(result, zeroBasedDim);
|
|
#endif
|
|
}
|
|
|
|
template <typename TYPE, typename ACCUMULATOR>
|
|
inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
|
|
SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
|
|
ACCUMULATOR &accumulator) {
|
|
SubscriptValue xAt[maxRank], maskAt[maxRank];
|
|
GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
|
|
GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
|
|
const auto &xDim{x.GetDimension(zeroBasedDim)};
|
|
SubscriptValue xPos{xDim.LowerBound()};
|
|
const auto &maskDim{mask.GetDimension(zeroBasedDim)};
|
|
SubscriptValue maskPos{maskDim.LowerBound()};
|
|
for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
|
|
++xPos, ++maskPos) {
|
|
maskAt[zeroBasedDim] = maskPos;
|
|
if (IsLogicalElementTrue(mask, maskAt)) {
|
|
xAt[zeroBasedDim] = xPos;
|
|
if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#ifdef _MSC_VER // work around MSVC spurious error
|
|
accumulator.GetResult(result, zeroBasedDim);
|
|
#else
|
|
accumulator.template GetResult(result, zeroBasedDim);
|
|
#endif
|
|
}
|
|
|
|
// Utility: establishes & allocates the result array for a partial
|
|
// reduction (i.e., one with DIM=).
|
|
static void CreatePartialReductionResult(Descriptor &result,
|
|
const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
|
|
TypeCode typeCode) {
|
|
int xRank{x.rank()};
|
|
if (dim < 1 || dim > xRank) {
|
|
terminator.Crash("%s: bad DIM=%d for rank %d", intrinsic, dim, xRank);
|
|
}
|
|
int zeroBasedDim{dim - 1};
|
|
SubscriptValue resultExtent[maxRank];
|
|
for (int j{0}; j < zeroBasedDim; ++j) {
|
|
resultExtent[j] = x.GetDimension(j).Extent();
|
|
}
|
|
for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
|
|
resultExtent[j - 1] = x.GetDimension(j).Extent();
|
|
}
|
|
result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
|
|
CFI_attribute_allocatable);
|
|
for (int j{0}; j + 1 < xRank; ++j) {
|
|
result.GetDimension(j).SetBounds(1, resultExtent[j]);
|
|
}
|
|
if (int stat{result.Allocate()}) {
|
|
terminator.Crash(
|
|
"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
|
|
}
|
|
}
|
|
|
|
// Partial reductions with DIM=
|
|
|
|
template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
|
|
inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
|
|
const Descriptor *mask, Terminator &terminator, const char *intrinsic,
|
|
ACCUMULATOR &accumulator) {
|
|
CreatePartialReductionResult(
|
|
result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
|
|
SubscriptValue at[maxRank];
|
|
result.GetLowerBounds(at);
|
|
INTERNAL_CHECK(at[0] == 1);
|
|
using CppType = CppTypeFor<CAT, KIND>;
|
|
if (mask) {
|
|
CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
|
|
SubscriptValue maskAt[maxRank]; // contents unused
|
|
if (mask->rank() > 0) {
|
|
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
|
|
accumulator.Reinitialize();
|
|
ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
|
|
x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
|
|
}
|
|
return;
|
|
} else if (!IsLogicalElementTrue(*mask, maskAt)) {
|
|
// scalar MASK=.FALSE.
|
|
accumulator.Reinitialize();
|
|
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
|
|
accumulator.GetResult(result.Element<CppType>(at));
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
// No MASK= or scalar MASK=.TRUE.
|
|
for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
|
|
accumulator.Reinitialize();
|
|
ReduceDimToScalar<CppType, ACCUMULATOR>(
|
|
x, dim - 1, at, result.Element<CppType>(at), accumulator);
|
|
}
|
|
}
|
|
|
|
template <template <typename> class ACCUM>
|
|
struct PartialIntegerReductionHelper {
|
|
template <int KIND> struct Functor {
|
|
static constexpr int Intermediate{
|
|
std::max(KIND, 4)}; // use at least "int" for intermediate results
|
|
void operator()(Descriptor &result, const Descriptor &x, int dim,
|
|
const Descriptor *mask, Terminator &terminator,
|
|
const char *intrinsic) const {
|
|
using Accumulator =
|
|
ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
|
|
Accumulator accumulator{x};
|
|
PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
|
|
result, x, dim, mask, terminator, intrinsic, accumulator);
|
|
}
|
|
};
|
|
};
|
|
|
|
template <template <typename> class INTEGER_ACCUM>
|
|
inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
|
|
int dim, int kind, const Descriptor *mask, const char *intrinsic,
|
|
Terminator &terminator) {
|
|
ApplyIntegerKind<
|
|
PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
|
|
kind, terminator, result, x, dim, mask, terminator, intrinsic);
|
|
}
|
|
|
|
template <TypeCategory CAT, template <typename> class ACCUM>
|
|
struct PartialFloatingReductionHelper {
|
|
template <int KIND> struct Functor {
|
|
static constexpr int Intermediate{
|
|
std::max(KIND, 8)}; // use at least "double" for intermediate results
|
|
void operator()(Descriptor &result, const Descriptor &x, int dim,
|
|
const Descriptor *mask, Terminator &terminator,
|
|
const char *intrinsic) const {
|
|
using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
|
|
Accumulator accumulator{x};
|
|
PartialReduction<Accumulator, CAT, KIND>(
|
|
result, x, dim, mask, terminator, intrinsic, accumulator);
|
|
}
|
|
};
|
|
};
|
|
|
|
template <template <typename> class INTEGER_ACCUM,
|
|
template <typename> class REAL_ACCUM,
|
|
template <typename> class COMPLEX_ACCUM>
|
|
inline void TypedPartialNumericReduction(Descriptor &result,
|
|
const Descriptor &x, int dim, const char *source, int line,
|
|
const Descriptor *mask, const char *intrinsic) {
|
|
Terminator terminator{source, line};
|
|
auto catKind{x.type().GetCategoryAndKind()};
|
|
RUNTIME_CHECK(terminator, catKind.has_value());
|
|
switch (catKind->first) {
|
|
case TypeCategory::Integer:
|
|
PartialIntegerReduction<INTEGER_ACCUM>(
|
|
result, x, dim, catKind->second, mask, intrinsic, terminator);
|
|
break;
|
|
case TypeCategory::Real:
|
|
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
|
|
REAL_ACCUM>::template Functor,
|
|
void>(catKind->second, terminator, result, x, dim, mask, terminator,
|
|
intrinsic);
|
|
break;
|
|
case TypeCategory::Complex:
|
|
ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
|
|
COMPLEX_ACCUM>::template Functor,
|
|
void>(catKind->second, terminator, result, x, dim, mask, terminator,
|
|
intrinsic);
|
|
break;
|
|
default:
|
|
terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
|
|
}
|
|
}
|
|
|
|
template <typename ACCUMULATOR> struct LocationResultHelper {
|
|
template <int KIND> struct Functor {
|
|
void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
|
|
accumulator.GetResult(
|
|
result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
|
|
}
|
|
};
|
|
};
|
|
|
|
template <typename ACCUMULATOR> struct PartialLocationHelper {
|
|
template <int KIND> struct Functor {
|
|
void operator()(Descriptor &result, const Descriptor &x, int dim,
|
|
const Descriptor *mask, Terminator &terminator, const char *intrinsic,
|
|
ACCUMULATOR &accumulator) const {
|
|
PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
|
|
result, x, dim, mask, terminator, intrinsic, accumulator);
|
|
}
|
|
};
|
|
};
|
|
|
|
} // namespace Fortran::runtime
|
|
#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
|