OpcodeDispatcher: Cache named vector constants in the block

If a named constant of a given size is used multiple times within a block,
reuse the previously loaded value while it is still in scope.

Makes repeated addsubp{s,d} and phminposuw instructions within a block
more optimal.

Needs #2993 merged first.
This commit is contained in:
Ryan Houdek 2023-08-23 20:58:34 -07:00
parent ab83ab42dd
commit 565b30e15e
2 changed files with 28 additions and 2 deletions

View File

@ -12,6 +12,7 @@
#include <FEXCore/IR/IREmitter.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/vector.h>
@ -90,6 +91,9 @@ public:
// New block needs to reset segment telemetry.
SegmentsNeedReadCheck = ~0U;
// Need to clear any named constants that were cached.
ClearCachedNamedConstants();
}
bool FinishOp(uint64_t NextRIP, bool LastOp) {
@ -1196,6 +1200,28 @@ private:
}
}
// Named constant cache for the current block.
// First index: the FEXCore::IR::NamedVectorConstant that was loaded.
// Second index: log2 of the vector size in bytes, one slot each for sizes 1,2,4,8,16,32.
// Entries start out null; a null entry means that (constant, size) pair has not
// been loaded since the cache was last cleared.
OrderedNode *CachedNamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MAX][6]{};
// Load a named vector constant, reusing the node that was already loaded for the
// same (constant, size) pair if the cache still holds one.
//
// Size:          Vector size in bytes. Only non-zero power-of-two sizes that have
//                a slot in CachedNamedVectorConstants are cached.
// NamedConstant: Which named vector constant to load.
//
// Returns the node for the constant, either taken from the cache or freshly
// produced by _LoadNamedVectorConstant.
OrderedNode *LoadAndCacheNamedVectorConstant(uint8_t Size, FEXCore::IR::NamedVectorConstant NamedConstant) {
  // Derive the cache dimensions from the array itself so the bounds checks
  // below can never drift from its declaration.
  constexpr uint64_t NumConstants = sizeof(CachedNamedVectorConstants) / sizeof(CachedNamedVectorConstants[0]);
  constexpr uint64_t NumSizeSlots = sizeof(CachedNamedVectorConstants[0]) / sizeof(CachedNamedVectorConstants[0][0]);

  // A floor-log2 of a non-power-of-two size would alias the slot of a smaller
  // size, so such sizes are treated as having no slot at all.
  const bool IsPow2Size = Size != 0 && (Size & (Size - 1)) == 0;
  const auto ConstantIndex = static_cast<uint64_t>(NamedConstant);
  const auto SizeIndex = IsPow2Size ? static_cast<uint64_t>(FEXCore::ilog2(Size)) : NumSizeSlots;

  if (ConstantIndex >= NumConstants || SizeIndex >= NumSizeSlots) {
    // No cache slot exists for this request; emit an uncached load instead of
    // indexing outside the cache array.
    return _LoadNamedVectorConstant(Size, NamedConstant);
  }

  auto &CachedEntry = CachedNamedVectorConstants[ConstantIndex][SizeIndex];
  if (!CachedEntry) {
    // First use of this (constant, size) pair since the cache was cleared.
    CachedEntry = _LoadNamedVectorConstant(Size, NamedConstant);
  }
  return CachedEntry;
}
// Forget every cached named vector constant.
// The cache is only kept per block, so this sets every slot back to null.
void ClearCachedNamedConstants() {
  for (auto &PerConstant : CachedNamedVectorConstants) {
    for (auto &Entry : PerConstant) {
      Entry = nullptr;
    }
  }
}
OrderedNode *SelectCC(uint8_t OP, OrderedNode *TrueValue, OrderedNode *FalseValue);
/**

View File

@ -2962,7 +2962,7 @@ OrderedNode* OpDispatchBuilder::ADDSUBPOpImpl(OpcodeArgs, size_t ElementSize,
OrderedNode *Src1, OrderedNode *Src2) {
const auto Size = GetSrcSize(Op);
auto ConstantEOR = _LoadNamedVectorConstant(Size, ElementSize == 4 ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT);
auto ConstantEOR = LoadAndCacheNamedVectorConstant(Size, ElementSize == 4 ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT);
auto InvertedSource = _VXor(Size, ElementSize, Src2, ConstantEOR);
return _VFAdd(Size, ElementSize, Src1, InvertedSource);
}
@ -3858,7 +3858,7 @@ OrderedNode* OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) {
// Setup a vector swizzle
// Initially load a 64-bit mask of immediates
// Then zero-extend that to 128-bit mask with the immediates in the lower 16-bits of each element
auto ConstantSwizzle = _LoadNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX);
auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX);
// We now need to zip the vector sources together to become two uint32x4_t vectors
// Upper: