mirror of
https://github.com/RPCS3/asmjit.git
synced 2025-02-17 01:58:05 +00:00
[Opt] Added a feature to the Compiler to remove dead moves (moves to itself) when it's provable that it's safe
This commit is contained in:
parent
752eb38a4d
commit
8fdee13aea
@ -139,7 +139,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
|
||||
return DebugUtils::errored(kErrorInvalidInstruction);
|
||||
|
||||
out->_instFlags = 0;
|
||||
out->_instFlags = InstRWFlags::kNone;
|
||||
out->_opCount = uint8_t(opCount);
|
||||
out->_rmFeature = 0;
|
||||
out->_extraReg.reset();
|
||||
|
@ -136,6 +136,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstB
|
||||
const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
|
||||
uint32_t singleRegOps = 0;
|
||||
|
||||
ib.addInstRWFlags(rwInfo.instFlags());
|
||||
|
||||
if (opCount) {
|
||||
uint32_t consecutiveOffset = 0xFFFFFFFFu;
|
||||
uint32_t consecutiveParent = Globals::kInvalidId;
|
||||
|
@ -618,13 +618,25 @@ struct OpRWInfo {
|
||||
//! \}
|
||||
};
|
||||
|
||||
//! Instruction-level flags reported through \ref InstRWInfo (see `InstRWInfo::instFlags()`).
|
||||
enum class InstRWFlags : uint32_t {
|
||||
//! No flags.
|
||||
kNone = 0x00000000u,
|
||||
|
||||
//! Describes a move operation.
|
||||
//!
|
||||
//! This flag is used by the register allocator (RA) to eliminate moves that are guaranteed to be moves only,
//! for example a move whose destination and source end up being the same register.
|
||||
kMovOp = 0x00000001u
|
||||
};
|
||||
ASMJIT_DEFINE_ENUM_FLAGS(InstRWFlags)
|
||||
|
||||
//! Read/Write information of an instruction.
|
||||
struct InstRWInfo {
|
||||
//! \name Members
|
||||
//! \{
|
||||
|
||||
//! Instruction flags, see \ref InstRWFlags.
|
||||
uint32_t _instFlags;
|
||||
InstRWFlags _instFlags;
|
||||
//! CPU flags read.
|
||||
CpuRWFlags _readFlags;
|
||||
//! CPU flags written.
|
||||
@ -650,6 +662,20 @@ struct InstRWInfo {
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name Instruction Flags
|
||||
//! \{
|
||||
|
||||
//! Returns flags associated with the instruction, see \ref InstRWFlags.
|
||||
inline InstRWFlags instFlags() const noexcept { return _instFlags; }
|
||||
|
||||
//! Tests whether the instruction flags contain `flag`.
|
||||
inline bool hasInstFlag(InstRWFlags flag) const noexcept { return Support::test(_instFlags, flag); }
|
||||
|
||||
//! Tests whether the instruction flags contain \ref InstRWFlags::kMovOp.
|
||||
inline bool isMovOp() const noexcept { return hasInstFlag(InstRWFlags::kMovOp); }
|
||||
|
||||
//! \}
|
||||
|
||||
//! \name CPU Flags Information
|
||||
//! \{
|
||||
|
||||
|
@ -276,6 +276,8 @@ public:
|
||||
|
||||
//! Parent block.
|
||||
RABlock* _block;
|
||||
//! Instruction RW flags.
|
||||
InstRWFlags _instRWFlags;
|
||||
//! Aggregated RATiedFlags from all operands & instruction specific flags.
|
||||
RATiedFlags _flags;
|
||||
//! Total count of RATiedReg's.
|
||||
@ -298,9 +300,10 @@ public:
|
||||
//! \name Construction & Destruction
|
||||
//! \{
|
||||
|
||||
inline RAInst(RABlock* block, RATiedFlags flags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
|
||||
inline RAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags tiedFlags, uint32_t tiedTotal, const RARegMask& clobberedRegs) noexcept {
|
||||
_block = block;
|
||||
_flags = flags;
|
||||
_instRWFlags = instRWFlags;
|
||||
_flags = tiedFlags;
|
||||
_tiedTotal = tiedTotal;
|
||||
_tiedIndex.reset();
|
||||
_tiedCount.reset();
|
||||
@ -314,6 +317,13 @@ public:
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns instruction RW flags.
|
||||
inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; };
|
||||
//! Tests whether the given `flag` is present in instruction RW flags.
|
||||
inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
|
||||
//! Adds `flags` to instruction RW flags.
|
||||
inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
|
||||
|
||||
//! Returns the instruction flags.
|
||||
inline RATiedFlags flags() const noexcept { return _flags; }
|
||||
//! Tests whether the instruction has flag `flag`.
|
||||
@ -376,6 +386,9 @@ public:
|
||||
//! \name Members
|
||||
//! \{
|
||||
|
||||
//! Instruction RW flags.
|
||||
InstRWFlags _instRWFlags;
|
||||
|
||||
//! Flags combined from all RATiedReg's.
|
||||
RATiedFlags _aggregatedFlags;
|
||||
//! Flags that will be cleared before storing the aggregated flags to `RAInst`.
|
||||
@ -400,6 +413,7 @@ public:
|
||||
|
||||
inline void init() noexcept { reset(); }
|
||||
inline void reset() noexcept {
|
||||
_instRWFlags = InstRWFlags::kNone;
|
||||
_aggregatedFlags = RATiedFlags::kNone;
|
||||
_forbiddenFlags = RATiedFlags::kNone;
|
||||
_count.reset();
|
||||
@ -414,10 +428,15 @@ public:
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
|
||||
inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
|
||||
inline InstRWFlags instRWFlags() const noexcept { return _instRWFlags; }
|
||||
inline bool hasInstRWFlag(InstRWFlags flag) const noexcept { return Support::test(_instRWFlags, flag); }
|
||||
inline void addInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags |= flags; }
|
||||
inline void clearInstRWFlags(InstRWFlags flags) noexcept { _instRWFlags &= ~flags; }
|
||||
|
||||
inline RATiedFlags aggregatedFlags() const noexcept { return _aggregatedFlags; }
|
||||
inline void addAggregatedFlags(RATiedFlags flags) noexcept { _aggregatedFlags |= flags; }
|
||||
|
||||
inline RATiedFlags forbiddenFlags() const noexcept { return _forbiddenFlags; }
|
||||
inline void addForbiddenFlags(RATiedFlags flags) noexcept { _forbiddenFlags |= flags; }
|
||||
|
||||
//! Returns the number of tied registers added to the builder.
|
||||
@ -859,16 +878,16 @@ public:
|
||||
return _exits.append(allocator(), block);
|
||||
}
|
||||
|
||||
ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
|
||||
ASMJIT_FORCE_INLINE RAInst* newRAInst(RABlock* block, InstRWFlags instRWFlags, RATiedFlags flags, uint32_t tiedRegCount, const RARegMask& clobberedRegs) noexcept {
|
||||
void* p = zone()->alloc(RAInst::sizeOf(tiedRegCount));
|
||||
if (ASMJIT_UNLIKELY(!p))
|
||||
return nullptr;
|
||||
return new(p) RAInst(block, flags, tiedRegCount, clobberedRegs);
|
||||
return new(p) RAInst(block, instRWFlags, flags, tiedRegCount, clobberedRegs);
|
||||
}
|
||||
|
||||
ASMJIT_FORCE_INLINE Error assignRAInst(BaseNode* node, RABlock* block, RAInstBuilder& ib) noexcept {
|
||||
uint32_t tiedRegCount = ib.tiedRegCount();
|
||||
RAInst* raInst = newRAInst(block, ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
|
||||
RAInst* raInst = newRAInst(block, ib.instRWFlags(), ib.aggregatedFlags(), tiedRegCount, ib._clobbered);
|
||||
|
||||
if (ASMJIT_UNLIKELY(!raInst))
|
||||
return DebugUtils::errored(kErrorOutOfMemory);
|
||||
|
@ -606,7 +606,7 @@ namespace Inst {
|
||||
kIdPaddusb, //!< Instruction 'paddusb' {MMX|SSE2}.
|
||||
kIdPaddusw, //!< Instruction 'paddusw' {MMX|SSE2}.
|
||||
kIdPaddw, //!< Instruction 'paddw' {MMX|SSE2}.
|
||||
kIdPalignr, //!< Instruction 'palignr' {SSSE3}.
|
||||
kIdPalignr, //!< Instruction 'palignr' {SSSE3}.
|
||||
kIdPand, //!< Instruction 'pand' {MMX|SSE2}.
|
||||
kIdPandn, //!< Instruction 'pandn' {MMX|SSE2}.
|
||||
kIdPause, //!< Instruction 'pause'.
|
||||
|
@ -776,6 +776,15 @@ static ASMJIT_FORCE_INLINE Error rwHandleAVX512(const BaseInst& inst, const Inst
|
||||
return kErrorOk;
|
||||
}
|
||||
|
||||
//! Tests whether every register in `regs[0..opCount)` has the same register type as `regs[0]`.
//!
//! `opCount` must be greater than zero (asserted).
static ASMJIT_FORCE_INLINE bool hasSameRegType(const BaseReg* regs, size_t opCount) noexcept {
  ASMJIT_ASSERT(opCount > 0);

  // The first register defines the type all remaining registers are compared against.
  const RegType expectedType = regs[0].type();
  size_t index = 1;

  while (index < opCount) {
    if (regs[index].type() != expectedType)
      return false;
    index++;
  }

  return true;
}
|
||||
|
||||
Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
|
||||
// Only called when `arch` matches X86 family.
|
||||
ASMJIT_ASSERT(Environment::isFamilyX86(arch));
|
||||
@ -801,13 +810,14 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
: InstDB::rwInfoB[InstDB::rwInfoIndexB[instId]];
|
||||
const InstDB::RWInfoRm& instRmInfo = InstDB::rwInfoRm[instRwInfo.rmInfo];
|
||||
|
||||
out->_instFlags = 0;
|
||||
out->_instFlags = InstDB::_instFlagsTable[additionalInfo._instFlagsIndex];
|
||||
out->_opCount = uint8_t(opCount);
|
||||
out->_rmFeature = instRmInfo.rmFeature;
|
||||
out->_extraReg.reset();
|
||||
out->_readFlags = CpuRWFlags(rwFlags.readFlags);
|
||||
out->_writeFlags = CpuRWFlags(rwFlags.writeFlags);
|
||||
|
||||
uint32_t opTypeMask = 0u;
|
||||
uint32_t nativeGpSize = Environment::registerSizeFromArch(arch);
|
||||
|
||||
constexpr OpRWFlags R = OpRWFlags::kRead;
|
||||
@ -827,6 +837,8 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
const Operand_& srcOp = operands[i];
|
||||
const InstDB::RWInfoOp& rwOpData = InstDB::rwInfoOp[instRwInfo.opInfoIndex[i]];
|
||||
|
||||
opTypeMask |= Support::bitMask(srcOp.opType());
|
||||
|
||||
if (!srcOp.isRegOrMem()) {
|
||||
op.reset();
|
||||
continue;
|
||||
@ -878,8 +890,23 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
}
|
||||
}
|
||||
|
||||
if (instRmInfo.flags & (InstDB::RWInfoRm::kFlagPextrw | InstDB::RWInfoRm::kFlagFeatureIfRMI)) {
|
||||
if (instRmInfo.flags & InstDB::RWInfoRm::kFlagPextrw) {
|
||||
// Only keep kMovOp if the instruction is actually register to register move of the same kind.
|
||||
if (out->hasInstFlag(InstRWFlags::kMovOp)) {
|
||||
if (!(opCount >= 2 && opTypeMask == Support::bitMask(OperandType::kReg) && hasSameRegType(reinterpret_cast<const BaseReg*>(operands), opCount)))
|
||||
out->_instFlags &= ~InstRWFlags::kMovOp;
|
||||
}
|
||||
|
||||
// Special cases require more logic.
|
||||
if (instRmInfo.flags & (InstDB::RWInfoRm::kFlagMovssMovsd | InstDB::RWInfoRm::kFlagPextrw | InstDB::RWInfoRm::kFlagFeatureIfRMI)) {
|
||||
if (instRmInfo.flags & InstDB::RWInfoRm::kFlagMovssMovsd) {
|
||||
if (opCount == 2) {
|
||||
if (operands[0].isReg() && operands[1].isReg()) {
|
||||
// Doesn't zero extend the destination.
|
||||
out->_operands[0]._extendByteMask = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (instRmInfo.flags & InstDB::RWInfoRm::kFlagPextrw) {
|
||||
if (opCount == 3 && Reg::isMm(operands[1])) {
|
||||
out->_rmFeature = 0;
|
||||
rmOpsMask = 0;
|
||||
@ -930,6 +957,9 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
// used to move between GP, segment, control and debug registers. Moving between GP registers also allow to
|
||||
// use memory operand.
|
||||
|
||||
// We will again set the flag if it's actually a move from GP to GP register, otherwise this flag cannot be set.
|
||||
out->_instFlags &= ~InstRWFlags::kMovOp;
|
||||
|
||||
if (opCount == 2) {
|
||||
if (operands[0].isReg() && operands[1].isReg()) {
|
||||
const Reg& o0 = operands[0].as<Reg>();
|
||||
@ -940,6 +970,7 @@ Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_*
|
||||
out->_operands[1].reset(R | RegM, operands[1].size());
|
||||
|
||||
rwZeroExtendGp(out->_operands[0], operands[0].as<Gp>(), nativeGpSize);
|
||||
out->_instFlags |= InstRWFlags::kMovOp;
|
||||
return kErrorOk;
|
||||
}
|
||||
|
||||
@ -1647,10 +1678,10 @@ UNIT(x86_inst_api_rm_feature) {
|
||||
InstRWInfo rwi;
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, mm1, imm(1));
|
||||
EXPECT(rwi._rmFeature == 0);
|
||||
EXPECT(rwi.rmFeature() == 0);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdPextrw, InstOptions::kNone, eax, xmm1, imm(1));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kSSE4_1);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kSSE4_1);
|
||||
}
|
||||
|
||||
INFO("Verifying whether RM/feature is reported correctly for AVX512 shift instructions");
|
||||
@ -1658,40 +1689,40 @@ UNIT(x86_inst_api_rm_feature) {
|
||||
InstRWInfo rwi;
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_F);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllq, InstOptions::kNone, ymm1, ymm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_F);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrad, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_F);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrld, InstOptions::kNone, ymm1, ymm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_F);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlq, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_F);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_F);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslldq, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_BW);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, ymm1, ymm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_BW);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsraw, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_BW);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrldq, InstOptions::kNone, ymm1, ymm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_BW);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsrlw, InstOptions::kNone, xmm1, xmm2, imm(8));
|
||||
EXPECT(rwi._rmFeature == CpuFeatures::X86::kAVX512_BW);
|
||||
EXPECT(rwi.rmFeature() == CpuFeatures::X86::kAVX512_BW);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpslld, InstOptions::kNone, xmm1, xmm2, xmm3);
|
||||
EXPECT(rwi._rmFeature == 0);
|
||||
EXPECT(rwi.rmFeature() == 0);
|
||||
|
||||
queryRWInfoSimple(&rwi, Arch::kX64, Inst::kIdVpsllw, InstOptions::kNone, xmm1, xmm2, xmm3);
|
||||
EXPECT(rwi._rmFeature == 0);
|
||||
EXPECT(rwi.rmFeature() == 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -461,7 +461,7 @@ struct InstInfo {
|
||||
//! \name Accessors
|
||||
//! \{
|
||||
|
||||
//! Returns common information, see `CommonInfo`.
|
||||
//! Returns common information, see \ref CommonInfo.
|
||||
inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }
|
||||
|
||||
//! Returns instruction flags, see \ref Flags.
|
||||
|
@ -189,12 +189,12 @@ enum EncodingId : uint32_t {
|
||||
|
||||
//! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
|
||||
struct AdditionalInfo {
|
||||
//! Features vector.
|
||||
uint8_t _features[6];
|
||||
//! Index to `_instFlagsTable`.
|
||||
uint8_t _instFlagsIndex;
|
||||
//! Index to `_rwFlagsTable`.
|
||||
uint8_t _rwFlagsIndex;
|
||||
//! Reserved for future use.
|
||||
uint8_t _reserved;
|
||||
//! Features vector.
|
||||
uint8_t _features[6];
|
||||
|
||||
inline const uint8_t* featuresBegin() const noexcept { return _features; }
|
||||
inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
|
||||
@ -260,8 +260,12 @@ struct RWInfoRm {
|
||||
|
||||
enum Flags : uint8_t {
|
||||
kFlagAmbiguous = 0x01,
|
||||
//! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
|
||||
kFlagPextrw = 0x02,
|
||||
kFlagFeatureIfRMI = 0x04
|
||||
//! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
|
||||
kFlagMovssMovsd = 0x04,
|
||||
//! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
|
||||
kFlagFeatureIfRMI = 0x08
|
||||
};
|
||||
|
||||
uint8_t category;
|
||||
@ -285,6 +289,7 @@ extern const RWInfo rwInfoB[];
|
||||
extern const RWInfoOp rwInfoOp[];
|
||||
extern const RWInfoRm rwInfoRm[];
|
||||
extern const RWFlagsInfoTable _rwFlagsInfoTable[];
|
||||
extern const InstRWFlags _instFlagsTable[];
|
||||
|
||||
extern const uint32_t _mainOpcodeTable[];
|
||||
extern const uint32_t _altOpcodeTable[];
|
||||
|
@ -126,6 +126,12 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
|
||||
bool hasGpbHiConstraint = false;
|
||||
uint32_t singleRegOps = 0;
|
||||
|
||||
// Copy instruction RW flags to instruction builder except kMovOp, which is propagated manually later.
|
||||
ib.addInstRWFlags(rwInfo.instFlags() & ~InstRWFlags::kMovOp);
|
||||
|
||||
// Mask of all operand types used by the instruction - can be used as an optimization later.
|
||||
uint32_t opTypesMask = 0u;
|
||||
|
||||
if (opCount) {
|
||||
// The mask is for all registers, but we are mostly interested in AVX-512 registers at the moment. The mask
|
||||
// will be combined with all available registers of the Compiler at the end so that it never uses more registers
|
||||
@ -167,6 +173,8 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
|
||||
const Operand& op = opArray[i];
|
||||
const OpRWInfo& opRwInfo = rwInfo.operand(i);
|
||||
|
||||
opTypesMask |= 1u << uint32_t(op.opType());
|
||||
|
||||
if (op.isReg()) {
|
||||
// Register Operand
|
||||
// ----------------
|
||||
@ -394,6 +402,24 @@ Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& cf, RAInstBuilder& i
|
||||
}
|
||||
}
|
||||
|
||||
// If this instruction has move semantics then check whether it could be eliminated if all virtual registers
|
||||
// are allocated into the same register. Take into account the virtual size of the destination register as that's
|
||||
// more important than a physical register size in this case.
|
||||
if (rwInfo.hasInstFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg() && Support::bitTest(opTypesMask, uint32_t(OperandType::kReg))) {
|
||||
// AVX+ move instructions have a 3-operand form - the first two operands must be the same to guarantee move semantics.
|
||||
if (opCount == 2 || (opCount == 3 && opArray[0] == opArray[1])) {
|
||||
uint32_t vIndex = Operand::virtIdToIndex(opArray[0].as<Reg>().id());
|
||||
if (vIndex < Operand::kVirtIdCount) {
|
||||
const VirtReg* vReg = _cc->virtRegByIndex(vIndex);
|
||||
const OpRWInfo& opRwInfo = rwInfo.operand(0);
|
||||
|
||||
uint64_t remainingByteMask = vReg->workReg()->regByteMask() & ~opRwInfo.writeByteMask();
|
||||
if (remainingByteMask == 0u || (remainingByteMask & opRwInfo.extendByteMask()) == 0)
|
||||
ib.addInstRWFlags(InstRWFlags::kMovOp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle X86 constraints.
|
||||
if (hasGpbHiConstraint) {
|
||||
for (RATiedReg& tiedReg : ib) {
|
||||
@ -1251,6 +1277,10 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
|
||||
|
||||
// Rewrite virtual registers into physical registers.
|
||||
if (raInst) {
|
||||
// This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
|
||||
// So reset this data to prevent having a dead pointer after the RA pass is complete.
|
||||
node->resetPassData();
|
||||
|
||||
// If the instruction contains pass data (raInst) then it was a subject for register allocation and must be
|
||||
// rewritten to use physical regs.
|
||||
RATiedReg* tiedRegs = raInst->tiedRegs();
|
||||
@ -1274,16 +1304,25 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
|
||||
}
|
||||
}
|
||||
|
||||
// Transform VEX instruction to EVEX when necessary.
|
||||
if (raInst->isTransformable()) {
|
||||
if (maxRegId > 15) {
|
||||
// Transform VEX instruction to EVEX.
|
||||
inst->setId(transformVexToEvex(inst->id()));
|
||||
}
|
||||
}
|
||||
|
||||
// This data is allocated by Zone passed to `runOnFunction()`, which will be reset after the RA pass finishes.
|
||||
// So reset this data to prevent having a dead pointer after the RA pass is complete.
|
||||
node->resetPassData();
|
||||
// Remove moves that do not do anything.
|
||||
//
|
||||
// Usually these moves are inserted during code generation and originally they used different registers. If RA
|
||||
// allocated these into the same register such redundant mov would appear.
|
||||
if (raInst->hasInstRWFlag(InstRWFlags::kMovOp) && !inst->hasExtraReg()) {
|
||||
if (inst->opCount() == 2) {
|
||||
if (inst->op(0) == inst->op(1)) {
|
||||
cc()->removeNode(node);
|
||||
goto Next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
|
||||
// FuncRet terminates the flow, it must either be removed if the exit label is next to it (optimization) or
|
||||
@ -1327,6 +1366,7 @@ ASMJIT_FAVOR_SPEED Error X86RAPass::_rewrite(BaseNode* first, BaseNode* stop) no
|
||||
}
|
||||
}
|
||||
|
||||
Next:
|
||||
node = next;
|
||||
}
|
||||
|
||||
|
@ -1786,6 +1786,7 @@ class AdditionalInfoTable extends core.Task {
|
||||
run() {
|
||||
const insts = this.ctx.insts;
|
||||
const rwInfoTable = new IndexedArray();
|
||||
const instFlagsTable = new IndexedArray();
|
||||
const additionaInfoTable = new IndexedArray();
|
||||
|
||||
// If the instruction doesn't read any flags it should point to the first index.
|
||||
@ -1800,9 +1801,48 @@ class AdditionalInfoTable extends core.Task {
|
||||
var [r, w] = this.rwFlagsOf(dbInsts);
|
||||
const rData = r.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
|
||||
const wData = w.map(function(flag) { return `FLAG(${flag})`; }).join(" | ") || "0";
|
||||
const rwDataIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);
|
||||
const instFlags = Object.create(null);
|
||||
|
||||
inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ { ${features} }, ${rwDataIndex}, 0 }`);
|
||||
switch (inst.name) {
|
||||
case "kmovb":
|
||||
case "kmovd":
|
||||
case "kmovq":
|
||||
case "kmovw":
|
||||
case "mov":
|
||||
case "movq":
|
||||
case "movsd":
|
||||
case "movss":
|
||||
case "movapd":
|
||||
case "movaps":
|
||||
case "movdqa":
|
||||
case "movdqu":
|
||||
case "movupd":
|
||||
case "movups":
|
||||
case "vmovapd":
|
||||
case "vmovaps":
|
||||
case "vmovdqa":
|
||||
case "vmovdqa8":
|
||||
case "vmovdqa16":
|
||||
case "vmovdqa32":
|
||||
case "vmovdqa64":
|
||||
case "vmovdqu":
|
||||
case "vmovdqu8":
|
||||
case "vmovdqu16":
|
||||
case "vmovdqu32":
|
||||
case "vmovdqu64":
|
||||
case "vmovq":
|
||||
case "vmovsd":
|
||||
case "vmovss":
|
||||
case "vmovupd":
|
||||
case "vmovups":
|
||||
instFlags["MovOp"] = true;
|
||||
break;
|
||||
}
|
||||
|
||||
const instFlagsIndex = instFlagsTable.addIndexed("InstRWFlags(" + CxxUtils.flags(instFlags, (f) => { return `FLAG(${f})`; }, "FLAG(None)") + ")");
|
||||
const rwInfoIndex = rwInfoTable.addIndexed(`{ ${rData}, ${wData} }`);
|
||||
|
||||
inst.additionalInfoIndex = additionaInfoTable.addIndexed(`{ ${instFlagsIndex}, ${rwInfoIndex}, { ${features} } }`);
|
||||
});
|
||||
|
||||
var s = `#define EXT(VAL) uint32_t(CpuFeatures::X86::k##VAL)\n` +
|
||||
@ -1811,8 +1851,12 @@ class AdditionalInfoTable extends core.Task {
|
||||
`\n` +
|
||||
`#define FLAG(VAL) uint32_t(CpuRWFlags::kX86_##VAL)\n` +
|
||||
`const InstDB::RWFlagsInfoTable InstDB::_rwFlagsInfoTable[] = {\n${StringUtils.format(rwInfoTable, kIndent, true)}\n};\n` +
|
||||
`#undef FLAG\n` +
|
||||
`\n` +
|
||||
`#define FLAG(VAL) uint32_t(InstRWFlags::k##VAL)\n` +
|
||||
`const InstRWFlags InstDB::_instFlagsTable[] = {\n${StringUtils.format(instFlagsTable, kIndent, true)}\n};\n` +
|
||||
`#undef FLAG\n`;
|
||||
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8);
|
||||
this.inject("AdditionalInfoTable", disclaimer(s), additionaInfoTable.length * 8 + rwInfoTable.length * 8 + instFlagsTable.length * 4);
|
||||
}
|
||||
|
||||
rwFlagsOf(dbInsts) {
|
||||
@ -2032,6 +2076,7 @@ class InstRWInfoTable extends core.Task {
|
||||
String(Math.max(rmInfo.memFixed, 0)).padEnd(2),
|
||||
CxxUtils.flags({
|
||||
"InstDB::RWInfoRm::kFlagAmbiguous": Boolean(rmInfo.memAmbiguous),
|
||||
"InstDB::RWInfoRm::kFlagMovssMovsd": Boolean(inst.name === "movss" || inst.name === "movsd"),
|
||||
"InstDB::RWInfoRm::kFlagPextrw": Boolean(inst.name === "pextrw"),
|
||||
"InstDB::RWInfoRm::kFlagFeatureIfRMI": Boolean(rmInfo.memExtensionIfRMI)
|
||||
}),
|
||||
|
Loading…
x
Reference in New Issue
Block a user