3rdparty: Update xbyak to v7.27

This commit is contained in:
JordanTheToaster
2025-07-06 22:31:02 +01:00
committed by lightningterror
parent 7e00b2c9a9
commit 61f11d12ff
4 changed files with 665 additions and 592 deletions

View File

@@ -25,23 +25,3 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
す場合に限り、再頒布および使用が許可されます。
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
を含めること。
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
権表示、本条件一覧、および下記免責条項を含めること。
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
一切責任を負わないものとします。

View File

@@ -151,11 +151,17 @@
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
// Define this macro as 0 to disable strict checking of memory operand and register size matching.
// This macro may be removed in future versions.
#ifndef XBYAK_STRICT_CHECK_MEM_REG_SIZE
#define XBYAK_STRICT_CHECK_MEM_REG_SIZE 1
#endif
namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
VERSION = 0x7270 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -233,6 +239,7 @@ enum {
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
ERR_BAD_ENCODING_MODE,
ERR_CANT_USE_ABCDH,
ERR_INTERNAL // Put it at last.
};
@@ -292,6 +299,7 @@ inline const char *ConvertErrorToString(int err)
"invalid dfv",
"invalid reg index",
"bad encoding mode",
"can't use [abcd]h with rex",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -636,12 +644,12 @@ public:
void setBit(int bit);
void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
{
if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
if (mask_ && (mask_ != unsigned(idx))) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
mask_ = idx;
}
void setRounding(int idx)
{
if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
if (rounding_ && (rounding_ != unsigned(idx))) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
rounding_ = idx;
}
void setZero() { zero_ = true; }
@@ -786,6 +794,9 @@ class Label;
struct Reg8;
struct Reg16;
struct Reg32;
struct Xmm;
struct Ymm;
struct Zmm;
#ifdef XBYAK64
struct Reg64;
#endif
@@ -801,6 +812,9 @@ public:
#ifdef XBYAK64
Reg64 cvt64() const;
#endif
Xmm cvt128() const;
Ymm cvt256() const;
Zmm cvt512() const;
// Returns a copy of this register on which setNF() has been called; *this is not modified.
Reg operator|(const ApxFlagNF&) const
{
	Reg flagged(*this);
	flagged.setNF();
	return flagged;
}
// Returns a copy of this register on which setZU() has been called; *this is not modified.
Reg operator|(const ApxFlagZU&) const
{
	Reg flagged(*this);
	flagged.setZU();
	return flagged;
}
};
@@ -938,6 +952,21 @@ inline Reg64 Reg::cvt64() const
}
#endif
// Builds an Xmm from the index this register reports after changeBit(128).
inline Xmm Reg::cvt128() const
{
	const int idx = changeBit(128).getIdx();
	return Xmm(idx);
}
// Builds a Ymm from the index this register reports after changeBit(256).
inline Ymm Reg::cvt256() const
{
	const int idx = changeBit(256).getIdx();
	return Ymm(idx);
}
// Builds a Zmm from the index this register reports after changeBit(512).
inline Zmm Reg::cvt512() const
{
	const int idx = changeBit(512).getIdx();
	return Zmm(idx);
}
#ifndef XBYAK_DISABLE_SEGMENT
// not derived from Reg
class Segment {
@@ -1819,12 +1848,11 @@ private:
static const uint64_t T_0F = 1ull << 8;
static const uint64_t T_0F38 = 1ull << 9;
static const uint64_t T_0F3A = 1ull << 10;
static const uint64_t T_L0 = 1ull << 11;
static const uint64_t T_MAP5 = 1ull << 11;
static const uint64_t T_L1 = 1ull << 12;
static const uint64_t T_W0 = 1ull << 13;
static const uint64_t T_W1 = 1ull << 14;
static const uint64_t T_EW0 = 1ull << 15;
static const uint64_t T_EW1 = 1ull << 16;
static const uint64_t T_W0 = 1ull << 13; // T_EW0 = T_W0
static const uint64_t T_W1 = 1ull << 14; // for VEX
static const uint64_t T_EW1 = 1ull << 16; // for EVEX
static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
static const uint64_t T_EVEX = 1ull << 18;
static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
@@ -1840,23 +1868,29 @@ private:
static const uint64_t T_M_K = 1ull << 28; // mem{k}
static const uint64_t T_VSIB = 1ull << 29;
static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
static const uint64_t T_MAP5 = T_FP16 | T_0F;
static const uint64_t T_MAP6 = T_FP16 | T_0F38;
static const uint64_t T_MAP6 = 1ull << 31;
static const uint64_t T_NF = 1ull << 32; // T_nf
static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
static const uint64_t T_ND1 = 1ull << 35; // ND=1
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
static const uint64_t T_F2 = 1ull << 37; // pp = 3
static const uint64_t T_SENTRY = (1ull << 38)-1; // attribute(>=T_SENTRY) is for error check
static const uint64_t T_ALLOW_DIFF_SIZE = 1ull << 38; // allow difference reg size
static const uint64_t T_ALLOW_ABCDH = 1ull << 39; // allow [abcd]h reg
// T_66 = 1, T_F3 = 2, T_F2 = 3
// Maps the prefix flag carried in type to the pp value; flags are tested in
// the order T_66, T_F3, T_F2 and 0 is returned when none of them is set.
static inline uint32_t getPP(uint64_t type)
{
	if (type & T_66) return 1;
	if (type & T_F3) return 2;
	if (type & T_F2) return 3;
	return 0;
}
// @@@end of avx_type_def.h
static inline uint32_t getMap(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
static inline uint32_t getMap(uint64_t type)
{
if (type & T_MAP6) return 6;
if (type & T_MAP5) return 5;
return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
}
void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false)
{
int w = (type & T_W1) ? 1 : 0;
bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
bool is256 = (type & T_L1) ? true : reg.isYMM();
bool r = reg.isExtIdx();
bool b = base.isExtIdx();
int idx = v ? v->getIdx() : 0;
@@ -1871,19 +1905,16 @@ private:
}
db(code);
}
// Allow YMM embedded rounding for AVX10.2 to minimize flag modifications
bool verifySAE(const Reg& r, const Reg& b, uint64_t type) const
void verifySAE(const Reg& r, uint64_t type) const
{
if (((type & T_SAE_X) && (r.isYMM() && b.isXMM())) || ((type & T_SAE_Y) && b.isXMM()) || ((type & T_SAE_Z) && b.isYMM())) return true;
if (((type & T_SAE_X) && b.isXMM()) || ((type & T_SAE_Y) && b.isYMM()) || ((type & T_SAE_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_SAE_IS_INVALID)
}
bool verifyER(const Reg& r, const Reg& b, uint64_t type) const
void verifyER(const Reg& r, uint64_t type) const
{
if ((type & T_ER_R) && b.isREG(32|64)) return false;
if (((type & T_ER_X) && (r.isYMM() && b.isXMM())) || ((type & T_ER_Y) && b.isXMM()) || ((type & T_ER_Z) && b.isYMM())) return true;
if (((type & T_ER_X) && b.isXMM()) || ((type & T_ER_Y) && b.isYMM()) || ((type & T_ER_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
if ((type & T_ER_R) && r.isREG(32|64)) return;
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_ER_IS_INVALID)
}
// (a, b, c) contains non zero two or three values then err
int verifyDuplicate(int a, int b, int c, int err)
@@ -1897,7 +1928,6 @@ private:
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
int w = (type & T_EW1) ? 1 : 0;
uint32_t mmm = getMap(type);
if (type & T_FP16) mmm |= 4;
uint32_t pp = getPP(type);
int idx = v ? v->getIdx() : 0;
uint32_t vvvv = ~idx;
@@ -1912,18 +1942,16 @@ private:
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
int disp8N = 1;
if (rounding) {
bool isUzero = false;
if (rounding == EvexModifierRounding::T_SAE) {
isUzero = verifySAE(reg, base, type); LL = 0;
verifySAE(base, type); LL = 0;
} else {
isUzero = verifyER(reg, base, type); LL = rounding - 1;
verifyER(base, type); LL = rounding - 1;
}
if (isUzero) U = 0; // avx10.2 Evex.U
b = true;
} else {
if (v) VL = (std::max)(VL, v->getBit());
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
LL = (VL >= 512 /* tmm */) ? 2 : (VL == 256) ? 1 : 0;
if (b) {
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
} else if ((type & T_NX_MASK) == T_DUP) {
@@ -2045,7 +2073,6 @@ private:
}
}
LabelManager labelMgr_;
bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false)
{
if (!(type&T_APX || rex2)) {
@@ -2057,19 +2084,24 @@ private:
db(0x0F); db(0x3A);
}
}
db(code | ((type == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
db(code | (((type & T_SENTRY) == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
}
void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code)
void opRR(const Reg& r1, const Reg& r2, uint64_t type, int code)
{
bool rex2 = rex(reg2, reg1, type);
writeCode(type, reg1, code, rex2);
setModRM(3, reg1.getIdx(), reg2.getIdx());
if (!(type & T_ALLOW_DIFF_SIZE) && r1.isREG() && r2.isREG() && r1.getBit() != r2.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
if (!(type & T_ALLOW_ABCDH) && (isBadCombination(r1, r2) || isBadCombination(r2, r1))) XBYAK_THROW(ERR_CANT_USE_ABCDH)
bool rex2 = rex(r2, r1, type);
writeCode(type, r1, code, rex2);
setModRM(3, r1.getIdx(), r2.getIdx());
}
void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE)
{
if (code2 == NONE) code2 = code;
if (type2 && opROO(Reg(), addr, r, type2, code2)) return;
if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
#if XBYAK_STRICT_CHECK_MEM_REG_SIZE == 1
if (!(type & T_ALLOW_DIFF_SIZE) && r.getBit() <= BIT && addr.getBit() > 0 && addr.getBit() != r.getBit()) XBYAK_THROW(ERR_BAD_MEM_SIZE)
#endif
bool rex2 = rex(addr, r, type);
writeCode(type, r, code, rex2);
opAddr(addr, r.getIdx());
@@ -2214,6 +2246,13 @@ private:
opSSE(mmx, op, T_66 | T_0F3A, code, isXMM_REG32orMEM, imm);
}
}
// r1 is [abcd]h and r2 is reg with rex
// True only when r1.isHigh8bit() holds and r2 is either an ext-8bit register
// or has an index of 8 or more; r2 is not inspected when r1 is not high-8bit.
bool isBadCombination(const Reg& r1, const Reg& r2) const
{
	return r1.isHigh8bit() && (r2.isExt8bit() || r2.getIdx() >= 8);
}
// (r, r, m) or (r, m, r)
bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE)
{
@@ -2241,11 +2280,11 @@ private:
int opBit = op.getBit();
if (disableRex && opBit == 64) opBit = 32;
const Reg r(ext, Operand::REG, opBit);
if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
if ((type & T_APX) && (d != 0 || op.hasRex2NFZU()) && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
if (op.isMEM()) {
opMR(op.getAddress(immSize), r, type, code);
} else if (op.isREG(bit)) {
opRR(r, op.getReg().changeBit(opBit), type, code);
opRR(r, op.getReg().changeBit(opBit), type | T_ALLOW_ABCDH, code);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
@@ -2304,10 +2343,13 @@ private:
opRO(static_cast<const Reg&>(op2), op1, 0, code, op1.getKind() == op2.getKind());
}
}
// True when x lies in [0, 0x7FFF] or [0xFFFF8000, 0xFFFFFFFF], i.e. x is the
// 32-bit two's-complement pattern of a value in [-32768, 32767].
bool isInDisp16(uint32_t x) const
{
	if (x <= 0x7FFF) return true;
	return x >= 0xFFFF8000;
}
// allow add(ax, 0x8000);
// True when the upper 16 bits of x are all zero or all one, so any value whose
// low half is arbitrary but whose high half is 0x0000 or 0xffff is accepted.
bool isInDisp16relaxed(uint32_t x) const
{
	const uint32_t upper = x >> 16;
	return upper == 0 || upper == 0xffff;
}
uint32_t getImmBit(const Operand& op, uint32_t imm)
{
verifyMemHasSize(op);
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16relaxed(imm) ? 16 : 32;
if (op.isBit(8)) immBit = 8;
if (op.getBit() < immBit) XBYAK_THROW_RET(ERR_IMM_IS_TOO_BIG, 0)
if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
@@ -2361,7 +2403,7 @@ private:
if (op.isREG() && op.hasRex2()) {
const Reg& r = static_cast<const Reg&>(op);
rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r);
db(alt);
db(alt | (r.getIdx() & 7));
return;
}
int bit = op.getBit();
@@ -2373,7 +2415,7 @@ private:
return;
}
if (op.isMEM()) {
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), 0, code);
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), T_ALLOW_DIFF_SIZE, code);
return;
}
}
@@ -2431,7 +2473,7 @@ private:
if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
int w = op.isBit(16);
if (!(reg.isREG() && (reg.getBit() > op.getBit()))) XBYAK_THROW(ERR_BAD_COMBINATION)
opRO(reg, op, T_0F, code | w);
opRO(reg, op, T_0F | T_ALLOW_DIFF_SIZE, code | w);
}
void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
{
@@ -2589,8 +2631,7 @@ private:
if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
if (is16bit) db(0x66);
opRO(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, T_F3 | T_0F, code);
opRO(reg, op, T_F3 | T_0F, code);
}
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode)
{
@@ -2676,7 +2717,7 @@ private:
}
if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW(ERR_EVEX_IS_INVALID)
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, VexEncoding)
#endif
return enc;
}
@@ -2746,10 +2787,12 @@ private:
#ifdef XBYAK64
void opAMX(const Tmm& t1, const Address& addr, uint64_t type, int code)
{
// require both base and index
Address addr2 = addr.cloneNoOptimize();
const RegExp exp = addr2.getRegExp();
if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
// require both base and index for all but opcode 0x49 (ldtilecfg/sttilecfg)
if (code != 0x49) {
const RegExp exp = addr2.getRegExp();
if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED)
}
if (opROO(Reg(), addr2, t1, T_APX|type, code)) return;
opVex(t1, &tmm0, addr2, type, code);
}
@@ -2776,7 +2819,7 @@ private:
const Operand *p1 = &k, *p2 = &op;
if (code == 0x93) { std::swap(p1, p2); }
if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|type, code);
opVex(static_cast<const Reg&>(*p1), 0, *p2, type, code);
}
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
{
@@ -3094,11 +3137,11 @@ public:
}
void mov(const Operand& op, const Segment& seg)
{
opRO(Reg8(seg.getIdx()), op, 0, 0x8C, op.isREG(16|i32e));
opRO(Reg8(seg.getIdx()), op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8C, op.isREG(16|i32e));
}
void mov(const Segment& seg, const Operand& op)
{
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, 0, 0x8E, op.isREG(16|i32e));
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8E, op.isREG(16|i32e));
}
#endif
@@ -3232,7 +3275,7 @@ public:
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
T_MUST_EVEX|T_66|T_0F|T_N4, T_MUST_EVEX|T_F3|T_0F|T_N4, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
@@ -3241,7 +3284,7 @@ public:
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
T_MUST_EVEX|T_F3|T_MAP5|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_N2, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);

File diff suppressed because it is too large Load Diff

View File

@@ -548,6 +548,12 @@ public:
XBYAK_DEFINE_TYPE(88, tSSE4a);
XBYAK_DEFINE_TYPE(89, tCLWB);
XBYAK_DEFINE_TYPE(90, tTSXLDTRK);
XBYAK_DEFINE_TYPE(91, tAMX_TRANSPOSE);
XBYAK_DEFINE_TYPE(92, tAMX_TF32);
XBYAK_DEFINE_TYPE(93, tAMX_AVX512);
XBYAK_DEFINE_TYPE(94, tAMX_MOVRS);
XBYAK_DEFINE_TYPE(95, tAMX_FP8);
XBYAK_DEFINE_TYPE(96, tMOVRS);
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@@ -702,12 +708,20 @@ public:
if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
if (EAX & (1U << 31)) type_ |= tMOVRS;
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
if (EDX & (1U << 10)) type_ |= tAVX_VNNI_INT16;
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
if (EDX & (1U << 19)) type_ |= tAVX10;
if (EDX & (1U << 21)) type_ |= tAPX_F;
getCpuidEx(0x1e, 1, data);
if (EAX & (1U << 4)) type_ |= tAMX_FP8;
if (EAX & (1U << 5)) type_ |= tAMX_TRANSPOSE;
if (EAX & (1U << 6)) type_ |= tAMX_TF32;
if (EAX & (1U << 7)) type_ |= tAMX_AVX512;
if (EAX & (1U << 8)) type_ |= tAMX_MOVRS;
}
}
if (maxNum >= 0x19) {
@@ -892,17 +906,17 @@ class StackFrame {
#endif
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_;
bool useRcx_;
bool useRdx_;
int saveNum_;
int P_;
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[maxRegNum];
Pack p_;
Pack t_;
int pNum_;
int tNum_;
int saveNum_;
int P_;
bool useRcx_;
bool useRdx_;
bool makeEpilog_;
StackFrame(const StackFrame&);
void operator=(const StackFrame&);
public:
@@ -928,10 +942,10 @@ public:
: code_(code)
, pNum_(pNum)
, tNum_(tNum & ~(UseRCX | UseRDX))
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, saveNum_(0)
, P_(0)
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, makeEpilog_(makeEpilog)
, p(p_)
, t(t_)