|
|
|
|
@@ -1,4 +1,4 @@
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
#ifndef XBYAK_XBYAK_H_
|
|
|
|
|
#define XBYAK_XBYAK_H_
|
|
|
|
|
/*!
|
|
|
|
|
@@ -151,11 +151,17 @@
|
|
|
|
|
#pragma GCC diagnostic ignored "-Warray-bounds"
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Define this macro as 0 to disable strict checking of memory operand and register size matching.
|
|
|
|
|
// This macro may be removed in future versions.
|
|
|
|
|
#ifndef XBYAK_STRICT_CHECK_MEM_REG_SIZE
|
|
|
|
|
#define XBYAK_STRICT_CHECK_MEM_REG_SIZE 1
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
namespace Xbyak {
|
|
|
|
|
|
|
|
|
|
enum {
|
|
|
|
|
DEFAULT_MAX_CODE_SIZE = 4096,
|
|
|
|
|
VERSION = 0x7070 /* 0xABCD = A.BC(.D) */
|
|
|
|
|
VERSION = 0x7260 /* 0xABCD = A.BC(.D) */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
#ifndef MIE_INTEGER_TYPE_DEFINED
|
|
|
|
|
@@ -232,6 +238,8 @@ enum {
|
|
|
|
|
ERR_CANT_USE_REX2,
|
|
|
|
|
ERR_INVALID_DFV,
|
|
|
|
|
ERR_INVALID_REG_IDX,
|
|
|
|
|
ERR_BAD_ENCODING_MODE,
|
|
|
|
|
ERR_CANT_USE_ABCDH,
|
|
|
|
|
ERR_INTERNAL // Put it at last.
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
@@ -290,6 +298,8 @@ inline const char *ConvertErrorToString(int err)
|
|
|
|
|
"can't use rex2",
|
|
|
|
|
"invalid dfv",
|
|
|
|
|
"invalid reg index",
|
|
|
|
|
"bad encoding mode",
|
|
|
|
|
"can't use [abcd]h with rex",
|
|
|
|
|
"internal error"
|
|
|
|
|
};
|
|
|
|
|
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
|
|
|
|
@@ -634,12 +644,12 @@ public:
|
|
|
|
|
void setBit(int bit);
|
|
|
|
|
void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
|
|
|
|
|
{
|
|
|
|
|
if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
|
|
|
|
|
if (mask_ && (mask_ != idx)) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
|
|
|
|
|
mask_ = idx;
|
|
|
|
|
}
|
|
|
|
|
void setRounding(int idx)
|
|
|
|
|
{
|
|
|
|
|
if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
|
|
|
|
|
if (rounding_ && (rounding_ != idx)) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
|
|
|
|
|
rounding_ = idx;
|
|
|
|
|
}
|
|
|
|
|
void setZero() { zero_ = true; }
|
|
|
|
|
@@ -784,6 +794,9 @@ class Label;
|
|
|
|
|
struct Reg8;
|
|
|
|
|
struct Reg16;
|
|
|
|
|
struct Reg32;
|
|
|
|
|
struct Xmm;
|
|
|
|
|
struct Ymm;
|
|
|
|
|
struct Zmm;
|
|
|
|
|
#ifdef XBYAK64
|
|
|
|
|
struct Reg64;
|
|
|
|
|
#endif
|
|
|
|
|
@@ -799,6 +812,9 @@ public:
|
|
|
|
|
#ifdef XBYAK64
|
|
|
|
|
Reg64 cvt64() const;
|
|
|
|
|
#endif
|
|
|
|
|
Xmm cvt128() const;
|
|
|
|
|
Ymm cvt256() const;
|
|
|
|
|
Zmm cvt512() const;
|
|
|
|
|
Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; }
|
|
|
|
|
Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; }
|
|
|
|
|
};
|
|
|
|
|
@@ -936,6 +952,21 @@ inline Reg64 Reg::cvt64() const
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// Build an Xmm that carries the index of this register after changeBit(128).
inline Xmm Reg::cvt128() const
{
	const Reg resized = changeBit(128);
	return Xmm(resized.getIdx());
}
|
|
|
|
|
|
|
|
|
|
// Build a Ymm that carries the index of this register after changeBit(256).
inline Ymm Reg::cvt256() const
{
	const Reg resized = changeBit(256);
	return Ymm(resized.getIdx());
}
|
|
|
|
|
|
|
|
|
|
// Build a Zmm that carries the index of this register after changeBit(512).
inline Zmm Reg::cvt512() const
{
	const Reg resized = changeBit(512);
	return Zmm(resized.getIdx());
}
|
|
|
|
|
|
|
|
|
|
#ifndef XBYAK_DISABLE_SEGMENT
|
|
|
|
|
// not derived from Reg
|
|
|
|
|
class Segment {
|
|
|
|
|
@@ -1673,7 +1704,9 @@ inline const uint8_t* Label::getAddress() const
|
|
|
|
|
typedef enum {
|
|
|
|
|
DefaultEncoding,
|
|
|
|
|
VexEncoding,
|
|
|
|
|
EvexEncoding
|
|
|
|
|
EvexEncoding,
|
|
|
|
|
PreAVX10v2Encoding,
|
|
|
|
|
AVX10v2Encoding
|
|
|
|
|
} PreferredEncoding;
|
|
|
|
|
|
|
|
|
|
class CodeGenerator : public CodeArray {
|
|
|
|
|
@@ -1730,10 +1763,10 @@ private:
|
|
|
|
|
{
|
|
|
|
|
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
|
|
|
|
|
}
|
|
|
|
|
static inline bool isValidSSE(const Operand& op1)
|
|
|
|
|
static inline bool isValidSSE(const Operand& op)
|
|
|
|
|
{
|
|
|
|
|
// SSE instructions do not support XMM16 - XMM31
|
|
|
|
|
return !(op1.isXMM() && op1.getIdx() >= 16);
|
|
|
|
|
return !(op.isXMM() && op.getIdx() >= 16);
|
|
|
|
|
}
|
|
|
|
|
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
|
|
|
|
|
{
|
|
|
|
|
@@ -1815,12 +1848,11 @@ private:
|
|
|
|
|
static const uint64_t T_0F = 1ull << 8;
|
|
|
|
|
static const uint64_t T_0F38 = 1ull << 9;
|
|
|
|
|
static const uint64_t T_0F3A = 1ull << 10;
|
|
|
|
|
static const uint64_t T_L0 = 1ull << 11;
|
|
|
|
|
static const uint64_t T_MAP5 = 1ull << 11;
|
|
|
|
|
static const uint64_t T_L1 = 1ull << 12;
|
|
|
|
|
static const uint64_t T_W0 = 1ull << 13;
|
|
|
|
|
static const uint64_t T_W1 = 1ull << 14;
|
|
|
|
|
static const uint64_t T_EW0 = 1ull << 15;
|
|
|
|
|
static const uint64_t T_EW1 = 1ull << 16;
|
|
|
|
|
static const uint64_t T_W0 = 1ull << 13; // T_EW0 = T_W0
|
|
|
|
|
static const uint64_t T_W1 = 1ull << 14; // for VEX
|
|
|
|
|
static const uint64_t T_EW1 = 1ull << 16; // for EVEX
|
|
|
|
|
static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
|
|
|
|
|
static const uint64_t T_EVEX = 1ull << 18;
|
|
|
|
|
static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
|
|
|
|
|
@@ -1836,23 +1868,29 @@ private:
|
|
|
|
|
static const uint64_t T_M_K = 1ull << 28; // mem{k}
|
|
|
|
|
static const uint64_t T_VSIB = 1ull << 29;
|
|
|
|
|
static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
|
|
|
|
|
static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
|
|
|
|
|
static const uint64_t T_MAP5 = T_FP16 | T_0F;
|
|
|
|
|
static const uint64_t T_MAP6 = T_FP16 | T_0F38;
|
|
|
|
|
static const uint64_t T_MAP6 = 1ull << 31;
|
|
|
|
|
static const uint64_t T_NF = 1ull << 32; // T_nf
|
|
|
|
|
static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
|
|
|
|
|
|
|
|
|
|
static const uint64_t T_ND1 = 1ull << 35; // ND=1
|
|
|
|
|
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
|
|
|
|
|
static const uint64_t T_F2 = 1ull << 37; // pp = 3
|
|
|
|
|
static const uint64_t T_SENTRY = (1ull << 38)-1; // attribute(>=T_SENTRY) is for error check
|
|
|
|
|
static const uint64_t T_ALLOW_DIFF_SIZE = 1ull << 38; // allow difference reg size
|
|
|
|
|
static const uint64_t T_ALLOW_ABCDH = 1ull << 39; // allow [abcd]h reg
|
|
|
|
|
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
|
|
|
|
static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
|
|
|
|
|
// @@@end of avx_type_def.h
|
|
|
|
|
static inline uint32_t getMap(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
|
|
|
|
|
static inline uint32_t getMap(uint64_t type)
|
|
|
|
|
{
|
|
|
|
|
if (type & T_MAP6) return 6;
|
|
|
|
|
if (type & T_MAP5) return 5;
|
|
|
|
|
return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
|
|
|
|
|
}
|
|
|
|
|
void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false)
|
|
|
|
|
{
|
|
|
|
|
int w = (type & T_W1) ? 1 : 0;
|
|
|
|
|
bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
|
|
|
|
|
bool is256 = (type & T_L1) ? true : reg.isYMM();
|
|
|
|
|
bool r = reg.isExtIdx();
|
|
|
|
|
bool b = base.isExtIdx();
|
|
|
|
|
int idx = v ? v->getIdx() : 0;
|
|
|
|
|
@@ -1867,16 +1905,19 @@ private:
|
|
|
|
|
}
|
|
|
|
|
db(code);
|
|
|
|
|
}
|
|
|
|
|
void verifySAE(const Reg& r, uint64_t type) const
|
|
|
|
|
// Allow YMM embedded rounding for AVX10.2 to minimize flag modifications
|
|
|
|
|
bool verifySAE(const Reg& r, const Reg& b, uint64_t type) const
|
|
|
|
|
{
|
|
|
|
|
if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
|
|
|
|
|
XBYAK_THROW(ERR_SAE_IS_INVALID)
|
|
|
|
|
if (((type & T_SAE_X) && (r.isYMM() && b.isXMM())) || ((type & T_SAE_Y) && b.isXMM()) || ((type & T_SAE_Z) && b.isYMM())) return true;
|
|
|
|
|
if (((type & T_SAE_X) && b.isXMM()) || ((type & T_SAE_Y) && b.isYMM()) || ((type & T_SAE_Z) && b.isZMM())) return false;
|
|
|
|
|
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
|
|
|
|
|
}
|
|
|
|
|
void verifyER(const Reg& r, uint64_t type) const
|
|
|
|
|
bool verifyER(const Reg& r, const Reg& b, uint64_t type) const
|
|
|
|
|
{
|
|
|
|
|
if ((type & T_ER_R) && r.isREG(32|64)) return;
|
|
|
|
|
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
|
|
|
|
|
XBYAK_THROW(ERR_ER_IS_INVALID)
|
|
|
|
|
if ((type & T_ER_R) && b.isREG(32|64)) return false;
|
|
|
|
|
if (((type & T_ER_X) && (r.isYMM() && b.isXMM())) || ((type & T_ER_Y) && b.isXMM()) || ((type & T_ER_Z) && b.isYMM())) return true;
|
|
|
|
|
if (((type & T_ER_X) && b.isXMM()) || ((type & T_ER_Y) && b.isYMM()) || ((type & T_ER_Z) && b.isZMM())) return false;
|
|
|
|
|
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
|
|
|
|
|
}
|
|
|
|
|
// (a, b, c) contains non zero two or three values then err
|
|
|
|
|
int verifyDuplicate(int a, int b, int c, int err)
|
|
|
|
|
@@ -1890,31 +1931,32 @@ private:
|
|
|
|
|
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
|
|
|
|
|
int w = (type & T_EW1) ? 1 : 0;
|
|
|
|
|
uint32_t mmm = getMap(type);
|
|
|
|
|
if (type & T_FP16) mmm |= 4;
|
|
|
|
|
uint32_t pp = getPP(type);
|
|
|
|
|
int idx = v ? v->getIdx() : 0;
|
|
|
|
|
uint32_t vvvv = ~idx;
|
|
|
|
|
|
|
|
|
|
bool R = reg.isExtIdx();
|
|
|
|
|
bool X3 = (x && x->isExtIdx()) || (base.isSIMD() && base.isExtIdx2());
|
|
|
|
|
bool B4 = base.isREG() && base.isExtIdx2();
|
|
|
|
|
bool X4 = x && (x->isREG() && x->isExtIdx2());
|
|
|
|
|
uint8_t B4 = (base.isREG() && base.isExtIdx2()) ? 8 : 0;
|
|
|
|
|
uint8_t U = (x && (x->isREG() && x->isExtIdx2())) ? 0 : 4;
|
|
|
|
|
bool B = base.isExtIdx();
|
|
|
|
|
bool Rp = reg.isExtIdx2();
|
|
|
|
|
int LL;
|
|
|
|
|
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
|
|
|
|
|
int disp8N = 1;
|
|
|
|
|
if (rounding) {
|
|
|
|
|
bool isUzero = false;
|
|
|
|
|
if (rounding == EvexModifierRounding::T_SAE) {
|
|
|
|
|
verifySAE(base, type); LL = 0;
|
|
|
|
|
isUzero = verifySAE(reg, base, type); LL = 0;
|
|
|
|
|
} else {
|
|
|
|
|
verifyER(base, type); LL = rounding - 1;
|
|
|
|
|
isUzero = verifyER(reg, base, type); LL = rounding - 1;
|
|
|
|
|
}
|
|
|
|
|
if (isUzero) U = 0; // avx10.2 Evex.U
|
|
|
|
|
b = true;
|
|
|
|
|
} else {
|
|
|
|
|
if (v) VL = (std::max)(VL, v->getBit());
|
|
|
|
|
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
|
|
|
|
|
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
|
|
|
|
LL = (VL >= 512 /* tmm */) ? 2 : (VL == 256) ? 1 : 0;
|
|
|
|
|
if (b) {
|
|
|
|
|
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
|
|
|
|
|
} else if ((type & T_NX_MASK) == T_DUP) {
|
|
|
|
|
@@ -1935,8 +1977,8 @@ private:
|
|
|
|
|
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
|
|
|
|
if (aaa == 0) z = 0; // clear T_z if mask is not set
|
|
|
|
|
db(0x62);
|
|
|
|
|
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | (B4 ? 8 : 0) | mmm);
|
|
|
|
|
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | (X4 ? 0 : 4) | (pp & 3));
|
|
|
|
|
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | B4 | mmm);
|
|
|
|
|
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | U | (pp & 3));
|
|
|
|
|
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (V4 ? 0 : 8) | (aaa & 7));
|
|
|
|
|
db(code);
|
|
|
|
|
return disp8N;
|
|
|
|
|
@@ -2036,7 +2078,6 @@ private:
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
LabelManager labelMgr_;
|
|
|
|
|
bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
|
|
|
|
|
void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false)
|
|
|
|
|
{
|
|
|
|
|
if (!(type&T_APX || rex2)) {
|
|
|
|
|
@@ -2048,19 +2089,24 @@ private:
|
|
|
|
|
db(0x0F); db(0x3A);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
db(code | ((type == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
|
|
|
|
|
db(code | (((type & T_SENTRY) == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
|
|
|
|
|
}
|
|
|
|
|
void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code)
|
|
|
|
|
void opRR(const Reg& r1, const Reg& r2, uint64_t type, int code)
|
|
|
|
|
{
|
|
|
|
|
bool rex2 = rex(reg2, reg1, type);
|
|
|
|
|
writeCode(type, reg1, code, rex2);
|
|
|
|
|
setModRM(3, reg1.getIdx(), reg2.getIdx());
|
|
|
|
|
if (!(type & T_ALLOW_DIFF_SIZE) && r1.isREG() && r2.isREG() && r1.getBit() != r2.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
|
|
|
|
|
if (!(type & T_ALLOW_ABCDH) && (isBadCombination(r1, r2) || isBadCombination(r2, r1))) XBYAK_THROW(ERR_CANT_USE_ABCDH)
|
|
|
|
|
bool rex2 = rex(r2, r1, type);
|
|
|
|
|
writeCode(type, r1, code, rex2);
|
|
|
|
|
setModRM(3, r1.getIdx(), r2.getIdx());
|
|
|
|
|
}
|
|
|
|
|
void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE)
|
|
|
|
|
{
|
|
|
|
|
if (code2 == NONE) code2 = code;
|
|
|
|
|
if (type2 && opROO(Reg(), addr, r, type2, code2)) return;
|
|
|
|
|
if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
|
|
|
|
|
#if XBYAK_STRICT_CHECK_MEM_REG_SIZE == 1
|
|
|
|
|
if (!(type & T_ALLOW_DIFF_SIZE) && r.getBit() <= BIT && addr.getBit() > 0 && addr.getBit() != r.getBit()) XBYAK_THROW(ERR_BAD_MEM_SIZE)
|
|
|
|
|
#endif
|
|
|
|
|
bool rex2 = rex(addr, r, type);
|
|
|
|
|
writeCode(type, r, code, rex2);
|
|
|
|
|
opAddr(addr, r.getIdx());
|
|
|
|
|
@@ -2163,7 +2209,7 @@ private:
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
|
|
|
|
|
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE)
|
|
|
|
|
{
|
|
|
|
|
if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
|
|
|
|
@@ -2205,6 +2251,13 @@ private:
|
|
|
|
|
opSSE(mmx, op, T_66 | T_0F3A, code, isXMM_REG32orMEM, imm);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// r1 is [abcd]h and r2 is reg with rex
|
|
|
|
|
bool isBadCombination(const Reg& r1, const Reg& r2) const
|
|
|
|
|
{
|
|
|
|
|
if (!r1.isHigh8bit()) return false;
|
|
|
|
|
if (r2.isExt8bit() || r2.getIdx() >= 8) return true;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
// (r, r, m) or (r, m, r)
|
|
|
|
|
bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE)
|
|
|
|
|
{
|
|
|
|
|
@@ -2232,11 +2285,11 @@ private:
|
|
|
|
|
int opBit = op.getBit();
|
|
|
|
|
if (disableRex && opBit == 64) opBit = 32;
|
|
|
|
|
const Reg r(ext, Operand::REG, opBit);
|
|
|
|
|
if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
|
|
|
|
|
if ((type & T_APX) && (d != 0 || op.hasRex2NFZU()) && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
|
|
|
|
|
if (op.isMEM()) {
|
|
|
|
|
opMR(op.getAddress(immSize), r, type, code);
|
|
|
|
|
} else if (op.isREG(bit)) {
|
|
|
|
|
opRR(r, op.getReg().changeBit(opBit), type, code);
|
|
|
|
|
opRR(r, op.getReg().changeBit(opBit), type | T_ALLOW_ABCDH, code);
|
|
|
|
|
} else {
|
|
|
|
|
XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
}
|
|
|
|
|
@@ -2295,10 +2348,13 @@ private:
|
|
|
|
|
opRO(static_cast<const Reg&>(op2), op1, 0, code, op1.getKind() == op2.getKind());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// True when x is at most 0x7FFF or at least 0xFFFF8000, i.e. the 32-bit
// pattern of a value that fits in a signed 16-bit displacement.
bool isInDisp16(uint32_t x) const
{
	return x <= 0x7FFF || x >= 0xFFFF8000;
}
|
|
|
|
|
// allow add(ax, 0x8000);
|
|
|
|
|
bool isInDisp16relaxed(uint32_t x) const { uint32_t v = x & 0xffff0000; return v == 0 || v == 0xffff0000; }
|
|
|
|
|
uint32_t getImmBit(const Operand& op, uint32_t imm)
|
|
|
|
|
{
|
|
|
|
|
verifyMemHasSize(op);
|
|
|
|
|
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
|
|
|
|
|
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16relaxed(imm) ? 16 : 32;
|
|
|
|
|
if (op.isBit(8)) immBit = 8;
|
|
|
|
|
if (op.getBit() < immBit) XBYAK_THROW_RET(ERR_IMM_IS_TOO_BIG, 0)
|
|
|
|
|
if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
|
|
|
|
|
@@ -2352,7 +2408,7 @@ private:
|
|
|
|
|
if (op.isREG() && op.hasRex2()) {
|
|
|
|
|
const Reg& r = static_cast<const Reg&>(op);
|
|
|
|
|
rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r);
|
|
|
|
|
db(alt);
|
|
|
|
|
db(alt | (r.getIdx() & 7));
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
int bit = op.getBit();
|
|
|
|
|
@@ -2364,7 +2420,7 @@ private:
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (op.isMEM()) {
|
|
|
|
|
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), 0, code);
|
|
|
|
|
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), T_ALLOW_DIFF_SIZE, code);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
@@ -2422,7 +2478,7 @@ private:
|
|
|
|
|
if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
int w = op.isBit(16);
|
|
|
|
|
if (!(reg.isREG() && (reg.getBit() > op.getBit()))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
opRO(reg, op, T_0F, code | w);
|
|
|
|
|
opRO(reg, op, T_0F | T_ALLOW_DIFF_SIZE, code | w);
|
|
|
|
|
}
|
|
|
|
|
void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
|
|
|
|
|
{
|
|
|
|
|
@@ -2554,6 +2610,18 @@ private:
|
|
|
|
|
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
|
|
|
|
|
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
|
|
|
|
}
|
|
|
|
|
// (x, x, x/m), (x, y, y/m), (y, z, z/m)
|
|
|
|
|
void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code)
|
|
|
|
|
{
|
|
|
|
|
int b1 = x1.getBit();
|
|
|
|
|
int b2 = x2.getBit();
|
|
|
|
|
int b3 = op.getBit();
|
|
|
|
|
if ((b1 == 128 && (b2 == 128 || b2 == 256) && (b2 == b3 || op.isMEM())) || (b1 == 256 && b2 == 512 && (b3 == b2 || op.isMEM()))) {
|
|
|
|
|
opVex(x1, &x2, op, type, code);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
XBYAK_THROW(ERR_BAD_COMBINATION);
|
|
|
|
|
}
|
|
|
|
|
const Xmm& cvtIdx0(const Operand& x) const
|
|
|
|
|
{
|
|
|
|
|
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
|
|
|
|
|
@@ -2568,8 +2636,7 @@ private:
|
|
|
|
|
if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
|
|
|
|
|
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
|
|
|
|
|
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
if (is16bit) db(0x66);
|
|
|
|
|
opRO(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, T_F3 | T_0F, code);
|
|
|
|
|
opRO(reg, op, T_F3 | T_0F, code);
|
|
|
|
|
}
|
|
|
|
|
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode)
|
|
|
|
|
{
|
|
|
|
|
@@ -2644,21 +2711,24 @@ private:
|
|
|
|
|
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
|
|
|
|
opVex(x, 0, addr, type, code);
|
|
|
|
|
}
|
|
|
|
|
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding)
|
|
|
|
|
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
|
|
|
|
|
{
|
|
|
|
|
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code);
|
|
|
|
|
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
|
|
|
|
|
}
|
|
|
|
|
int orEvexIf(PreferredEncoding encoding) {
|
|
|
|
|
if (encoding == DefaultEncoding) {
|
|
|
|
|
encoding = defaultEncoding_;
|
|
|
|
|
PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
|
|
|
|
|
{
|
|
|
|
|
if (enc == DefaultEncoding) {
|
|
|
|
|
enc = defaultEncoding_[sel];
|
|
|
|
|
}
|
|
|
|
|
if (encoding == EvexEncoding) {
|
|
|
|
|
if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
|
|
|
|
|
#ifdef XBYAK_DISABLE_AVX512
|
|
|
|
|
XBYAK_THROW(ERR_EVEX_IS_INVALID)
|
|
|
|
|
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, VexEncoding)
|
|
|
|
|
#endif
|
|
|
|
|
return T_MUST_EVEX;
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
return enc;
|
|
|
|
|
}
|
|
|
|
|
// Pick the attribute bits for the encoding that getEncoding(enc, sel) resolves to.
//   sel == 0 : typeVex when the result is VexEncoding
//   sel == 1 : typeVex unless the result is AVX10v2Encoding
// Every other case yields T_MUST_EVEX | typeEvex.
uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel)
{
	const PreferredEncoding resolved = getEncoding(enc, sel);
	bool useVex = false;
	if (sel == 0) {
		useVex = (resolved == VexEncoding);
	} else if (sel == 1) {
		useVex = (resolved != AVX10v2Encoding);
	}
	if (useVex) return typeVex;
	return T_MUST_EVEX | typeEvex;
}
|
|
|
|
|
void opInOut(const Reg& a, const Reg& d, uint8_t code)
|
|
|
|
|
{
|
|
|
|
|
@@ -2752,7 +2822,7 @@ private:
|
|
|
|
|
const Operand *p1 = &k, *p2 = &op;
|
|
|
|
|
if (code == 0x93) { std::swap(p1, p2); }
|
|
|
|
|
if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return;
|
|
|
|
|
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|type, code);
|
|
|
|
|
opVex(static_cast<const Reg&>(*p1), 0, *p2, type, code);
|
|
|
|
|
}
|
|
|
|
|
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
|
|
|
|
|
{
|
|
|
|
|
@@ -2770,6 +2840,31 @@ private:
|
|
|
|
|
}
|
|
|
|
|
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
|
|
|
|
|
}
|
|
|
|
|
// AVX10 zero-extending for vmovd, vmovw
|
|
|
|
|
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
|
|
|
|
|
{
|
|
|
|
|
const Operand *p1 = &op1;
|
|
|
|
|
const Operand *p2 = &op2;
|
|
|
|
|
bool rev = false;
|
|
|
|
|
if (p1->isMEM()) {
|
|
|
|
|
std::swap(p1, p2);
|
|
|
|
|
rev = true;
|
|
|
|
|
}
|
|
|
|
|
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
if (p1->isXMM()) {
|
|
|
|
|
std::swap(p1, p2);
|
|
|
|
|
rev = !rev;
|
|
|
|
|
}
|
|
|
|
|
enc = getEncoding(enc, 1);
|
|
|
|
|
int sel = -1;
|
|
|
|
|
if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) {
|
|
|
|
|
sel = 2 + int(rev);
|
|
|
|
|
} else if (p1->isREG(bit) || p1->isMEM()) {
|
|
|
|
|
sel = int(rev);
|
|
|
|
|
}
|
|
|
|
|
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
|
|
|
|
|
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
|
|
|
|
|
}
|
|
|
|
|
public:
|
|
|
|
|
unsigned int getVersion() const { return VERSION; }
|
|
|
|
|
using CodeArray::db;
|
|
|
|
|
@@ -2828,7 +2923,7 @@ public:
|
|
|
|
|
#endif
|
|
|
|
|
private:
|
|
|
|
|
bool isDefaultJmpNEAR_;
|
|
|
|
|
PreferredEncoding defaultEncoding_;
|
|
|
|
|
PreferredEncoding defaultEncoding_[2]; // 0:vnni, 1:vmpsadbw
|
|
|
|
|
public:
|
|
|
|
|
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
|
|
|
|
|
void L(Label& label) { labelMgr_.defineClabel(label); }
|
|
|
|
|
@@ -2999,6 +3094,7 @@ public:
|
|
|
|
|
rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (p1->isREG() && p2->isREG()) std::swap(p1, p2); // adapt to NASM 2.16.03 behavior to pass tests
|
|
|
|
|
opRO(static_cast<const Reg&>(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit())));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -3044,11 +3140,11 @@ public:
|
|
|
|
|
}
|
|
|
|
|
void mov(const Operand& op, const Segment& seg)
|
|
|
|
|
{
|
|
|
|
|
opRO(Reg8(seg.getIdx()), op, 0, 0x8C, op.isREG(16|i32e));
|
|
|
|
|
opRO(Reg8(seg.getIdx()), op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8C, op.isREG(16|i32e));
|
|
|
|
|
}
|
|
|
|
|
void mov(const Segment& seg, const Operand& op)
|
|
|
|
|
{
|
|
|
|
|
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, 0, 0x8E, op.isREG(16|i32e));
|
|
|
|
|
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8E, op.isREG(16|i32e));
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
@@ -3113,8 +3209,9 @@ public:
|
|
|
|
|
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
|
|
|
|
|
#endif
|
|
|
|
|
, isDefaultJmpNEAR_(false)
|
|
|
|
|
, defaultEncoding_(EvexEncoding)
|
|
|
|
|
{
|
|
|
|
|
setDefaultEncoding();
|
|
|
|
|
setDefaultEncodingAVX10();
|
|
|
|
|
labelMgr_.set(this);
|
|
|
|
|
}
|
|
|
|
|
void reset()
|
|
|
|
|
@@ -3151,13 +3248,20 @@ public:
|
|
|
|
|
#undef jnl
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// set default encoding to select Vex or Evex
|
|
|
|
|
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
|
|
|
|
|
|
|
|
|
|
void sha1msg12(const Xmm& x, const Operand& op)
|
|
|
|
|
// set default encoding of VNNI
|
|
|
|
|
// EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI
|
|
|
|
|
void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
|
|
|
|
|
{
|
|
|
|
|
opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
|
|
|
|
|
if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
|
|
|
|
|
defaultEncoding_[0] = enc;
|
|
|
|
|
}
|
|
|
|
|
// default : PreferredEncoding : AVX-VNNI-INT8/AVX512-FP16
|
|
|
|
|
void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
|
|
|
|
|
{
|
|
|
|
|
if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
|
|
|
|
|
defaultEncoding_[1] = enc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void bswap(const Reg32e& r)
|
|
|
|
|
{
|
|
|
|
|
int idx = r.getIdx();
|
|
|
|
|
@@ -3170,6 +3274,24 @@ public:
|
|
|
|
|
}
|
|
|
|
|
db(0xC8 + (idx & 7));
|
|
|
|
|
}
|
|
|
|
|
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
|
|
|
|
|
{
|
|
|
|
|
const uint64_t typeTbl[] = {
|
|
|
|
|
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
|
|
|
|
|
T_MUST_EVEX|T_66|T_0F|T_N4, T_MUST_EVEX|T_F3|T_0F|T_N4, // avx10.2
|
|
|
|
|
};
|
|
|
|
|
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
|
|
|
|
|
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
|
|
|
|
|
}
|
|
|
|
|
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
|
|
|
|
|
{
|
|
|
|
|
const uint64_t typeTbl[] = {
|
|
|
|
|
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
|
|
|
|
|
T_MUST_EVEX|T_F3|T_MAP5|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_N2, // avx10.2
|
|
|
|
|
};
|
|
|
|
|
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
|
|
|
|
|
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
|
|
|
|
|
}
|
|
|
|
|
/*
|
|
|
|
|
use single byte nop if useMultiByteNop = false
|
|
|
|
|
*/
|
|
|
|
|
|