Rewrite CMake & First Work for Windows Support

This commit is contained in:
Xphalnos
2025-06-18 20:50:02 +02:00
parent 66aa17ee9e
commit 453d49ed44
7 changed files with 825 additions and 580 deletions

View File

@@ -1,32 +1,23 @@
cmake_minimum_required(VERSION 3.14)

# One project declaration. The compiler is intentionally not hard-coded:
# forcing gcc/g++ here makes MSVC / clang-cl builds impossible. Select a
# toolchain with -DCMAKE_CXX_COMPILER=... or a toolchain file instead.
project(rem LANGUAGES C CXX)

# Default to an optimized build, but only when the user has not picked a
# configuration and the generator is single-config (multi-config generators
# such as Visual Studio ignore CMAKE_BUILD_TYPE).
get_property(rem_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT rem_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
      "Build type (Debug, Release, RelWithDebInfo, MinSizeRel)" FORCE)
endif()

# Directory roots are stored WITHOUT a trailing slash; every use below adds
# its own separator so the glob patterns cannot silently collapse to "src*.cpp".
set(SOURCE_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/src")
set(LIBRARY_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/libraries")

# CONFIGURE_DEPENDS re-runs the glob at build time so newly added files are
# picked up without a manual re-configure.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS "${SOURCE_DIRECTORY}/*.cpp")
file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS "${SOURCE_DIRECTORY}/*.h")

# The target is defined exactly once. A static library is used so that no
# symbol-export annotations are needed when building on Windows.
add_library(rem STATIC ${SOURCES} ${HEADERS})

# Usage requirements live on the target instead of directory scope, so they
# reach consumers that link against rem and nothing else.
target_include_directories(rem PUBLIC
  "${SOURCE_DIRECTORY}"
  "${LIBRARY_DIRECTORY}"
)
target_compile_definitions(rem PUBLIC XBYAK64)

# Portable replacement for the raw -std=c++17 flag.
target_compile_features(rem PUBLIC cxx_std_17)

# -Wno-c++11-narrowing is a Clang diagnostic option; pass it only where it is
# understood. Optimization and debug-info flags come from the build type
# rather than from hard-coded -Ofast / -g.
target_compile_options(rem PRIVATE
  "$<$<CXX_COMPILER_ID:Clang>:-Wno-c++11-narrowing>"
  "$<$<CXX_COMPILER_ID:AppleClang>:-Wno-c++11-narrowing>"
)

View File

@@ -1,4 +1,4 @@
#pragma once
#ifndef XBYAK_XBYAK_H_
#define XBYAK_XBYAK_H_
/*!
@@ -151,11 +151,17 @@
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
// Define this macro as 0 to disable strict checking of memory operand and register size matching.
// This macro may be removed in future versions.
#ifndef XBYAK_STRICT_CHECK_MEM_REG_SIZE
#define XBYAK_STRICT_CHECK_MEM_REG_SIZE 1
#endif
namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7070 /* 0xABCD = A.BC(.D) */
VERSION = 0x7260 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -232,6 +238,8 @@ enum {
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
ERR_BAD_ENCODING_MODE,
ERR_CANT_USE_ABCDH,
ERR_INTERNAL // Put it at last.
};
@@ -290,6 +298,8 @@ inline const char *ConvertErrorToString(int err)
"can't use rex2",
"invalid dfv",
"invalid reg index",
"bad encoding mode",
"can't use [abcd]h with rex",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -634,12 +644,12 @@ public:
void setBit(int bit);
void setOpmaskIdx(int idx, bool /*ignore_idx0*/ = true)
{
if (mask_) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
if (mask_ && (mask_ != idx)) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET)
mask_ = idx;
}
void setRounding(int idx)
{
if (rounding_) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
if (rounding_ && (rounding_ != idx)) XBYAK_THROW(ERR_ROUNDING_IS_ALREADY_SET)
rounding_ = idx;
}
void setZero() { zero_ = true; }
@@ -784,6 +794,9 @@ class Label;
struct Reg8;
struct Reg16;
struct Reg32;
struct Xmm;
struct Ymm;
struct Zmm;
#ifdef XBYAK64
struct Reg64;
#endif
@@ -799,6 +812,9 @@ public:
#ifdef XBYAK64
Reg64 cvt64() const;
#endif
Xmm cvt128() const;
Ymm cvt256() const;
Zmm cvt512() const;
Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; }
Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; }
};
@@ -936,6 +952,21 @@ inline Reg64 Reg::cvt64() const
}
#endif
// Build an Xmm from the index this register reports after changeBit(128).
inline Xmm Reg::cvt128() const
{
	const int idx = changeBit(128).getIdx();
	return Xmm(idx);
}
// Build a Ymm from the index this register reports after changeBit(256).
inline Ymm Reg::cvt256() const
{
	const int idx = changeBit(256).getIdx();
	return Ymm(idx);
}
// Build a Zmm from the index this register reports after changeBit(512).
inline Zmm Reg::cvt512() const
{
	const int idx = changeBit(512).getIdx();
	return Zmm(idx);
}
#ifndef XBYAK_DISABLE_SEGMENT
// not derived from Reg
class Segment {
@@ -1673,7 +1704,9 @@ inline const uint8_t* Label::getAddress() const
typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding
EvexEncoding,
PreAVX10v2Encoding,
AVX10v2Encoding
} PreferredEncoding;
class CodeGenerator : public CodeArray {
@@ -1730,10 +1763,10 @@ private:
{
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
}
static inline bool isValidSSE(const Operand& op1)
static inline bool isValidSSE(const Operand& op)
{
// SSE instructions do not support XMM16 - XMM31
return !(op1.isXMM() && op1.getIdx() >= 16);
return !(op.isXMM() && op.getIdx() >= 16);
}
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
@@ -1815,12 +1848,11 @@ private:
static const uint64_t T_0F = 1ull << 8;
static const uint64_t T_0F38 = 1ull << 9;
static const uint64_t T_0F3A = 1ull << 10;
static const uint64_t T_L0 = 1ull << 11;
static const uint64_t T_MAP5 = 1ull << 11;
static const uint64_t T_L1 = 1ull << 12;
static const uint64_t T_W0 = 1ull << 13;
static const uint64_t T_W1 = 1ull << 14;
static const uint64_t T_EW0 = 1ull << 15;
static const uint64_t T_EW1 = 1ull << 16;
static const uint64_t T_W0 = 1ull << 13; // T_EW0 = T_W0
static const uint64_t T_W1 = 1ull << 14; // for VEX
static const uint64_t T_EW1 = 1ull << 16; // for EVEX
static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM
static const uint64_t T_EVEX = 1ull << 18;
static const uint64_t T_ER_X = 1ull << 19; // xmm{er}
@@ -1836,23 +1868,29 @@ private:
static const uint64_t T_M_K = 1ull << 28; // mem{k}
static const uint64_t T_VSIB = 1ull << 29;
static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem
static const uint64_t T_FP16 = 1ull << 31; // avx512-fp16
static const uint64_t T_MAP5 = T_FP16 | T_0F;
static const uint64_t T_MAP6 = T_FP16 | T_0F38;
static const uint64_t T_MAP6 = 1ull << 31;
static const uint64_t T_NF = 1ull << 32; // T_nf
static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8)
static const uint64_t T_ND1 = 1ull << 35; // ND=1
static const uint64_t T_ZU = 1ull << 36; // ND=ZU
static const uint64_t T_F2 = 1ull << 37; // pp = 3
static const uint64_t T_SENTRY = (1ull << 38)-1; // attribute(>=T_SENTRY) is for error check
static const uint64_t T_ALLOW_DIFF_SIZE = 1ull << 38; // allow difference reg size
static const uint64_t T_ALLOW_ABCDH = 1ull << 39; // allow [abcd]h reg
// T_66 = 1, T_F3 = 2, T_F2 = 3
static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; }
// @@@end of avx_type_def.h
static inline uint32_t getMap(uint64_t type) { return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; }
// Map number selected by the attribute bits in `type`.
// Flags are tested in priority order: T_MAP6 (6), T_MAP5 (5), T_0F (1),
// T_0F38 (2), T_0F3A (3); 0 when none of them is set.
static inline uint32_t getMap(uint64_t type)
{
	if (type & T_MAP6) return 6;
	if (type & T_MAP5) return 5;
	if (type & T_0F) return 1;
	if (type & T_0F38) return 2;
	if (type & T_0F3A) return 3;
	return 0;
}
void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false)
{
int w = (type & T_W1) ? 1 : 0;
bool is256 = (type & T_L1) ? true : (type & T_L0) ? false : reg.isYMM();
bool is256 = (type & T_L1) ? true : reg.isYMM();
bool r = reg.isExtIdx();
bool b = base.isExtIdx();
int idx = v ? v->getIdx() : 0;
@@ -1867,16 +1905,19 @@ private:
}
db(code);
}
void verifySAE(const Reg& r, uint64_t type) const
// Allow YMM embedded rounding for AVX10.2 to minimize flag modifications
bool verifySAE(const Reg& r, const Reg& b, uint64_t type) const
{
if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_SAE_IS_INVALID)
if (((type & T_SAE_X) && (r.isYMM() && b.isXMM())) || ((type & T_SAE_Y) && b.isXMM()) || ((type & T_SAE_Z) && b.isYMM())) return true;
if (((type & T_SAE_X) && b.isXMM()) || ((type & T_SAE_Y) && b.isYMM()) || ((type & T_SAE_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
}
void verifyER(const Reg& r, uint64_t type) const
bool verifyER(const Reg& r, const Reg& b, uint64_t type) const
{
if ((type & T_ER_R) && r.isREG(32|64)) return;
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_ER_IS_INVALID)
if ((type & T_ER_R) && b.isREG(32|64)) return false;
if (((type & T_ER_X) && (r.isYMM() && b.isXMM())) || ((type & T_ER_Y) && b.isXMM()) || ((type & T_ER_Z) && b.isYMM())) return true;
if (((type & T_ER_X) && b.isXMM()) || ((type & T_ER_Y) && b.isYMM()) || ((type & T_ER_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
}
// (a, b, c) contains non zero two or three values then err
int verifyDuplicate(int a, int b, int c, int err)
@@ -1890,31 +1931,32 @@ private:
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
int w = (type & T_EW1) ? 1 : 0;
uint32_t mmm = getMap(type);
if (type & T_FP16) mmm |= 4;
uint32_t pp = getPP(type);
int idx = v ? v->getIdx() : 0;
uint32_t vvvv = ~idx;
bool R = reg.isExtIdx();
bool X3 = (x && x->isExtIdx()) || (base.isSIMD() && base.isExtIdx2());
bool B4 = base.isREG() && base.isExtIdx2();
bool X4 = x && (x->isREG() && x->isExtIdx2());
uint8_t B4 = (base.isREG() && base.isExtIdx2()) ? 8 : 0;
uint8_t U = (x && (x->isREG() && x->isExtIdx2())) ? 0 : 4;
bool B = base.isExtIdx();
bool Rp = reg.isExtIdx2();
int LL;
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
int disp8N = 1;
if (rounding) {
bool isUzero = false;
if (rounding == EvexModifierRounding::T_SAE) {
verifySAE(base, type); LL = 0;
isUzero = verifySAE(reg, base, type); LL = 0;
} else {
verifyER(base, type); LL = rounding - 1;
isUzero = verifyER(reg, base, type); LL = rounding - 1;
}
if (isUzero) U = 0; // avx10.2 Evex.U
b = true;
} else {
if (v) VL = (std::max)(VL, v->getBit());
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
LL = (VL >= 512 /* tmm */) ? 2 : (VL == 256) ? 1 : 0;
if (b) {
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
} else if ((type & T_NX_MASK) == T_DUP) {
@@ -1935,8 +1977,8 @@ private:
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
if (aaa == 0) z = 0; // clear T_z if mask is not set
db(0x62);
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | (B4 ? 8 : 0) | mmm);
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | (X4 ? 0 : 4) | (pp & 3));
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | B4 | mmm);
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | U | (pp & 3));
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (V4 ? 0 : 8) | (aaa & 7));
db(code);
return disp8N;
@@ -2036,7 +2078,6 @@ private:
}
}
LabelManager labelMgr_;
bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; }
void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false)
{
if (!(type&T_APX || rex2)) {
@@ -2048,19 +2089,24 @@ private:
db(0x0F); db(0x3A);
}
}
db(code | ((type == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
db(code | (((type & T_SENTRY) == 0 || (type & T_CODE1_IF1)) && !r.isBit(8)));
}
void opRR(const Reg& reg1, const Reg& reg2, uint64_t type, int code)
void opRR(const Reg& r1, const Reg& r2, uint64_t type, int code)
{
bool rex2 = rex(reg2, reg1, type);
writeCode(type, reg1, code, rex2);
setModRM(3, reg1.getIdx(), reg2.getIdx());
if (!(type & T_ALLOW_DIFF_SIZE) && r1.isREG() && r2.isREG() && r1.getBit() != r2.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
if (!(type & T_ALLOW_ABCDH) && (isBadCombination(r1, r2) || isBadCombination(r2, r1))) XBYAK_THROW(ERR_CANT_USE_ABCDH)
bool rex2 = rex(r2, r1, type);
writeCode(type, r1, code, rex2);
setModRM(3, r1.getIdx(), r2.getIdx());
}
void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE)
{
if (code2 == NONE) code2 = code;
if (type2 && opROO(Reg(), addr, r, type2, code2)) return;
if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP)
#if XBYAK_STRICT_CHECK_MEM_REG_SIZE == 1
if (!(type & T_ALLOW_DIFF_SIZE) && r.getBit() <= BIT && addr.getBit() > 0 && addr.getBit() != r.getBit()) XBYAK_THROW(ERR_BAD_MEM_SIZE)
#endif
bool rex2 = rex(addr, r, type);
writeCode(type, r, code, rex2);
opAddr(addr, r.getIdx());
@@ -2163,7 +2209,7 @@ private:
}
}
}
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE)
{
if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
@@ -2205,6 +2251,13 @@ private:
opSSE(mmx, op, T_66 | T_0F3A, code, isXMM_REG32orMEM, imm);
}
}
// True when r1 is [abcd]h and r2 is a register that needs rex
// (r2 is an extended 8-bit register or has index >= 8).
bool isBadCombination(const Reg& r1, const Reg& r2) const
{
	return r1.isHigh8bit() && (r2.isExt8bit() || r2.getIdx() >= 8);
}
// (r, r, m) or (r, m, r)
bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE)
{
@@ -2232,11 +2285,11 @@ private:
int opBit = op.getBit();
if (disableRex && opBit == 64) opBit = 32;
const Reg r(ext, Operand::REG, opBit);
if ((type & T_APX) && op.hasRex2NFZU() && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
if ((type & T_APX) && (d != 0 || op.hasRex2NFZU()) && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return;
if (op.isMEM()) {
opMR(op.getAddress(immSize), r, type, code);
} else if (op.isREG(bit)) {
opRR(r, op.getReg().changeBit(opBit), type, code);
opRR(r, op.getReg().changeBit(opBit), type | T_ALLOW_ABCDH, code);
} else {
XBYAK_THROW(ERR_BAD_COMBINATION)
}
@@ -2295,10 +2348,13 @@ private:
opRO(static_cast<const Reg&>(op2), op1, 0, code, op1.getKind() == op2.getKind());
}
}
// True when x, read as a signed 32-bit value, lies in [-0x8000, 0x7FFF].
bool isInDisp16(uint32_t x) const
{
	return x <= 0x7FFF || x >= 0xFFFF8000;
}
// Relaxed 16-bit check: accepts x when its upper 16 bits are all zero or all one,
// which allows add(ax, 0x8000);
bool isInDisp16relaxed(uint32_t x) const
{
	const uint32_t upper = x & 0xffff0000;
	return upper == 0 || upper == 0xffff0000;
}
uint32_t getImmBit(const Operand& op, uint32_t imm)
{
verifyMemHasSize(op);
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16(imm) ? 16 : 32;
uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16relaxed(imm) ? 16 : 32;
if (op.isBit(8)) immBit = 8;
if (op.getBit() < immBit) XBYAK_THROW_RET(ERR_IMM_IS_TOO_BIG, 0)
if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */
@@ -2352,7 +2408,7 @@ private:
if (op.isREG() && op.hasRex2()) {
const Reg& r = static_cast<const Reg&>(op);
rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r);
db(alt);
db(alt | (r.getIdx() & 7));
return;
}
int bit = op.getBit();
@@ -2364,7 +2420,7 @@ private:
return;
}
if (op.isMEM()) {
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), 0, code);
opMR(op.getAddress(), Reg(ext, Operand::REG, 32), T_ALLOW_DIFF_SIZE, code);
return;
}
}
@@ -2422,7 +2478,7 @@ private:
if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION)
int w = op.isBit(16);
if (!(reg.isREG() && (reg.getBit() > op.getBit()))) XBYAK_THROW(ERR_BAD_COMBINATION)
opRO(reg, op, T_0F, code | w);
opRO(reg, op, T_0F | T_ALLOW_DIFF_SIZE, code | w);
}
void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext)
{
@@ -2554,6 +2610,18 @@ private:
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
}
// Accepted operand shapes: (x, x, x/m), (x, y, y/m), (y, z, z/m).
// Any other combination raises ERR_BAD_COMBINATION.
void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code)
{
	const int dstBit = x1.getBit();
	const int srcBit = x2.getBit();
	const int opBit = op.getBit();
	// register widths: 128 <- 128/256, or 256 <- 512
	const bool widthsOk = (dstBit == 128 && (srcBit == 128 || srcBit == 256)) || (dstBit == 256 && srcBit == 512);
	// the third operand is either memory or as wide as x2
	const bool thirdOk = (opBit == srcBit) || op.isMEM();
	if (!(widthsOk && thirdOk)) XBYAK_THROW(ERR_BAD_COMBINATION)
	opVex(x1, &x2, op, type, code);
}
const Xmm& cvtIdx0(const Operand& x) const
{
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
@@ -2568,8 +2636,7 @@ private:
if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER)
bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM());
if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION)
if (is16bit) db(0x66);
opRO(reg.changeBit(i32e == 32 ? 32 : reg.getBit()), op, T_F3 | T_0F, code);
opRO(reg, op, T_F3 | T_0F, code);
}
void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode)
{
@@ -2644,21 +2711,24 @@ private:
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding)
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code);
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
}
int orEvexIf(PreferredEncoding encoding) {
if (encoding == DefaultEncoding) {
encoding = defaultEncoding_;
PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
{
if (enc == DefaultEncoding) {
enc = defaultEncoding_[sel];
}
if (encoding == EvexEncoding) {
if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID)
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, VexEncoding)
#endif
return T_MUST_EVEX;
}
return 0;
return enc;
}
uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) {
enc = getEncoding(enc, sel);
return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex);
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
@@ -2752,7 +2822,7 @@ private:
const Operand *p1 = &k, *p2 = &op;
if (code == 0x93) { std::swap(p1, p2); }
if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return;
opVex(static_cast<const Reg&>(*p1), 0, *p2, T_L0|type, code);
opVex(static_cast<const Reg&>(*p1), 0, *p2, type, code);
}
void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2)
{
@@ -2770,6 +2840,31 @@ private:
}
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
}
// AVX10 zero-extending for vmovd, vmovw
// Shared encoder behind vmovd/vmovw.
//   op1, op2 : operands in the order given by the caller
//   typeTbl  : four attribute words, indexed by `sel` (see below)
//   codeTbl  : four opcode bytes, indexed by `sel`
//   enc      : requested encoding; DefaultEncoding is resolved through getEncoding(enc, 1)
//   bit      : register size mask accepted for the general-register form (passed to isREG)
// `sel` is chosen as:
//   0 / 1 : p1 is a general register accepted by isREG(bit), or memory
//   2 / 3 : p1 is an XMM register, or memory while the resolved encoding is AVX10v2Encoding
// The odd entry of each pair is taken when `rev` is true, i.e. when op1 ended up in p2.
// Raises ERR_BAD_COMBINATION when both operands are memory or when no entry applies.
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
bool rev = false;
// a memory op1 is moved to p2 first, so the check below sees op2 in p1
if (p1->isMEM()) {
std::swap(p1, p2);
rev = true;
}
// p1 is still memory only if both operands are memory
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
// keep an XMM operand in p2; p1 then holds the other operand
if (p1->isXMM()) {
std::swap(p1, p2);
rev = !rev;
}
// selector 1 of getEncoding: PreAVX10v2Encoding / AVX10v2Encoding
enc = getEncoding(enc, 1);
int sel = -1;
if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) {
sel = 2 + int(rev);
} else if (p1->isREG(bit) || p1->isMEM()) {
sel = int(rev);
}
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
// NOTE(review): the cast assumes p2 refers to an Xmm; nothing above enforces that when
// neither operand is XMM — presumably rejected inside opAVX_X_X_XM, confirm.
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@@ -2828,7 +2923,7 @@ public:
#endif
private:
bool isDefaultJmpNEAR_;
PreferredEncoding defaultEncoding_;
PreferredEncoding defaultEncoding_[2]; // 0:vnni, 1:vmpsadbw
public:
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
void L(Label& label) { labelMgr_.defineClabel(label); }
@@ -2999,6 +3094,7 @@ public:
rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
return;
}
if (p1->isREG() && p2->isREG()) std::swap(p1, p2); // adapt to NASM 2.16.03 behavior to pass tests
opRO(static_cast<const Reg&>(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit())));
}
@@ -3044,11 +3140,11 @@ public:
}
void mov(const Operand& op, const Segment& seg)
{
opRO(Reg8(seg.getIdx()), op, 0, 0x8C, op.isREG(16|i32e));
opRO(Reg8(seg.getIdx()), op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8C, op.isREG(16|i32e));
}
void mov(const Segment& seg, const Operand& op)
{
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, 0, 0x8E, op.isREG(16|i32e));
opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast<const Operand&>(op.getReg().cvt32()) : op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8E, op.isREG(16|i32e));
}
#endif
@@ -3113,8 +3209,9 @@ public:
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
, isDefaultJmpNEAR_(false)
, defaultEncoding_(EvexEncoding)
{
setDefaultEncoding();
setDefaultEncodingAVX10();
labelMgr_.set(this);
}
void reset()
@@ -3151,13 +3248,20 @@ public:
#undef jnl
#endif
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
void sha1msg12(const Xmm& x, const Operand& op)
// set default encoding of VNNI
// EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI
void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
{
opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
defaultEncoding_[0] = enc;
}
// default : PreferredEncoding : AVX-VNNI-INT8/AVX512-FP16
void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
{
if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
defaultEncoding_[1] = enc;
}
void bswap(const Reg32e& r)
{
int idx = r.getIdx();
@@ -3170,6 +3274,24 @@ public:
}
db(0xC8 + (idx & 7));
}
// Encode vmovd through opAVX10ZeroExt with bit = 32, so the general-register form
// must satisfy isREG(32).
//   op1, op2 : operands in caller order
//   enc      : encoding preference; DefaultEncoding is resolved by opAVX10ZeroExt
// Both tables are indexed by the selector computed in opAVX10ZeroExt:
//   [0], [1] : general-register / memory forms
//   [2], [3] : XMM forms, and memory forms under AVX10v2Encoding
// within each pair the second entry is used when op1 is the operand cast to Xmm.
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_N4, T_MUST_EVEX|T_F3|T_0F|T_N4, // avx10.2
};
// opcode bytes, same index order as typeTbl
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
}
// Encode vmovw through opAVX10ZeroExt with bit = 16|32|64, so the general-register
// form accepts any register satisfying isREG(16|32|64).
//   op1, op2 : operands in caller order
//   enc      : encoding preference; DefaultEncoding is resolved by opAVX10ZeroExt
// Both tables are indexed by the selector computed in opAVX10ZeroExt:
//   [0], [1] : general-register / memory forms
//   [2], [3] : XMM forms, and memory forms under AVX10v2Encoding
// within each pair the second entry is used when op1 is the operand cast to Xmm.
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_N2, // avx10.2
};
// opcode bytes, same index order as typeTbl
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
}
/*
use single byte nop if useMultiByteNop = false
*/

File diff suppressed because it is too large Load Diff

View File

@@ -548,6 +548,12 @@ public:
XBYAK_DEFINE_TYPE(88, tSSE4a);
XBYAK_DEFINE_TYPE(89, tCLWB);
XBYAK_DEFINE_TYPE(90, tTSXLDTRK);
XBYAK_DEFINE_TYPE(91, tAMX_TRANSPOSE);
XBYAK_DEFINE_TYPE(92, tAMX_TF32);
XBYAK_DEFINE_TYPE(93, tAMX_AVX512);
XBYAK_DEFINE_TYPE(94, tAMX_MOVRS);
XBYAK_DEFINE_TYPE(95, tAMX_FP8);
XBYAK_DEFINE_TYPE(96, tMOVRS);
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@@ -702,12 +708,20 @@ public:
if (EAX & (1U << 7)) type_ |= tCMPCCXADD;
if (EAX & (1U << 21)) type_ |= tAMX_FP16;
if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
if (EAX & (1U << 31)) type_ |= tMOVRS;
if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
if (EDX & (1U << 10)) type_ |= tAVX_VNNI_INT16;
if (EDX & (1U << 14)) type_ |= tPREFETCHITI;
if (EDX & (1U << 19)) type_ |= tAVX10;
if (EDX & (1U << 21)) type_ |= tAPX_F;
getCpuidEx(0x1e, 1, data);
if (EAX & (1U << 4)) type_ |= tAMX_FP8;
if (EAX & (1U << 5)) type_ |= tAMX_TRANSPOSE;
if (EAX & (1U << 6)) type_ |= tAMX_TF32;
if (EAX & (1U << 7)) type_ |= tAMX_AVX512;
if (EAX & (1U << 8)) type_ |= tAMX_MOVRS;
}
}
if (maxNum >= 0x19) {
@@ -892,17 +906,17 @@ class StackFrame {
#endif
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_;
bool useRcx_;
bool useRdx_;
int saveNum_;
int P_;
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[maxRegNum];
Pack p_;
Pack t_;
int pNum_;
int tNum_;
int saveNum_;
int P_;
bool useRcx_;
bool useRdx_;
bool makeEpilog_;
StackFrame(const StackFrame&);
void operator=(const StackFrame&);
public:
@@ -928,10 +942,10 @@ public:
: code_(code)
, pNum_(pNum)
, tNum_(tNum & ~(UseRCX | UseRDX))
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, saveNum_(0)
, P_(0)
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, makeEpilog_(makeEpilog)
, p(p_)
, t(t_)

View File

@@ -1,11 +1,12 @@
#include "aarch64_assembler.h"
#include "jit/jit_context.h"
#include <string.h>
#include <sys/mman.h>
#include <stdio.h>
#include <string.h>
#ifdef __linux__
#include <sys/mman.h>
#include <unistd.h>
#endif
#define ONE_MB 1 * 1024 * 1024

View File

@@ -1,3 +1,4 @@
#include "aarch64_context_offsets.h"
#include "aarch64_emit_context.h"
#include "emulator/ssa_emit_context.h"
#include "emulator/guest_process.h"

View File

@@ -1,3 +1,4 @@
#include "emulator/aarch64/aarch64_context_offsets.h"
#include "emulator/guest_process.h"
#include "emulator/ssa_emit_context.h"