Merge pull request #2273 from lioncash/keygen

OpcodeDispatcher: Handle 128-bit AVX AES instructions
This commit is contained in:
Ryan Houdek 2022-12-20 10:52:53 -08:00 committed by GitHub
commit 1800451251
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 460 additions and 22 deletions

View File

@ -6015,6 +6015,12 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::VBROADCASTOp<1>},
{OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::VBROADCASTOp<2>},
{OPD(2, 0b01, 0xDB), 1, &OpDispatchBuilder::VAESIMCOp},
{OPD(2, 0b01, 0xDC), 1, &OpDispatchBuilder::VAESEncOp},
{OPD(2, 0b01, 0xDD), 1, &OpDispatchBuilder::VAESEncLastOp},
{OPD(2, 0b01, 0xDE), 1, &OpDispatchBuilder::VAESDecOp},
{OPD(2, 0b01, 0xDF), 1, &OpDispatchBuilder::VAESDecLastOp},
{OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::VPERMQOp},
{OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::VPERMQOp},
{OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::VPERM2Op},
@ -6032,6 +6038,8 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
{OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp},
{OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op},
{OPD(3, 0b01, 0xDF), 1, &OpDispatchBuilder::VAESKeyGenAssistOp},
};
#undef OPD

View File

@ -419,6 +419,13 @@ public:
template <size_t ElementSize>
void VADDSUBPOp(OpcodeArgs);
void VAESDecOp(OpcodeArgs);
void VAESDecLastOp(OpcodeArgs);
void VAESEncOp(OpcodeArgs);
void VAESEncLastOp(OpcodeArgs);
void VAESIMCOp(OpcodeArgs);
void VAESKeyGenAssistOp(OpcodeArgs);
void VANDNOp(OpcodeArgs);
template <size_t ElementSize>
@ -710,6 +717,9 @@ private:
void AVXVectorScalarALUOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize);
void AVXVectorUnaryOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize, bool Scalar);
OrderedNode* AESKeyGenAssistImpl(OpcodeArgs);
OrderedNode* AESIMCImpl(OpcodeArgs);
OrderedNode* ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize,
size_t DstElementSize, bool Signed);

View File

@ -264,47 +264,135 @@ void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) {
StoreResult(FPRClass, Op, Res0, -1);
}
void OpDispatchBuilder::AESImcOp(OpcodeArgs) {
OrderedNode* OpDispatchBuilder::AESIMCImpl(OpcodeArgs) {
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
auto Res = _VAESImc(Src);
StoreResult(FPRClass, Op, Res, -1);
return _VAESImc(Src);
}
// Legacy-encoded AESIMC: stores the node produced by AESIMCImpl as-is.
void OpDispatchBuilder::AESImcOp(OpcodeArgs) {
  StoreResult(FPRClass, Op, AESIMCImpl(Op), -1);
}
// VEX-encoded AESIMC: same computation as AESImcOp, but the value is routed
// through a 16-byte _VMov before being stored.
// NOTE(review): presumably the _VMov is what clears the upper 128 bits of the
// destination, as the VEX.128 tests expect -- confirm against the IR op.
void OpDispatchBuilder::VAESIMCOp(OpcodeArgs) {
  OrderedNode *Narrowed = _VMov(16, AESIMCImpl(Op));
  StoreResult(FPRClass, Op, Narrowed, -1);
}
// Legacy-encoded AESENC (two-operand form).
//
// The destination operand doubles as the first input: it is loaded together
// with the first source operand, both are fed to _VAESEnc, and the single
// resulting node is stored back. The op is emitted and stored exactly once.
void OpDispatchBuilder::AESEncOp(OpcodeArgs) {
  OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *Result = _VAESEnc(Dest, Src);
  StoreResult(FPRClass, Op, Result, -1);
}
// VEX-encoded AESENC (three-operand form).
//
// Src[0] supplies the state and Src[1] the round key; the destination is
// write-only. Only the 128-bit form is handled for now.
void OpDispatchBuilder::VAESEncOp(OpcodeArgs) {
  // TODO: Handle 256-bit VAESENC.
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENC unimplemented");

  OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *RoundKey = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);

  OrderedNode *Round = _VAESEnc(State, RoundKey);
  OrderedNode *Result = Round;
  if (Is128Bit) {
    // Route the 128-bit result through a 16-byte vector move before storing.
    Result = _VMov(16, Round);
  }

  StoreResult(FPRClass, Op, Result, -1);
}
// Legacy-encoded AESENCLAST (two-operand form).
//
// The destination operand doubles as the first input: it is loaded together
// with the first source operand, both are fed to _VAESEncLast, and the single
// resulting node is stored back. The op is emitted and stored exactly once.
void OpDispatchBuilder::AESEncLastOp(OpcodeArgs) {
  OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *Result = _VAESEncLast(Dest, Src);
  StoreResult(FPRClass, Op, Result, -1);
}
// VEX-encoded AESENCLAST (three-operand form).
//
// Src[0] supplies the state and Src[1] the round key; the destination is
// write-only. Only the 128-bit form is handled for now.
void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) {
  // TODO: Handle 256-bit VAESENCLAST.
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENCLAST unimplemented");

  OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *RoundKey = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);

  OrderedNode *Round = _VAESEncLast(State, RoundKey);
  OrderedNode *Result = Round;
  if (Is128Bit) {
    // Route the 128-bit result through a 16-byte vector move before storing.
    Result = _VMov(16, Round);
  }

  StoreResult(FPRClass, Op, Result, -1);
}
// Legacy-encoded AESDEC (two-operand form).
//
// The destination operand doubles as the first input: it is loaded together
// with the first source operand, both are fed to _VAESDec, and the single
// resulting node is stored back. The op is emitted and stored exactly once.
void OpDispatchBuilder::AESDecOp(OpcodeArgs) {
  OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *Result = _VAESDec(Dest, Src);
  StoreResult(FPRClass, Op, Result, -1);
}
// VEX-encoded AESDEC (three-operand form).
//
// Src[0] supplies the state and Src[1] the round key; the destination is
// write-only. Only the 128-bit form is handled for now.
void OpDispatchBuilder::VAESDecOp(OpcodeArgs) {
  // TODO: Handle 256-bit VAESDEC.
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDEC unimplemented");

  OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *RoundKey = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);

  OrderedNode *Round = _VAESDec(State, RoundKey);
  OrderedNode *Result = Round;
  if (Is128Bit) {
    // Route the 128-bit result through a 16-byte vector move before storing.
    Result = _VMov(16, Round);
  }

  StoreResult(FPRClass, Op, Result, -1);
}
// Legacy-encoded AESDECLAST (two-operand form).
//
// The destination operand doubles as the first input: it is loaded together
// with the first source operand, both are fed to _VAESDecLast, and the single
// resulting node is stored back. The op is emitted and stored exactly once.
void OpDispatchBuilder::AESDecLastOp(OpcodeArgs) {
  OrderedNode *Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags, -1);
  OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *Result = _VAESDecLast(Dest, Src);
  StoreResult(FPRClass, Op, Result, -1);
}
// VEX-encoded AESDECLAST (three-operand form).
//
// Src[0] supplies the state and Src[1] the round key; the destination is
// write-only. Only the 128-bit form is handled for now.
void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) {
  // TODO: Handle 256-bit VAESDECLAST.
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDECLAST unimplemented");

  OrderedNode *State = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
  OrderedNode *RoundKey = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);

  OrderedNode *Round = _VAESDecLast(State, RoundKey);
  OrderedNode *Result = Round;
  if (Is128Bit) {
    // Route the 128-bit result through a 16-byte vector move before storing.
    Result = _VMov(16, Round);
  }

  StoreResult(FPRClass, Op, Result, -1);
}
// Shared helper for the SSE and VEX forms of AESKEYGENASSIST.
//
// Loads the first source operand, reads the round constant from the second
// source operand (which must be a literal), and returns the _VAESKeyGenAssist
// node without storing it.
OrderedNode* OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) {
  OrderedNode *Input = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);

  LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
  const uint64_t RoundConstant = Op->Src[1].Data.Literal.Value;

  return _VAESKeyGenAssist(Input, RoundConstant);
}
// Legacy-encoded AESKEYGENASSIST.
//
// Operand loading, the literal check and the round-constant read all live in
// AESKeyGenAssistImpl; this handler only stores the node it returns.
void OpDispatchBuilder::AESKeyGenAssist(OpcodeArgs) {
  OrderedNode *Result = AESKeyGenAssistImpl(Op);
  StoreResult(FPRClass, Op, Result, -1);
}
// VEX-encoded AESKEYGENASSIST: same computation as AESKeyGenAssist, but the
// value is routed through a 16-byte _VMov before being stored.
// NOTE(review): presumably the _VMov is what clears the upper 128 bits of the
// destination, as the VEX.128 tests expect -- confirm against the IR op.
void OpDispatchBuilder::VAESKeyGenAssistOp(OpcodeArgs) {
  OrderedNode *Narrowed = _VMov(16, AESKeyGenAssistImpl(Op));
  StoreResult(FPRClass, Op, Narrowed, -1);
}
void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) {

View File

@ -380,11 +380,11 @@ void InitializeVEXTables() {
{OPD(2, 0b01, 0xB6), 1, X86InstInfo{"VFMADDSUB231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xB7), 1, X86InstInfo{"VFMSUBADD231", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDB), 1, X86InstInfo{"VAESIMC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDC), 1, X86InstInfo{"VAESENC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDD), 1, X86InstInfo{"VAESENCLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDE), 1, X86InstInfo{"VAESDEC", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDF), 1, X86InstInfo{"VAESDECLAST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(2, 0b01, 0xDB), 1, X86InstInfo{"VAESIMC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0xDC), 1, X86InstInfo{"VAESENC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0xDD), 1, X86InstInfo{"VAESENCLAST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0xDE), 1, X86InstInfo{"VAESDEC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b01, 0xDF), 1, X86InstInfo{"VAESDECLAST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0, nullptr}},
{OPD(2, 0b00, 0xF2), 1, X86InstInfo{"ANDN", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_1ST_SRC, 0, nullptr}},
@ -478,7 +478,7 @@ void InitializeVEXTables() {
{OPD(3, 0b01, 0x7E), 1, X86InstInfo{"VFNMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0x7F), 1, X86InstInfo{"VFNMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_UNDEC, FLAGS_NONE, 0, nullptr}},
{OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(3, 0b11, 0xF0), 1, X86InstInfo{"RORX", TYPE_INST, FLAGS_MODRM, 1, nullptr}},

View File

@ -5,6 +5,12 @@ Test_H0F38/66_DD.asm
Test_H0F38/66_DE.asm
Test_H0F38/66_DF.asm
Test_H0F3A/0_66_DF.asm
Test_VEX/vaesdec.asm
Test_VEX/vaesdeclast.asm
Test_VEX/vaesenc.asm
Test_VEX/vaesenclast.asm
Test_VEX/vaesimc.asm
Test_VEX/vaeskeygenassist.asm
# PCMUL considered to be part of crypto operations. Simulator doesn't support this.
Test_H0F3A/pclmulqdq.asm

View File

@ -0,0 +1,49 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0x7A1FC5A0A07A1FC5", "0xC5A07A1F1FC5A07A", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x85E03A5F5F85E03A", "0x3A5F85E0E03A5F85", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x7A1FC5A1A07A1FC4", "0xC5A07A1E1FC5A07B", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x85E03A5FA07A1FC5", "0xC5A07A1EE03A5F85", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESDEC with a memory second source.
; rdx points at a table of 32-byte rows defined below.
lea rdx, [rel .data]
; ymm0 <- row 4; its low half (xmm0) is the first source of every op below.
vmovaps ymm0, [rdx + 32 * 4]
; One op per row 0-3, each row used as the memory (second source) operand.
; The expected RegData lists zero upper 128 bits for every destination.
vaesdec xmm1, xmm0, [rdx + 32 * 0]
vaesdec xmm2, xmm0, [rdx + 32 * 1]
vaesdec xmm3, xmm0, [rdx + 32 * 2]
vaesdec xmm4, xmm0, [rdx + 32 * 3]
hlt
; 32-byte alignment is required by the aligned 256-bit vmovaps load above.
align 32
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: loaded into ymm0 at the top of the test.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303

View File

@ -0,0 +1,49 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0xD5D56A6A6AD5D56A", "0x6A6AD5D5D56A6AD5", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x2A2A9595952A2A95", "0x95952A2A2A95952A", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0xD5D56A6B6AD5D56B", "0x6A6AD5D4D56A6AD4", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x2A2A95956AD5D56A", "0x6A6AD5D42A95952A", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESDECLAST with a memory second source.
; rdx points at a table of 32-byte rows defined below.
lea rdx, [rel .data]
; ymm0 <- row 4; its low half (xmm0) is the first source of every op below.
vmovaps ymm0, [rdx + 32 * 4]
; One op per row 0-3, each row used as the memory (second source) operand.
; The expected RegData lists zero upper 128 bits for every destination.
vaesdeclast xmm1, xmm0, [rdx + 32 * 0]
vaesdeclast xmm2, xmm0, [rdx + 32 * 1]
vaesdeclast xmm3, xmm0, [rdx + 32 * 2]
vaesdeclast xmm4, xmm0, [rdx + 32 * 3]
hlt
; 32-byte alignment is required by the aligned 256-bit vmovaps load above.
align 32
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: loaded into ymm0 at the top of the test.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303

View File

@ -0,0 +1,49 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0x77637B6F637B6F77", "0x7B6F77636F77637B", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x889C84909C849088", "0x8490889C90889C84", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x77637B6E637B6F76", "0x7B6F77626F77637A", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x889C8490637B6F77", "0x7B6F776290889C84", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESENC with a memory second source.
; rdx points at a table of 32-byte rows defined below.
lea rdx, [rel .data]
; ymm0 <- row 4; its low half (xmm0) is the first source of every op below.
vmovaps ymm0, [rdx + 32 * 4]
; One op per row 0-3, each row used as the memory (second source) operand.
; The expected RegData lists zero upper 128 bits for every destination.
vaesenc xmm1, xmm0, [rdx + 32 * 0]
vaesenc xmm2, xmm0, [rdx + 32 * 1]
vaesenc xmm3, xmm0, [rdx + 32 * 2]
vaesenc xmm4, xmm0, [rdx + 32 * 3]
hlt
; 32-byte alignment is required by the aligned 256-bit vmovaps load above.
align 32
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: loaded into ymm0 at the top of the test.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303

View File

@ -0,0 +1,49 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM1": ["0x777B7B777B7B7777", "0x7B77777B77777B7B", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x8884848884848888", "0x8488888488888484", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x777B7B767B7B7776", "0x7B77777A77777B7A", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x888484887B7B7777", "0x7B77777A88888484", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESENCLAST with a memory second source.
; rdx points at a table of 32-byte rows defined below.
lea rdx, [rel .data]
; ymm0 <- row 4; its low half (xmm0) is the first source of every op below.
vmovaps ymm0, [rdx + 32 * 4]
; One op per row 0-3, each row used as the memory (second source) operand.
; The expected RegData lists zero upper 128 bits for every destination.
vaesenclast xmm1, xmm0, [rdx + 32 * 0]
vaesenclast xmm2, xmm0, [rdx + 32 * 1]
vaesenclast xmm3, xmm0, [rdx + 32 * 2]
vaesenclast xmm4, xmm0, [rdx + 32 * 3]
hlt
; 32-byte alignment is required by the aligned 256-bit vmovaps load above.
align 32
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: loaded into ymm0 at the top of the test.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303

View File

@ -0,0 +1,66 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x0B0D090E0B0D090E", "0x0B0D090E0B0D090E", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0xFFFFFFFF00000000", "0x0B0D090EFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
"XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM12": ["0x0B0D090E0B0D090E", "0x0B0D090E0B0D090E", "0x0000000000000000", "0x0000000000000000"],
"XMM13": ["0xFFFFFFFF00000000", "0x0B0D090EFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
"XMM14": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESIMC with both memory and register
; sources. rdx points at a table of 32-byte rows defined below.
lea rdx, [rel .data]
; Memory-source form: one op per row 0-4.
vaesimc xmm0, [rdx + 32 * 0]
vaesimc xmm1, [rdx + 32 * 1]
vaesimc xmm2, [rdx + 32 * 2]
vaesimc xmm3, [rdx + 32 * 3]
vaesimc xmm4, [rdx + 32 * 4]
; Load the same rows as full 256-bit values into ymm5-ymm9.
vmovapd ymm5, [rdx + 32 * 0]
vmovapd ymm6, [rdx + 32 * 1]
vmovapd ymm7, [rdx + 32 * 2]
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
; Register-source form on the same inputs; the expected RegData for
; xmm10-xmm14 matches xmm0-xmm4, with zero upper 128 bits.
vaesimc xmm10, xmm5
vaesimc xmm11, xmm6
vaesimc xmm12, xmm7
vaesimc xmm13, xmm8
vaesimc xmm14, xmm9
hlt
; 32-byte alignment is required by the aligned 256-bit vmovapd loads above.
align 32
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: 0x02 bytes in the low qword, 0x03 bytes in the high qword.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303

View File

@ -0,0 +1,64 @@
%ifdef CONFIG
{
"HostFeatures": ["AVX"],
"RegData": {
"XMM0": ["0x6363636363636363", "0x6363636363636363", "0x0000000000000000", "0x0000000000000000"],
"XMM1": ["0x1616161616161616", "0x1616161616161616", "0x0000000000000000", "0x0000000000000000"],
"XMM2": ["0x7C6363636363637C", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM3": ["0x1616161616161616", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM4": ["0x6363636263636363", "0x6363636263636363", "0x0000000000000000", "0x0000000000000000"],
"XMM5": ["0x1616161416161616", "0x1616161416161616", "0x0000000000000000", "0x0000000000000000"],
"XMM6": ["0x7C6363606363637C", "0x7C6363606363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM7": ["0x1616161216161616", "0x7C6363676363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM8": ["0x6363636663636363", "0x6363636663636363", "0x0000000000000000", "0x0000000000000000"],
"XMM9": ["0x1616161016161616", "0x1616161016161616", "0x0000000000000000", "0x0000000000000000"],
"XMM10": ["0x7C6363646363637C", "0x7C6363646363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM11": ["0x1616161E16161616", "0x7C63636B6363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM12": ["0x6363636A63636363", "0x6363636A63636363", "0x0000000000000000", "0x0000000000000000"],
"XMM13": ["0x1616161C16161616", "0x1616161C16161616", "0x0000000000000000", "0x0000000000000000"],
"XMM14": ["0x7C6363686363637C", "0x7C6363686363637C", "0x0000000000000000", "0x0000000000000000"],
"XMM15": ["0x1616161A16161616", "0x7C63636F6363637C", "0x0000000000000000", "0x0000000000000000"]
}
}
%endif
; Exercises the 128-bit VEX form of AESKEYGENASSIST with a memory source.
; rdx points at a table of 16-byte rows defined below.
lea rdx, [rel .data]
; Rows 0-3 with an immediate of 0.
vaeskeygenassist xmm0, [rdx + 16 * 0], 0
vaeskeygenassist xmm1, [rdx + 16 * 1], 0
vaeskeygenassist xmm2, [rdx + 16 * 2], 0
vaeskeygenassist xmm3, [rdx + 16 * 3], 0
; Rows 0-3 repeated three more times with immediates 1 through 12.
vaeskeygenassist xmm4, [rdx + 16 * 0], 1
vaeskeygenassist xmm5, [rdx + 16 * 1], 2
vaeskeygenassist xmm6, [rdx + 16 * 2], 3
vaeskeygenassist xmm7, [rdx + 16 * 3], 4
vaeskeygenassist xmm8, [rdx + 16 * 0], 5
vaeskeygenassist xmm9, [rdx + 16 * 1], 6
vaeskeygenassist xmm10, [rdx + 16 * 2], 7
vaeskeygenassist xmm11, [rdx + 16 * 3], 8
vaeskeygenassist xmm12, [rdx + 16 * 0], 9
vaeskeygenassist xmm13, [rdx + 16 * 1], 10
vaeskeygenassist xmm14, [rdx + 16 * 2], 11
vaeskeygenassist xmm15, [rdx + 16 * 3], 12
hlt
align 16
.data:
; Row 0: all zero bits.
dq 0x0000000000000000
dq 0x0000000000000000
; Row 1: all one bits.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
; Row 2: the value 1 in every 32-bit element.
dq 0x0000000100000001
dq 0x0000000100000001
; Row 3: mixed pattern.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
; Row 4: not referenced by any instruction above.
dq 0x0202020202020202
dq 0x0303030303030303