Merge pull request #2877 from Sonicadvance1/classification_adds

InstructionCountCI: Adds three more instruction tables
This commit is contained in:
Ryan Houdek 2023-08-11 15:09:29 -07:00 committed by GitHub
commit 6d1fcfce09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 974 additions and 2 deletions

View File

@ -70,7 +70,9 @@
"ENABLEPMULL128": "enablepmull128",
"DISABLEPMULL128": "disablepmull128",
"ENABLERNG": "enablerng",
"DISABLERNG": "disablerng"
"DISABLERNG": "disablerng",
"ENABLECLZERO": "enableclzero",
"DISABLECLZERO": "disableclzero"
},
"Desc": [
"Allows controlling of the CPU features in the JIT.",
@ -82,7 +84,8 @@
"\t{enable,disable}lrcpc2: Will force enable or disable lrcpc2 even if the host doesn't support it",
"\t{enable,disable}cssc: Will force enable or disable cssc even if the host doesn't support it",
"\t{enable,disable}pmull128: Will force enable or disable pmull128 even if the host doesn't support it",
"\t{enable,disable}rng: Will force enable or disable rng even if the host doesn't support it"
"\t{enable,disable}rng: Will force enable or disable rng even if the host doesn't support it",
"\t{enable,disable}clzero: Will force enable or disable clzero even if the host doesn't support it"
]
}
},

View File

@ -88,6 +88,10 @@ static void OverrideFeatures(HostFeatures *Features) {
const bool EnableRNG = HostFeatures() & FEXCore::Config::HostFeatures::ENABLERNG;
LogMan::Throw::AFmt(!(DisableRNG && EnableRNG), "Disabling and Enabling CPU features are mutually exclusive");
const bool DisableCLZERO = HostFeatures() & FEXCore::Config::HostFeatures::DISABLECLZERO;
const bool EnableCLZERO = HostFeatures() & FEXCore::Config::HostFeatures::ENABLECLZERO;
LogMan::Throw::AFmt(!(DisableCLZERO && EnableCLZERO), "Disabling and Enabling CPU features are mutually exclusive");
if (EnableAVX) {
Features->SupportsAVX = true;
}
@ -136,6 +140,12 @@ static void OverrideFeatures(HostFeatures *Features) {
else if (DisableRNG) {
Features->SupportsRAND = false;
}
if (EnableCLZERO) {
Features->SupportsCLZERO = true;
}
else if (DisableCLZERO) {
Features->SupportsCLZERO = false;
}
}
HostFeatures::HostFeatures() {

View File

@ -44,10 +44,13 @@ class HostFeatures(Flag) :
FEATURE_ANY = 0
FEATURE_SVE128 = (1 << 0)
FEATURE_SVE256 = (1 << 1)
FEATURE_CLZERO = (1 << 2)
# Name -> flag lookup for host features. The keys match the strings used in
# the "EnabledHostFeatures" / "DisabledHostFeatures" lists of the test JSON.
HostFeaturesLookup = {
    "SVE128": HostFeatures.FEATURE_SVE128,
    "SVE256": HostFeatures.FEATURE_SVE256,
    "CLZERO": HostFeatures.FEATURE_CLZERO,
}
def GetHostFeatures(data):

View File

@ -388,6 +388,7 @@ int main(int argc, char **argv, char **const envp) {
// Bit flags naming individual host features. Each flag is tested against
// TestHeaderData->EnabledHostFeatures and TestHeaderData->DisabledHostFeatures
// to decide which enable/disable bits go into HostFeatureControl.
enum HostFeatures {
  FEATURE_SVE128 = 1U << 0,
  FEATURE_SVE256 = 1U << 1,
  FEATURE_CLZERO = 1U << 2,
};
uint64_t SVEWidth = 0;
@ -400,6 +401,9 @@ int main(int argc, char **argv, char **const envp) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEAVX);
SVEWidth = 256;
}
if (TestHeaderData->EnabledHostFeatures & FEATURE_CLZERO) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLECLZERO);
}
if (TestHeaderData->DisabledHostFeatures & FEATURE_SVE128) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVE);
@ -407,6 +411,9 @@ int main(int argc, char **argv, char **const envp) {
if (TestHeaderData->DisabledHostFeatures & FEATURE_SVE256) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEAVX);
}
if (TestHeaderData->DisabledHostFeatures & FEATURE_CLZERO) {
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLECLZERO);
}
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_HOSTFEATURES, fextl::fmt::format("{}", HostFeatureControl));
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_FORCESVEWIDTH, fextl::fmt::format("{}", SVEWidth));

View File

@ -0,0 +1,192 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [],
"DisabledHostFeatures": [
"SVE128",
"SVE256"
]
},
"Comment": [
"These 3DNow! instructions are optimal assuming that FEX doesn't SRA (statically register allocate) MMX registers",
"This accounts for the overhead of loading and storing the registers in each instruction",
"Could technically save some instructions by using SRA for MMX registers."
],
"Instructions": {
"pi2fw mm0, mm1": {
"ExpectedInstructionCount": 5,
"Optimal": "Unknown",
"Comment": [
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
"This lowers throughput from 1 IPC to 1/2IPC",
"This instruction implementation might not be correct.",
"0x0f 0x0f 0x0c"
]
},
"pi2fd mm0, mm1": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x0f 0x0f 0x0d"
]
},
"pf2iw mm0, mm1": {
"ExpectedInstructionCount": 5,
"Optimal": "Unknown",
"Comment": [
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
"This lowers throughput from 1 IPC to 1/2IPC",
"This instruction implementation might not be correct.",
"0x0f 0x0f 0x1c"
]
},
"pf2id mm0, mm1": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x0f 0x0f 0x1d"
]
},
"pfrcpv mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": [
"FEAT_FPRES could make this more optimal",
"0x0f 0x0f 0x86"
]
},
"pfrsqrtv mm0, mm1": {
"ExpectedInstructionCount": 5,
"Optimal": "Yes",
"Comment": [
"FEAT_FPRES could make this more optimal",
"0x0f 0x0f 0x87"
]
},
"pfnacc mm0, mm1": {
"ExpectedInstructionCount": 8,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0x8a"
},
"pfpnacc mm0, mm1": {
"ExpectedInstructionCount": 7,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0x8e"
},
"pfcmpge mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0x90"
},
"pfmin mm0, mm1": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": "0x0f 0x0f 0x94"
},
"pfrcp mm0, mm1": {
"ExpectedInstructionCount": 5,
"Optimal": "Yes",
"Comment": [
"FEAT_FPRES could make this more optimal",
"0x0f 0x0f 0x96"
]
},
"pfrsqrt mm0, mm1": {
"ExpectedInstructionCount": 6,
"Optimal": "Yes",
"Comment": [
"FEAT_FPRES could make this more optimal",
"0x0f 0x0f 0x97"
]
},
"pfsub mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0x9a"
},
"pfadd mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0x9e"
},
"pfcmpgt mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xa0"
},
"pfmax mm0, mm1": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": "0x0f 0x0f 0xa4"
},
"pfrcpit1 mm0, mm1": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xa6"
},
"pfrcpit1 mm0, mm0": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": "0x0f 0x0f 0xa6"
},
"pfrsqit1 mm0, mm1": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xa7"
},
"pfrsqit1 mm0, mm0": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": "0x0f 0x0f 0xa7"
},
"pfsubr mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xaa"
},
"pfcmpeq mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xb0"
},
"pfmul mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xb4"
},
"pfrcpit2 mm0, mm1": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xb6"
},
"pfrcpit2 mm0, mm0": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": "0x0f 0x0f 0xb6"
},
"db 0x0f, 0x0f, 0xc1, 0xb7": {
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"nasm doesn't support emitting this instruction",
"pmulhrw mm0, mm1",
"Might be able to use sqdmulh",
"0x0f 0x0f 0xb7"
]
},
"pswapd mm0, mm1": {
"ExpectedInstructionCount": 3,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xbb"
},
"pavgusb mm0, mm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x0f 0x0f 0xbf"
}
}
}

View File

@ -0,0 +1,728 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [],
"DisabledHostFeatures": [
"SVE128",
"SVE256"
]
},
"Comment": [
"SSE4.2 string instructions are skipped here.",
"This is entirely because implementing these instructions is a nightmare."
],
"Instructions": {
"palignr mm0, mm1, 0": {
"ExpectedInstructionCount": 4,
"Optimal": "No",
"Comment": [
"NP 0x0f 0x3a 0x0f"
]
},
"palignr mm0, mm1, 1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": [
"NP 0x0f 0x3a 0x0f"
]
},
"palignr mm0, mm1, 255": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": [
"NP 0x0f 0x3a 0x0f"
]
},
"roundps xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Nearest rounding",
"0x66 0x0f 0x3a 0x08"
]
},
"roundps xmm0, xmm1, 00000001b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"-inf rounding",
"0x66 0x0f 0x3a 0x08"
]
},
"roundps xmm0, xmm1, 00000010b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"+inf rounding",
"0x66 0x0f 0x3a 0x08"
]
},
"roundps xmm0, xmm1, 00000011b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"truncate rounding",
"0x66 0x0f 0x3a 0x08"
]
},
"roundps xmm0, xmm1, 00000100b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"host rounding mode rounding",
"0x66 0x0f 0x3a 0x08"
]
},
"roundpd xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"Nearest rounding",
"0x66 0x0f 0x3a 0x09"
]
},
"roundpd xmm0, xmm1, 00000001b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"-inf rounding",
"0x66 0x0f 0x3a 0x09"
]
},
"roundpd xmm0, xmm1, 00000010b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"+inf rounding",
"0x66 0x0f 0x3a 0x09"
]
},
"roundpd xmm0, xmm1, 00000011b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"truncate rounding",
"0x66 0x0f 0x3a 0x09"
]
},
"roundpd xmm0, xmm1, 00000100b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"host rounding mode rounding",
"0x66 0x0f 0x3a 0x09"
]
},
"roundss xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"Nearest rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0a"
]
},
"roundss xmm0, xmm1, 00000001b": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"-inf rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0a"
]
},
"roundss xmm0, xmm1, 00000010b": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"+inf rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0a"
]
},
"roundss xmm0, xmm1, 00000011b": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"truncate rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0a"
]
},
"roundss xmm0, xmm1, 00000100b": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"host rounding mode rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0a"
]
},
"roundsd xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"Nearest rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0b"
]
},
"roundsd xmm0, xmm1, 00000001b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"-inf rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0b"
]
},
"roundsd xmm0, xmm1, 00000010b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"+inf rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0b"
]
},
"roundsd xmm0, xmm1, 00000011b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"truncate rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0b"
]
},
"roundsd xmm0, xmm1, 00000100b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"host rounding mode rounding",
"FPConvert instruction using vector conversion instead of scalar.",
"This lowers throughput from 1 IPC to 1/2IPC",
"0x66 0x0f 0x3a 0x0b"
]
},
"blendps xmm0, xmm1, 0000b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0001b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0010b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0011b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0100b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0101b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0110b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 0111b": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1001b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1010b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1011b": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1100b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1101b": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1110b": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendps xmm0, xmm1, 1111b": {
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0c"
]
},
"blendpd xmm0, xmm1, 00b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0d"
]
},
"blendpd xmm0, xmm1, 01b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0d"
]
},
"blendpd xmm0, xmm1, 10b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0d"
]
},
"blendpd xmm0, xmm1, 11b": {
"ExpectedInstructionCount": 6,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0d"
]
},
"pblendw xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0e"
]
},
"pblendw xmm0, xmm1, 00000001b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0e"
]
},
"pblendw xmm0, xmm1, 11111111b": {
"ExpectedInstructionCount": 12,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0e"
]
},
"palignr xmm0, xmm1, 0": {
"ExpectedInstructionCount": 1,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x0f"
]
},
"palignr xmm0, xmm1, 1": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0f"
]
},
"palignr xmm0, xmm1, 255": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x0f"
]
},
"pextrb eax, xmm0, 0000b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x14"
]
},
"pextrb eax, xmm0, 1111b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x14"
]
},
"pextrw eax, xmm0, 000b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x15"
]
},
"pextrw eax, xmm0, 111b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x15"
]
},
"pextrd eax, xmm0, 00b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x16"
]
},
"pextrd eax, xmm0, 11b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x16"
]
},
"pextrq rax, xmm0, 0b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 REX.W 0x0f 0x3a 0x16"
]
},
"pextrq rax, xmm0, 1b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 REX.W 0x0f 0x3a 0x16"
]
},
"extractps eax, xmm0, 00b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x17"
]
},
"extractps eax, xmm0, 11b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x17"
]
},
"pinsrb xmm0, eax, 0000b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x20"
]
},
"pinsrb xmm0, eax, 0001b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x20"
]
},
"pinsrb xmm0, eax, 1111b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x20"
]
},
"insertps xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x21"
]
},
"insertps xmm0, xmm1, 00001111b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x21"
]
},
"insertps xmm0, xmm1, 00010000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x21"
]
},
"pinsrd xmm0, eax, 00b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x22"
]
},
"pinsrd xmm0, eax, 01b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x22"
]
},
"pinsrd xmm0, eax, 11b": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x22"
]
},
"pinsrq xmm0, rax, 0b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 REX.W 0x0f 0x3a 0x22"
]
},
"pinsrq xmm0, rax, 1b": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"0x66 REX.W 0x0f 0x3a 0x22"
]
},
"dpps xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x40"
]
},
"dpps xmm0, xmm1, 00001111b": {
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x40"
]
},
"dpps xmm0, xmm1, 11110000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x40"
]
},
"dpps xmm0, xmm1, 11111111b": {
"ExpectedInstructionCount": 10,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x40"
]
},
"dppd xmm0, xmm1, 00000000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x41"
]
},
"dppd xmm0, xmm1, 00001111b": {
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x41"
]
},
"dppd xmm0, xmm1, 11110000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x41"
]
},
"dppd xmm0, xmm1, 11111111b": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x41"
]
},
"mpsadbw xmm0, xmm1, 000b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 001b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 010b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 011b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 100b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 101b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 110b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"mpsadbw xmm0, xmm1, 111b": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0x42"
]
},
"pclmulqdq xmm0, xmm1, 00000b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x44"
]
},
"pclmulqdq xmm0, xmm1, 00001b": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x44"
]
},
"pclmulqdq xmm0, xmm1, 10000b": {
"ExpectedInstructionCount": 2,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x44"
]
},
"pclmulqdq xmm0, xmm1, 10001b": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": [
"0x66 0x0f 0x3a 0x44"
]
},
"sha1rnds4 xmm0, xmm1, 00b": {
"ExpectedInstructionCount": 61,
"Optimal": "No",
"Comment": [
"NP 0x0f 0x3a 0xcc"
]
},
"sha1rnds4 xmm0, xmm1, 01b": {
"ExpectedInstructionCount": 57,
"Optimal": "No",
"Comment": [
"NP 0x0f 0x3a 0xcc"
]
},
"sha1rnds4 xmm0, xmm1, 10b": {
"ExpectedInstructionCount": 69,
"Optimal": "No",
"Comment": [
"NP 0x0f 0x3a 0xcc"
]
},
"sha1rnds4 xmm0, xmm1, 11b": {
"ExpectedInstructionCount": 57,
"Optimal": "No",
"Comment": [
"NP 0x0f 0x3a 0xcc"
]
},
"aeskeygenassist xmm0, xmm1, 0": {
"ExpectedInstructionCount": 10,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0xdf"
]
},
"aeskeygenassist xmm0, xmm1, 0xFF": {
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x3a 0xdf"
]
}
}
}

View File

@ -0,0 +1,29 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"CLZERO"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256"
]
},
"Instructions": {
"xgetbv": {
"ExpectedInstructionCount": 48,
"Optimal": "No",
"Comment": "0xF 0x01 /2 RM-0"
},
"rdtscp": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": "0xF 0x01 /7 RM-1"
},
"clzero rax": {
"ExpectedInstructionCount": 1,
"Optimal": "Yes",
"Comment": "0xF 0x01 /7 RM-4"
}
}
}