mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-14 09:28:34 +00:00
Merge pull request #2877 from Sonicadvance1/classification_adds
InstructionCountCI: Adds three more instruction tables
This commit is contained in:
commit
6d1fcfce09
@ -70,7 +70,9 @@
|
||||
"ENABLEPMULL128": "enablepmull128",
|
||||
"DISABLEPMULL128": "disablepmull128",
|
||||
"ENABLERNG": "enablerng",
|
||||
"DISABLERNG": "disablerng"
|
||||
"DISABLERNG": "disablerng",
|
||||
"ENABLECLZERO": "enableclzero",
|
||||
"DISABLECLZERO": "disableclzero"
|
||||
},
|
||||
"Desc": [
|
||||
"Allows controlling of the CPU features in the JIT.",
|
||||
@ -82,7 +84,8 @@
|
||||
"\t{enable,disable}lrcpc2: Will force enable or disable lrcpc2 even if the host doesn't support it",
|
||||
"\t{enable,disable}cssc: Will force enable or disable cssc even if the host doesn't support it",
|
||||
"\t{enable,disable}pmull128: Will force enable or disable pmull128 even if the host doesn't support it",
|
||||
"\t{enable,disable}rng: Will force enable or disable rng even if the host doesn't support it"
|
||||
"\t{enable,disable}rng: Will force enable or disable rng even if the host doesn't support it",
|
||||
"\t{enable,disable}clzero: Will force enable or disable clzero even if the host doesn't support it"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
@ -88,6 +88,10 @@ static void OverrideFeatures(HostFeatures *Features) {
|
||||
const bool EnableRNG = HostFeatures() & FEXCore::Config::HostFeatures::ENABLERNG;
|
||||
LogMan::Throw::AFmt(!(DisableRNG && EnableRNG), "Disabling and Enabling CPU features are mutually exclusive");
|
||||
|
||||
const bool DisableCLZERO = HostFeatures() & FEXCore::Config::HostFeatures::DISABLECLZERO;
|
||||
const bool EnableCLZERO = HostFeatures() & FEXCore::Config::HostFeatures::ENABLECLZERO;
|
||||
LogMan::Throw::AFmt(!(DisableCLZERO && EnableCLZERO), "Disabling and Enabling CPU features are mutually exclusive");
|
||||
|
||||
if (EnableAVX) {
|
||||
Features->SupportsAVX = true;
|
||||
}
|
||||
@ -136,6 +140,12 @@ static void OverrideFeatures(HostFeatures *Features) {
|
||||
else if (DisableRNG) {
|
||||
Features->SupportsRAND = false;
|
||||
}
|
||||
if (EnableCLZERO) {
|
||||
Features->SupportsCLZERO = true;
|
||||
}
|
||||
else if (DisableCLZERO) {
|
||||
Features->SupportsCLZERO = false;
|
||||
}
|
||||
}
|
||||
|
||||
HostFeatures::HostFeatures() {
|
||||
|
@ -44,10 +44,13 @@ class HostFeatures(Flag) :
|
||||
FEATURE_ANY = 0
|
||||
FEATURE_SVE128 = (1 << 0)
|
||||
FEATURE_SVE256 = (1 << 1)
|
||||
FEATURE_CLZERO = (1 << 2)
|
||||
|
||||
|
||||
HostFeaturesLookup = {
|
||||
"SVE128" : HostFeatures.FEATURE_SVE128,
|
||||
"SVE256" : HostFeatures.FEATURE_SVE256,
|
||||
"CLZERO" : HostFeatures.FEATURE_CLZERO,
|
||||
}
|
||||
|
||||
def GetHostFeatures(data):
|
||||
|
@ -388,6 +388,7 @@ int main(int argc, char **argv, char **const envp) {
|
||||
enum HostFeatures {
|
||||
FEATURE_SVE128 = (1U << 0),
|
||||
FEATURE_SVE256 = (1U << 1),
|
||||
FEATURE_CLZERO = (1U << 2),
|
||||
};
|
||||
|
||||
uint64_t SVEWidth = 0;
|
||||
@ -400,6 +401,9 @@ int main(int argc, char **argv, char **const envp) {
|
||||
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEAVX);
|
||||
SVEWidth = 256;
|
||||
}
|
||||
if (TestHeaderData->EnabledHostFeatures & FEATURE_CLZERO) {
|
||||
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLECLZERO);
|
||||
}
|
||||
|
||||
if (TestHeaderData->DisabledHostFeatures & FEATURE_SVE128) {
|
||||
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVE);
|
||||
@ -407,6 +411,9 @@ int main(int argc, char **argv, char **const envp) {
|
||||
if (TestHeaderData->DisabledHostFeatures & FEATURE_SVE256) {
|
||||
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEAVX);
|
||||
}
|
||||
if (TestHeaderData->DisabledHostFeatures & FEATURE_CLZERO) {
|
||||
HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLECLZERO);
|
||||
}
|
||||
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_HOSTFEATURES, fextl::fmt::format("{}", HostFeatureControl));
|
||||
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_FORCESVEWIDTH, fextl::fmt::format("{}", SVEWidth));
|
||||
|
||||
|
192
unittests/InstructionCountCI/DDD.json
Normal file
192
unittests/InstructionCountCI/DDD.json
Normal file
@ -0,0 +1,192 @@
|
||||
{
|
||||
"Features": {
|
||||
"Bitness": 64,
|
||||
"EnabledHostFeatures": [],
|
||||
"DisabledHostFeatures": [
|
||||
"SVE128",
|
||||
"SVE256"
|
||||
]
|
||||
},
|
||||
"Comment": [
|
||||
"These 3DNow! instructions are optimal assuming that FEX doesn't SRA MMX registers",
|
||||
"This accounts for the overhead of loading and storing the registers in each instruction",
|
||||
"Could technically save some instructions by using SRA for MMX registers."
|
||||
],
|
||||
"Instructions": {
|
||||
"pi2fw mm0, mm1": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "Unknown",
|
||||
"Comment": [
|
||||
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"This instruction implementation might not be correct.",
|
||||
"0x0f 0x0f 0x0c"
|
||||
]
|
||||
},
|
||||
"pi2fd mm0, mm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x0f 0x0f 0x0d"
|
||||
]
|
||||
},
|
||||
"pf2iw mm0, mm1": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "Unknown",
|
||||
"Comment": [
|
||||
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"This instruction implementation might not be correct.",
|
||||
"0x0f 0x0f 0x1c"
|
||||
]
|
||||
},
|
||||
"pf2id mm0, mm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"FPConvert instruction using 128-bit conversion instead of 64-bit.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x0f 0x0f 0x1d"
|
||||
]
|
||||
},
|
||||
"pfrcpv mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"FEAT_FPRES could make this more optimal",
|
||||
"0x0f 0x0f 0x86"
|
||||
]
|
||||
},
|
||||
"pfrsqrtv mm0, mm1": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"FEAT_FPRES could make this more optimal",
|
||||
"0x0f 0x0f 0x87"
|
||||
]
|
||||
},
|
||||
"pfnacc mm0, mm1": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0x8a"
|
||||
},
|
||||
"pfpnacc mm0, mm1": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0x8e"
|
||||
},
|
||||
"pfcmpge mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0x90"
|
||||
},
|
||||
"pfmin mm0, mm1": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": "0x0f 0x0f 0x94"
|
||||
},
|
||||
"pfrcp mm0, mm1": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"FEAT_FPRES could make this more optimal",
|
||||
"0x0f 0x0f 0x96"
|
||||
]
|
||||
},
|
||||
"pfrsqrt mm0, mm1": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"FEAT_FPRES could make this more optimal",
|
||||
"0x0f 0x0f 0x97"
|
||||
]
|
||||
},
|
||||
"pfsub mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0x9a"
|
||||
},
|
||||
"pfadd mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0x9e"
|
||||
},
|
||||
"pfcmpgt mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xa0"
|
||||
},
|
||||
"pfmax mm0, mm1": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": "0x0f 0x0f 0xa4"
|
||||
},
|
||||
"pfrcpit1 mm0, mm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xa6"
|
||||
},
|
||||
"pfrcpit1 mm0, mm0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": "0x0f 0x0f 0xa6"
|
||||
},
|
||||
"pfrsqit1 mm0, mm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xa7"
|
||||
},
|
||||
"pfrsqit1 mm0, mm0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": "0x0f 0x0f 0xa7"
|
||||
},
|
||||
"pfsubr mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xaa"
|
||||
},
|
||||
"pfcmpeq mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xb0"
|
||||
},
|
||||
"pfmul mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xb4"
|
||||
},
|
||||
"pfrcpit2 mm0, mm1": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xb6"
|
||||
},
|
||||
"pfrcpit2 mm0, mm0": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": "0x0f 0x0f 0xb6"
|
||||
},
|
||||
"db 0x0f, 0x0f, 0xc1, 0xb7": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"nasm doesn't support emitting this instruction",
|
||||
"pmulhrw mm0, mm1",
|
||||
"Might be able to use sqdmulh",
|
||||
"0x0f 0x0f 0xb7"
|
||||
]
|
||||
},
|
||||
"pswapd mm0, mm1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xbb"
|
||||
},
|
||||
"pavgusb mm0, mm1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0x0f 0x0f 0xbf"
|
||||
}
|
||||
}
|
||||
}
|
728
unittests/InstructionCountCI/H0F3A.json
Normal file
728
unittests/InstructionCountCI/H0F3A.json
Normal file
@ -0,0 +1,728 @@
|
||||
{
|
||||
"Features": {
|
||||
"Bitness": 64,
|
||||
"EnabledHostFeatures": [],
|
||||
"DisabledHostFeatures": [
|
||||
"SVE128",
|
||||
"SVE256"
|
||||
]
|
||||
},
|
||||
"Comment": [
|
||||
"SSE4.2 string instructions are skipped here.",
|
||||
"Entirely because they are nightmare implementations of instructions."
|
||||
],
|
||||
"Instructions": {
|
||||
"palignr mm0, mm1, 0": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"NP 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"palignr mm0, mm1, 1": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"NP 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"palignr mm0, mm1, 255": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"NP 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"roundps xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Nearest rounding",
|
||||
"0x66 0x0f 0x3a 0x08"
|
||||
]
|
||||
},
|
||||
"roundps xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"0x66 0x0f 0x3a 0x08"
|
||||
]
|
||||
},
|
||||
"roundps xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"0x66 0x0f 0x3a 0x08"
|
||||
]
|
||||
},
|
||||
"roundps xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"0x66 0x0f 0x3a 0x08"
|
||||
]
|
||||
},
|
||||
"roundps xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"host rounding mode rounding",
|
||||
"0x66 0x0f 0x3a 0x08"
|
||||
]
|
||||
},
|
||||
"roundpd xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"Nearest rounding",
|
||||
"0x66 0x0f 0x3a 0x09"
|
||||
]
|
||||
},
|
||||
"roundpd xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"0x66 0x0f 0x3a 0x09"
|
||||
]
|
||||
},
|
||||
"roundpd xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"0x66 0x0f 0x3a 0x09"
|
||||
]
|
||||
},
|
||||
"roundpd xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"0x66 0x0f 0x3a 0x09"
|
||||
]
|
||||
},
|
||||
"roundpd xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "yes",
|
||||
"Comment": [
|
||||
"host rounding mode rounding",
|
||||
"0x66 0x0f 0x3a 0x09"
|
||||
]
|
||||
},
|
||||
"roundss xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Nearest rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0a"
|
||||
]
|
||||
},
|
||||
"roundss xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0a"
|
||||
]
|
||||
},
|
||||
"roundss xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0a"
|
||||
]
|
||||
},
|
||||
"roundss xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0a"
|
||||
]
|
||||
},
|
||||
"roundss xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"host rounding mode rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0a"
|
||||
]
|
||||
},
|
||||
"roundsd xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Nearest rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0b"
|
||||
]
|
||||
},
|
||||
"roundsd xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"-inf rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0b"
|
||||
]
|
||||
},
|
||||
"roundsd xmm0, xmm1, 00000010b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"+inf rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0b"
|
||||
]
|
||||
},
|
||||
"roundsd xmm0, xmm1, 00000011b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"truncate rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0b"
|
||||
]
|
||||
},
|
||||
"roundsd xmm0, xmm1, 00000100b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"host rounding mode rounding",
|
||||
"FPConvert instruction using vector conversion instead of scalar.",
|
||||
"This lowers throughput from 1 IPC to 1/2IPC",
|
||||
"0x66 0x0f 0x3a 0x0b"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0010b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0011b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0100b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0101b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0110b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 0111b": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1001b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1010b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1011b": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1100b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1101b": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1110b": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendps xmm0, xmm1, 1111b": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0c"
|
||||
]
|
||||
},
|
||||
"blendpd xmm0, xmm1, 00b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0d"
|
||||
]
|
||||
},
|
||||
"blendpd xmm0, xmm1, 01b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0d"
|
||||
]
|
||||
},
|
||||
"blendpd xmm0, xmm1, 10b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0d"
|
||||
]
|
||||
},
|
||||
"blendpd xmm0, xmm1, 11b": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0d"
|
||||
]
|
||||
},
|
||||
"pblendw xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0e"
|
||||
]
|
||||
},
|
||||
"pblendw xmm0, xmm1, 00000001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0e"
|
||||
]
|
||||
},
|
||||
"pblendw xmm0, xmm1, 11111111b": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0e"
|
||||
]
|
||||
},
|
||||
"palignr xmm0, xmm1, 0": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"palignr xmm0, xmm1, 1": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"palignr xmm0, xmm1, 255": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x0f"
|
||||
]
|
||||
},
|
||||
"pextrb eax, xmm0, 0000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x14"
|
||||
]
|
||||
},
|
||||
"pextrb eax, xmm0, 1111b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x14"
|
||||
]
|
||||
},
|
||||
"pextrw eax, xmm0, 000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x15"
|
||||
]
|
||||
},
|
||||
"pextrw eax, xmm0, 111b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x15"
|
||||
]
|
||||
},
|
||||
"pextrd eax, xmm0, 00b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x16"
|
||||
]
|
||||
},
|
||||
"pextrd eax, xmm0, 11b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x16"
|
||||
]
|
||||
},
|
||||
"pextrq rax, xmm0, 0b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 REX.W 0x0f 0x3a 0x16"
|
||||
]
|
||||
},
|
||||
"pextrq rax, xmm0, 1b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 REX.W 0x0f 0x3a 0x16"
|
||||
]
|
||||
},
|
||||
"extractps eax, xmm0, 00b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x17"
|
||||
]
|
||||
},
|
||||
"extractps eax, xmm0, 11b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x17"
|
||||
]
|
||||
},
|
||||
"pinsrb xmm0, eax, 0000b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x20"
|
||||
]
|
||||
},
|
||||
"pinsrb xmm0, eax, 0001b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x20"
|
||||
]
|
||||
},
|
||||
"pinsrb xmm0, eax, 1111b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x20"
|
||||
]
|
||||
},
|
||||
"insertps xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x21"
|
||||
]
|
||||
},
|
||||
"insertps xmm0, xmm1, 00001111b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x21"
|
||||
]
|
||||
},
|
||||
"insertps xmm0, xmm1, 00010000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x21"
|
||||
]
|
||||
},
|
||||
"pinsrd xmm0, eax, 00b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x22"
|
||||
]
|
||||
},
|
||||
"pinsrd xmm0, eax, 01b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x22"
|
||||
]
|
||||
},
|
||||
"pinsrd xmm0, eax, 11b": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x22"
|
||||
]
|
||||
},
|
||||
"pinsrq xmm0, rax, 0b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 REX.W 0x0f 0x3a 0x22"
|
||||
]
|
||||
},
|
||||
"pinsrq xmm0, rax, 1b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 REX.W 0x0f 0x3a 0x22"
|
||||
]
|
||||
},
|
||||
"dpps xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x40"
|
||||
]
|
||||
},
|
||||
"dpps xmm0, xmm1, 00001111b": {
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x40"
|
||||
]
|
||||
},
|
||||
"dpps xmm0, xmm1, 11110000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x40"
|
||||
]
|
||||
},
|
||||
"dpps xmm0, xmm1, 11111111b": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x40"
|
||||
]
|
||||
},
|
||||
"dppd xmm0, xmm1, 00000000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x41"
|
||||
]
|
||||
},
|
||||
"dppd xmm0, xmm1, 00001111b": {
|
||||
"ExpectedInstructionCount": 9,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x41"
|
||||
]
|
||||
},
|
||||
"dppd xmm0, xmm1, 11110000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x41"
|
||||
]
|
||||
},
|
||||
"dppd xmm0, xmm1, 11111111b": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x41"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 000b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 001b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 010b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 011b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 100b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 101b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 110b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"mpsadbw xmm0, xmm1, 111b": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x42"
|
||||
]
|
||||
},
|
||||
"pclmulqdq xmm0, xmm1, 00000b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x44"
|
||||
]
|
||||
},
|
||||
"pclmulqdq xmm0, xmm1, 00001b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x44"
|
||||
]
|
||||
},
|
||||
"pclmulqdq xmm0, xmm1, 10000b": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x44"
|
||||
]
|
||||
},
|
||||
"pclmulqdq xmm0, xmm1, 10001b": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0x44"
|
||||
]
|
||||
},
|
||||
"sha1rnds4 xmm0, xmm1, 00b": {
|
||||
"ExpectedInstructionCount": 61,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xcc"
|
||||
]
|
||||
},
|
||||
"sha1rnds4 xmm0, xmm1, 01b": {
|
||||
"ExpectedInstructionCount": 57,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xcc"
|
||||
]
|
||||
},
|
||||
"sha1rnds4 xmm0, xmm1, 10b": {
|
||||
"ExpectedInstructionCount": 69,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xcc"
|
||||
]
|
||||
},
|
||||
"sha1rnds4 xmm0, xmm1, 11b": {
|
||||
"ExpectedInstructionCount": 57,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xcc"
|
||||
]
|
||||
},
|
||||
"aeskeygenassist xmm0, xmm1, 0": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xdf"
|
||||
]
|
||||
},
|
||||
"aeskeygenassist xmm0, xmm1, 0xFF": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"0x66 0x0f 0x3a 0xdf"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
29
unittests/InstructionCountCI/SecondaryModRM.json
Normal file
29
unittests/InstructionCountCI/SecondaryModRM.json
Normal file
@ -0,0 +1,29 @@
|
||||
{
|
||||
"Features": {
|
||||
"Bitness": 64,
|
||||
"EnabledHostFeatures": [
|
||||
"CLZERO"
|
||||
],
|
||||
"DisabledHostFeatures": [
|
||||
"SVE128",
|
||||
"SVE256"
|
||||
]
|
||||
},
|
||||
"Instructions": {
|
||||
"xgetbv": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"Optimal": "No",
|
||||
"Comment": "0xF 0x01 /2 RM-0"
|
||||
},
|
||||
"rdtscp": {
|
||||
"ExpectedInstructionCount": 17,
|
||||
"Optimal": "No",
|
||||
"Comment": "0xF 0x01 /7 RM-1"
|
||||
},
|
||||
"clzero rax": {
|
||||
"ExpectedInstructionCount": 1,
|
||||
"Optimal": "Yes",
|
||||
"Comment": "0xF 0x01 /7 RM-4"
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user