InstCountCI: Duplicate tests that change behaviour based on flagm

Necessary for #3162 to have consistent behaviour in CI
This commit is contained in:
Ryan Houdek 2023-10-23 14:03:19 -07:00
parent 423ce12001
commit 9f6d80fe5d
14 changed files with 51270 additions and 0 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,131 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256"
]
},
"Instructions": {
"ptest xmm0, xmm1": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x38 0x17"
],
"ExpectedArm64ASM": [
"and v2.16b, v16.16b, v17.16b",
"bic v3.16b, v17.16b, v16.16b",
"cnt v2.16b, v2.16b",
"cnt v3.16b, v3.16b",
"addv h2, v2.8h",
"addv h3, v3.8h",
"umov w20, v2.h[0]",
"umov w21, v3.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"adcx eax, ebx": {
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": [
"0x66 0x0f 0x38 0xf6"
],
"ExpectedArm64ASM": [
"ldr w20, [x28, #728]",
"ubfx w21, w20, #29, #1",
"mov w22, w7",
"mov w23, w4",
"add w24, w22, w21",
"add w4, w23, w24",
"cmp w4, w22",
"cset x23, lo",
"cmp w4, w22",
"cset x22, ls",
"cmp x21, #0x1 (1)",
"csel x21, x22, x23, eq",
"bfi w20, w21, #29, #1",
"str w20, [x28, #728]"
]
},
"adcx rax, rbx": {
"ExpectedInstructionCount": 12,
"Optimal": "Unknown",
"Comment": [
"0x66 REX.W 0x0f 0x38 0xf6"
],
"ExpectedArm64ASM": [
"ldr w20, [x28, #728]",
"ubfx w21, w20, #29, #1",
"add x22, x7, x21",
"add x4, x4, x22",
"cmp x4, x7",
"cset x22, lo",
"cmp x4, x7",
"cset x23, ls",
"cmp x21, #0x1 (1)",
"csel x21, x23, x22, eq",
"bfi w20, w21, #29, #1",
"str w20, [x28, #728]"
]
},
"adox eax, ebx": {
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": [
"0xf3 0x0f 0x38 0xf6"
],
"ExpectedArm64ASM": [
"ldr w20, [x28, #728]",
"ubfx w21, w20, #28, #1",
"mov w22, w7",
"mov w23, w4",
"add w24, w22, w21",
"add w4, w23, w24",
"cmp w4, w22",
"cset x23, lo",
"cmp w4, w22",
"cset x22, ls",
"cmp x21, #0x1 (1)",
"csel x21, x22, x23, eq",
"bfi w20, w21, #28, #1",
"str w20, [x28, #728]"
]
},
"adox rax, rbx": {
"ExpectedInstructionCount": 12,
"Optimal": "Unknown",
"Comment": [
"0xf3 REX.W 0x0f 0x38 0xf6"
],
"ExpectedArm64ASM": [
"ldr w20, [x28, #728]",
"ubfx w21, w20, #28, #1",
"add x22, x7, x21",
"add x4, x4, x22",
"cmp x4, x7",
"cset x22, lo",
"cmp x4, x7",
"cset x23, ls",
"cmp x21, #0x1 (1)",
"csel x21, x23, x22, eq",
"bfi w20, w21, #28, #1",
"str w20, [x28, #728]"
]
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,94 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"CLZERO",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"AFP"
]
},
"Instructions": {
"xgetbv": {
"ExpectedInstructionCount": 46,
"Optimal": "No",
"Comment": "0xF 0x01 /2 RM-0",
"ExpectedArm64ASM": [
"sub sp, sp, #0xf0 (240)",
"mov x0, sp",
"st1 {v2.2d, v3.2d}, [x0], #32",
"st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
"st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x0], #64",
"st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x0], #64",
"str x30, [x0]",
"stp x4, x5, [x28, #8]",
"stp x6, x7, [x28, #24]",
"stp x8, x9, [x28, #40]",
"stp x10, x11, [x28, #56]",
"stp x12, x13, [x28, #72]",
"stp x14, x15, [x28, #88]",
"stp x16, x17, [x28, #104]",
"stp x19, x29, [x28, #120]",
"add x0, x28, #0xc0 (192)",
"st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x0], #64",
"st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x0], #64",
"st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x0], #64",
"st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x0], #64",
"ldr x0, [x28, #1096]",
"ldr x2, [x28, #1112]",
"mov w1, w5",
"blr x2",
"add x5, x28, #0xc0 (192)",
"ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x5], #64",
"ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x5], #64",
"ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x5], #64",
"ld1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x5], #64",
"ldp x4, x5, [x28, #8]",
"ldp x6, x7, [x28, #24]",
"ldp x8, x9, [x28, #40]",
"ldp x10, x11, [x28, #56]",
"ldp x12, x13, [x28, #72]",
"ldp x14, x15, [x28, #88]",
"ldp x16, x17, [x28, #104]",
"ldp x19, x29, [x28, #120]",
"ld1 {v2.2d, v3.2d}, [sp], #32",
"ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
"ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64",
"ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64",
"ldr x30, [sp], #16",
"mov w20, w0",
"lsr x21, x0, #32",
"mov w4, w20",
"mov w6, w21"
]
},
"rdtscp": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": "0xF 0x01 /7 RM-1",
"ExpectedArm64ASM": [
"dmb ld",
"mrs x20, S3_3_c14_c0_2",
"mov w4, w20",
"lsr x6, x20, #32",
"str x8, [x28, #40]",
"mov w0, #0x100",
"str x0, [x28, #1040]",
"sub sp, sp, #0x10 (16)",
"mov w8, #0xa8",
"mov x0, sp",
"add x1, sp, #0x4 (4)",
"svc #0x0",
"ldp w0, w1, [sp]",
"add sp, sp, #0x10 (16)",
"ldr x8, [x28, #40]",
"str xzr, [x28, #1040]",
"orr x5, x0, x1, lsl #12"
]
}
}
}

View File

@@ -0,0 +1,100 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"FCMA"
]
},
"Instructions": {
"ucomisd xmm0, xmm1": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": "0x66 0x0f 0x2e",
"ExpectedArm64ASM": [
"fcmp d16, d17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"lsl x21, x21, #29",
"orr w21, w21, w22, lsl #30",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"comisd xmm0, xmm1": {
"ExpectedInstructionCount": 22,
"Optimal": "No",
"Comment": "0x66 0x0f 0x2f",
"ExpectedArm64ASM": [
"fcmp d16, d17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"ldr w23, [x28, #728]",
"mov w0, w23",
"bfi w0, w21, #29, #1",
"mov w21, w0",
"bfi w21, w22, #30, #1",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"pmovmskb eax, xmm0": {
"ExpectedInstructionCount": 11,
"Optimal": "Yes",
"Comment": "0x66 0x0f 0xd7",
"ExpectedArm64ASM": [
"mov x20, #0x201",
"movk x20, #0x804, lsl #16",
"movk x20, #0x2010, lsl #32",
"movk x20, #0x8040, lsl #48",
"dup v2.2d, x20",
"cmlt v3.16b, v16.16b, #0",
"and v2.16b, v3.16b, v2.16b",
"addp v2.16b, v2.16b, v2.16b",
"addp v2.8b, v2.8b, v2.8b",
"addp v2.8b, v2.8b, v2.8b",
"umov w4, v2.h[0]"
]
},
"maskmovdqu xmm0, xmm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": "0x66 0x0f 0xf7",
"ExpectedArm64ASM": [
"cmlt v2.16b, v17.16b, #0",
"ldr q3, [x11]",
"bsl v2.16b, v16.16b, v3.16b",
"str q2, [x11]"
]
}
}
}

View File

@@ -0,0 +1,173 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"SVE128",
"SVE256",
"RPRES",
"AFP"
]
},
"Instructions": {
"popcnt ax, bx": {
"ExpectedInstructionCount": 14,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xb8",
"ExpectedArm64ASM": [
"uxth w20, w7",
"fmov s0, w20",
"cnt v0.8b, v0.8b",
"addp v0.8b, v0.8b, v0.8b",
"umov w20, v0.b[0]",
"bfxil x4, x20, #0, #16",
"mov w21, #0x0",
"mov w22, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"strb w22, [x28, #706]",
"strb w21, [x28, #708]",
"lsl x20, x20, #30",
"str w20, [x28, #728]"
]
},
"popcnt eax, ebx": {
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xb8",
"ExpectedArm64ASM": [
"mov w20, w7",
"fmov s0, w20",
"cnt v0.8b, v0.8b",
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"mov w20, #0x0",
"mov w21, #0x1",
"cmp x4, #0x0 (0)",
"cset x22, eq",
"strb w21, [x28, #706]",
"strb w20, [x28, #708]",
"lsl x20, x22, #30",
"str w20, [x28, #728]"
]
},
"popcnt rax, rbx": {
"ExpectedInstructionCount": 12,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xb8",
"ExpectedArm64ASM": [
"fmov d0, x7",
"cnt v0.8b, v0.8b",
"addv b0, v0.8b",
"umov w4, v0.b[0]",
"mov w20, #0x0",
"mov w21, #0x1",
"cmp x4, #0x0 (0)",
"cset x22, eq",
"strb w21, [x28, #706]",
"strb w20, [x28, #708]",
"lsl x20, x22, #30",
"str w20, [x28, #728]"
]
},
"tzcnt ax, bx": {
"ExpectedInstructionCount": 11,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xbc",
"ExpectedArm64ASM": [
"uxth w20, w7",
"rbit w20, w20",
"orr w20, w20, #0x8000",
"clz w20, w20",
"bfxil x4, x20, #0, #16",
"cmp x20, #0x0 (0)",
"cset x21, eq",
"lsl x21, x21, #29",
"ubfx w20, w20, #0, #1",
"orr w20, w21, w20, lsl #30",
"str w20, [x28, #728]"
]
},
"tzcnt eax, ebx": {
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xbc",
"ExpectedArm64ASM": [
"mov w20, w7",
"rbit w4, w20",
"clz w4, w4",
"cmp x4, #0x0 (0)",
"cset x20, eq",
"lsl x20, x20, #29",
"ubfx w21, w4, #0, #1",
"orr w20, w20, w21, lsl #30",
"str w20, [x28, #728]"
]
},
"tzcnt rax, rbx": {
"ExpectedInstructionCount": 8,
"Optimal": "Yes",
"Comment": "0xf3 0x0f 0xbc",
"ExpectedArm64ASM": [
"rbit x4, x7",
"clz x4, x4",
"cmp x4, #0x0 (0)",
"cset x20, eq",
"lsl x20, x20, #29",
"ubfx w21, w4, #0, #1",
"orr w20, w20, w21, lsl #30",
"str w20, [x28, #728]"
]
},
"lzcnt ax, bx": {
"ExpectedInstructionCount": 11,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xbd",
"ExpectedArm64ASM": [
"uxth w20, w7",
"lsl w21, w20, #16",
"orr w21, w21, #0x8000",
"clz w21, w21",
"bfxil x4, x21, #0, #16",
"cmp x20, #0x0 (0)",
"cset x21, eq",
"lsl x21, x21, #29",
"ubfx w20, w20, #15, #1",
"orr w20, w21, w20, lsl #30",
"str w20, [x28, #728]"
]
},
"lzcnt eax, ebx": {
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xbd",
"ExpectedArm64ASM": [
"mov w20, w7",
"clz w4, w20",
"cmp x20, #0x0 (0)",
"cset x21, eq",
"lsl x21, x21, #29",
"lsr w20, w20, #31",
"orr w20, w21, w20, lsl #30",
"str w20, [x28, #728]"
]
},
"lzcnt rax, rbx": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": "0xf3 0x0f 0xbd",
"ExpectedArm64ASM": [
"clz x4, x7",
"cmp x7, #0x0 (0)",
"cset x20, eq",
"lsl x20, x20, #29",
"lsr x21, x7, #63",
"orr w20, w20, w21, lsl #30",
"str w20, [x28, #728]"
]
}
}
}

View File

@@ -0,0 +1,200 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"SVE128",
"SVE256",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"FCMA",
"RPRES",
"AFP"
]
},
"Instructions": {
"vucomiss xmm0, xmm1": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": [
"Map 1 0b00 0x2e 128-bit"
],
"ExpectedArm64ASM": [
"fcmp s16, s17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"lsl x21, x21, #29",
"orr w21, w21, w22, lsl #30",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"vucomisd xmm0, xmm1": {
"ExpectedInstructionCount": 22,
"Optimal": "No",
"Comment": [
"Map 1 0b01 0x2e 128-bit"
],
"ExpectedArm64ASM": [
"fcmp d16, d17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"ldr w23, [x28, #728]",
"mov w0, w23",
"bfi w0, w21, #29, #1",
"mov w21, w0",
"bfi w21, w22, #30, #1",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"vcomiss xmm0, xmm1": {
"ExpectedInstructionCount": 22,
"Optimal": "No",
"Comment": [
"Map 1 0b00 0x2f 128-bit"
],
"ExpectedArm64ASM": [
"fcmp s16, s17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"ldr w23, [x28, #728]",
"mov w0, w23",
"bfi w0, w21, #29, #1",
"mov w21, w0",
"bfi w21, w22, #30, #1",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"vcomisd xmm0, xmm1": {
"ExpectedInstructionCount": 22,
"Optimal": "No",
"Comment": [
"Map 1 0b01 0x2f 128-bit"
],
"ExpectedArm64ASM": [
"fcmp d16, d17",
"cset x20, eq",
"csinc x20, x20, xzr, vc",
"cset x1, lt",
"bfi x20, x1, #1, #1",
"cset x1, vs",
"bfi x20, x1, #2, #1",
"ubfx x21, x20, #1, #1",
"ubfx x22, x20, #0, #1",
"ubfx x20, x20, #2, #1",
"ldr w23, [x28, #728]",
"mov w0, w23",
"bfi w0, w21, #29, #1",
"mov w21, w0",
"bfi w21, w22, #30, #1",
"eor w20, w20, #0x1",
"strb w20, [x28, #706]",
"mov w20, #0x0",
"mov w22, #0x90000000",
"bic x21, x21, x22",
"strb w20, [x28, #708]",
"str w21, [x28, #728]"
]
},
"vpmovmskb rax, xmm0": {
"ExpectedInstructionCount": 11,
"Optimal": "No",
"Comment": [
"Map 1 0b01 0xd7 128-bit"
],
"ExpectedArm64ASM": [
"mov x20, #0x201",
"movk x20, #0x804, lsl #16",
"movk x20, #0x2010, lsl #32",
"movk x20, #0x8040, lsl #48",
"dup v2.2d, x20",
"cmlt v3.16b, v16.16b, #0",
"and v2.16b, v3.16b, v2.16b",
"addp v2.16b, v2.16b, v2.16b",
"addp v2.8b, v2.8b, v2.8b",
"addp v2.8b, v2.8b, v2.8b",
"umov w4, v2.h[0]"
]
},
"vpmovmskb rax, ymm0": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": [
"Map 1 0b01 0xd7 256-bit"
],
"ExpectedArm64ASM": [
"mov x20, #0x201",
"movk x20, #0x804, lsl #16",
"movk x20, #0x2010, lsl #32",
"movk x20, #0x8040, lsl #48",
"mov z2.d, x20",
"mov z0.d, #0",
"cmplt p0.b, p7/z, z16.b, #0",
"not z0.b, p0/m, z16.b",
"orr z0.b, p0/m, z0.b, z16.b",
"mov z3.d, z0.d",
"and z2.d, z3.d, z2.d",
"movprfx z0, z2",
"addp z0.b, p7/m, z0.b, z2.b",
"uzp1 z2.b, z0.b, z0.b",
"uzp2 z1.b, z0.b, z0.b",
"splice z2.d, p6, z2.d, z1.d",
"addp v2.16b, v2.16b, v2.16b",
"addp v2.8b, v2.8b, v2.8b",
"mov w4, v2.s[0]"
]
},
"vmaskmovdqu xmm0, xmm1": {
"ExpectedInstructionCount": 4,
"Optimal": "Yes",
"Comment": [
"Map 1 0b01 0xf7 128-bit"
],
"ExpectedArm64ASM": [
"cmlt v2.16b, v17.16b, #0",
"ldr q3, [x11]",
"bsl v2.16b, v16.16b, v3.16b",
"str q2, [x11]"
]
}
}
}

View File

@@ -0,0 +1,614 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"SVE256",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": [
"AFP"
]
},
"Instructions": {
"vtestps xmm0, xmm1": {
"ExpectedInstructionCount": 25,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x0e 128-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x80000000",
"dup v2.4s, w20",
"and v3.16b, v17.16b, v16.16b",
"bic v4.16b, v17.16b, v16.16b",
"and v3.16b, v3.16b, v2.16b",
"and v2.16b, v4.16b, v2.16b",
"cnt v3.16b, v3.16b",
"cnt v2.16b, v2.16b",
"addv h3, v3.8h",
"addv h2, v2.8h",
"umov w20, v3.h[0]",
"umov w21, v2.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"mov w21, #0x90000000",
"bic x20, x20, x21",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vtestps ymm0, ymm1": {
"ExpectedInstructionCount": 33,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x0e 256-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x80000000",
"mov z2.s, w20",
"and z3.d, z17.d, z16.d",
"bic z4.d, z17.d, z16.d",
"and z3.d, z3.d, z2.d",
"and z2.d, z4.d, z2.d",
"cnt z3.b, p7/m, z3.b",
"cnt z2.b, p7/m, z2.b",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z3.d",
"addv h1, v3.8h",
"addv h0, v0.8h",
"add v3.8h, v0.8h, v1.8h",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z2.d",
"addv h1, v2.8h",
"addv h0, v0.8h",
"add v2.8h, v0.8h, v1.8h",
"umov w20, v3.h[0]",
"umov w21, v2.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"mov w21, #0x90000000",
"bic x20, x20, x21",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vtestpd xmm0, xmm1": {
"ExpectedInstructionCount": 25,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x0f 128-bit"
],
"ExpectedArm64ASM": [
"mov x20, #0x8000000000000000",
"dup v2.2d, x20",
"and v3.16b, v17.16b, v16.16b",
"bic v4.16b, v17.16b, v16.16b",
"and v3.16b, v3.16b, v2.16b",
"and v2.16b, v4.16b, v2.16b",
"cnt v3.16b, v3.16b",
"cnt v2.16b, v2.16b",
"addv h3, v3.8h",
"addv h2, v2.8h",
"umov w20, v3.h[0]",
"umov w21, v2.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"mov w21, #0x90000000",
"bic x20, x20, x21",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vtestpd ymm0, ymm1": {
"ExpectedInstructionCount": 33,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x0f 256-bit"
],
"ExpectedArm64ASM": [
"mov x20, #0x8000000000000000",
"mov z2.d, x20",
"and z3.d, z17.d, z16.d",
"bic z4.d, z17.d, z16.d",
"and z3.d, z3.d, z2.d",
"and z2.d, z4.d, z2.d",
"cnt z3.b, p7/m, z3.b",
"cnt z2.b, p7/m, z2.b",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z3.d",
"addv h1, v3.8h",
"addv h0, v0.8h",
"add v3.8h, v0.8h, v1.8h",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z2.d",
"addv h1, v2.8h",
"addv h0, v0.8h",
"add v2.8h, v0.8h, v1.8h",
"umov w20, v3.h[0]",
"umov w21, v2.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"mov w21, #0x90000000",
"bic x20, x20, x21",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vptest xmm0, xmm1": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x17 128-bit"
],
"ExpectedArm64ASM": [
"and v2.16b, v16.16b, v17.16b",
"bic v3.16b, v17.16b, v16.16b",
"cnt v2.16b, v2.16b",
"cnt v3.16b, v3.16b",
"addv h2, v2.8h",
"addv h3, v3.8h",
"umov w20, v2.h[0]",
"umov w21, v3.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vptest ymm0, ymm1": {
"ExpectedInstructionCount": 27,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x17 256-bit"
],
"ExpectedArm64ASM": [
"and z2.d, z16.d, z17.d",
"bic z3.d, z17.d, z16.d",
"cnt z2.b, p7/m, z2.b",
"cnt z3.b, p7/m, z3.b",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z2.d",
"addv h1, v2.8h",
"addv h0, v0.8h",
"add v2.8h, v0.8h, v1.8h",
"not p0.b, p7/z, p6.b",
"compact z0.d, p0, z3.d",
"addv h1, v3.8h",
"addv h0, v0.8h",
"add v3.8h, v0.8h, v1.8h",
"umov w20, v2.h[0]",
"umov w21, v3.h[0]",
"mov w22, #0x0",
"mov w23, #0x1",
"cmp x20, #0x0 (0)",
"cset x20, eq",
"cmp x21, #0x0 (0)",
"cset x21, eq",
"lsl x20, x20, #30",
"orr w20, w20, w21, lsl #29",
"strb w23, [x28, #706]",
"strb w22, [x28, #708]",
"str w20, [x28, #728]"
]
},
"vmaskmovps xmm0, xmm1, [rax]": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2c 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p6/z, z17.s, #0",
"ld1w {z2.s}, p0/z, [x4]",
"mov v16.16b, v2.16b"
]
},
"vmaskmovps ymm0, ymm1, [rax]": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2c 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p7/z, z17.s, #0",
"ld1w {z16.s}, p0/z, [x4]"
]
},
"vmaskmovpd xmm0, xmm1, [rax]": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2d 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p6/z, z17.d, #0",
"ld1d {z2.d}, p0/z, [x4]",
"mov v16.16b, v2.16b"
]
},
"vmaskmovpd ymm0, ymm1, [rax]": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2d 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p7/z, z17.d, #0",
"ld1d {z16.d}, p0/z, [x4]"
]
},
"vmaskmovps [rax], xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2e 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p6/z, z16.s, #0",
"st1w {z17.s}, p0, [x4]"
]
},
"vmaskmovps [rax], ymm0, ymm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2e 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p7/z, z16.s, #0",
"st1w {z17.s}, p0, [x4]"
]
},
"vmaskmovpd [rax], xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2f 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p6/z, z16.d, #0",
"st1d {z17.d}, p0, [x4]"
]
},
"vmaskmovpd [rax], ymm0, ymm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x2f 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p7/z, z16.d, #0",
"st1d {z17.d}, p0, [x4]"
]
},
"vpmaskmovd xmm0, xmm1, [rax]": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8c 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p6/z, z17.s, #0",
"ld1w {z2.s}, p0/z, [x4]",
"mov v16.16b, v2.16b"
]
},
"vpmaskmovd ymm0, ymm1, [rax]": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8c 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p7/z, z17.s, #0",
"ld1w {z16.s}, p0/z, [x4]"
]
},
"vpmaskmovq xmm0, xmm1, [rax]": {
"ExpectedInstructionCount": 3,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8c 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p6/z, z17.d, #0",
"ld1d {z2.d}, p0/z, [x4]",
"mov v16.16b, v2.16b"
]
},
"vpmaskmovq ymm0, ymm1, [rax]": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8c 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p7/z, z17.d, #0",
"ld1d {z16.d}, p0/z, [x4]"
]
},
"vpmaskmovd [rax], xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8e 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p6/z, z16.s, #0",
"st1w {z17.s}, p0, [x4]"
]
},
"vpmaskmovd [rax], ymm0, ymm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8e 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.s, p7/z, z16.s, #0",
"st1w {z17.s}, p0, [x4]"
]
},
"vpmaskmovq [rax], xmm0, xmm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8e 128-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p6/z, z16.d, #0",
"st1d {z17.d}, p0, [x4]"
]
},
"vpmaskmovq [rax], ymm0, ymm1": {
"ExpectedInstructionCount": 2,
"Optimal": "No",
"Comment": [
"Map 2 0b01 0x8e 256-bit"
],
"ExpectedArm64ASM": [
"cmplt p0.d, p7/z, z16.d, #0",
"st1d {z17.d}, p0, [x4]"
]
},
"andn eax, ebx, ecx": {
"ExpectedInstructionCount": 7,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf2 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"mov w21, w5",
"bic w4, w21, w20",
"strb w4, [x28, #706]",
"tst w4, w4",
"mrs x20, nzcv",
"str w20, [x28, #728]"
]
},
"andn rax, rbx, rcx": {
"ExpectedInstructionCount": 5,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf2 64-bit"
],
"ExpectedArm64ASM": [
"bic x4, x5, x7",
"strb w4, [x28, #706]",
"tst x4, x4",
"mrs x20, nzcv",
"str w20, [x28, #728]"
]
},
"bzhi eax, ebx, ecx": {
"ExpectedInstructionCount": 18,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf5 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"mov w21, w5",
"and x21, x21, #0xff",
"mov w22, #0xffff",
"movk w22, #0xffff, lsl #16",
"lsl w22, w22, w21",
"bic w22, w20, w22",
"cmp x21, #0x1f (31)",
"csel w4, w20, w22, hi",
"ldr w20, [x28, #728]",
"and w20, w20, #0xefffffff",
"cmp x4, #0x0 (0)",
"cset x22, eq",
"bfi w20, w22, #30, #1",
"cmp x21, #0x1f (31)",
"cset x21, hi",
"bfi w20, w21, #29, #1",
"str w20, [x28, #728]"
]
},
"bzhi rax, rbx, rcx": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf5 64-bit"
],
"ExpectedArm64ASM": [
"and x20, x5, #0xff",
"mov x21, #0xffffffffffffffff",
"lsl x21, x21, x20",
"bic x21, x7, x21",
"cmp x20, #0x3f (63)",
"csel x4, x7, x21, hi",
"ldr w21, [x28, #728]",
"and w21, w21, #0xefffffff",
"cmp x4, #0x0 (0)",
"cset x22, eq",
"bfi w21, w22, #30, #1",
"cmp x20, #0x3f (63)",
"cset x20, hi",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"str w20, [x28, #728]"
]
},
"pdep eax, ebx, ecx": {
"ExpectedInstructionCount": 25,
"Optimal": "No",
"Comment": [
"Map 2 0b11 0xf5 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"mov w21, w5",
"cbz w21, #+0x58",
"mov w3, wzr",
"stp x4, x5, [x28, #8]",
"str x6, [x28, #24]",
"mov w4, w20",
"mov w5, w21",
"mov w6, wzr",
"rbit w0, w5",
"clz w0, w0",
"lsr w1, w4, w3",
"and w1, w1, #0x1",
"sub w2, w5, #0x1 (1)",
"add w3, w3, #0x1 (1)",
"ands w5, w5, w2",
"lsl w0, w1, w0",
"orr w6, w6, w0",
"b.ne #-0x24",
"mov w3, w6",
"ldp x4, x5, [x28, #8]",
"ldr x6, [x28, #24]",
"mov w4, w3",
"b #+0x8",
"mov w4, wzr"
]
},
"pdep rax, rbx, rcx": {
"ExpectedInstructionCount": 23,
"Optimal": "No",
"Comment": [
"Map 2 0b11 0xf5 64-bit"
],
"ExpectedArm64ASM": [
"cbz x5, #+0x58",
"mov x3, xzr",
"stp x4, x5, [x28, #8]",
"str x6, [x28, #24]",
"mov x4, x7",
"mov x5, x5",
"mov x6, xzr",
"rbit x0, x5",
"clz x0, x0",
"lsr x1, x4, x3",
"and x1, x1, #0x1",
"sub x2, x5, #0x1 (1)",
"add x3, x3, #0x1 (1)",
"ands x5, x5, x2",
"lsl x0, x1, x0",
"orr x6, x6, x0",
"b.ne #-0x24",
"mov x3, x6",
"ldp x4, x5, [x28, #8]",
"ldr x6, [x28, #24]",
"mov x4, x3",
"b #+0x8",
"mov x4, xzr"
]
},
"bextr eax, ebx, ecx": {
"ExpectedInstructionCount": 19,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf7 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"mov w21, w5",
"mov w22, #0x1f",
"uxtb w23, w21",
"lsr w20, w20, w23",
"mov w24, #0x0",
"cmp w23, #0x1f (31)",
"csel w20, w20, w24, ls",
"ubfx w21, w21, #8, #8",
"cmp w21, #0x1f (31)",
"csel w21, w21, w22, ls",
"mov w22, #0x1",
"lsl w21, w22, w21",
"sub w21, w21, #0x1 (1)",
"and w4, w20, w21",
"cmp x4, #0x0 (0)",
"cset x20, eq",
"lsl x20, x20, #30",
"str w20, [x28, #728]"
]
},
"bextr rax, rbx, rcx": {
"ExpectedInstructionCount": 17,
"Optimal": "No",
"Comment": [
"Map 2 0b00 0xf7 64-bit"
],
"ExpectedArm64ASM": [
"mov w20, #0x3f",
"uxtb x21, w5",
"lsr x22, x7, x21",
"mov w23, #0x0",
"cmp x21, #0x3f (63)",
"csel x21, x22, x23, ls",
"ubfx x22, x5, #8, #8",
"cmp x22, #0x3f (63)",
"csel x20, x22, x20, ls",
"mov w22, #0x1",
"lsl x20, x22, x20",
"sub x20, x20, #0x1 (1)",
"and x4, x21, x20",
"cmp x4, #0x0 (0)",
"cset x20, eq",
"lsl x20, x20, #30",
"str w20, [x28, #728]"
]
}
}
}

View File

@@ -0,0 +1,124 @@
{
"Features": {
"Bitness": 64,
"EnabledHostFeatures": [
"SVE256",
"FLAGM",
"FLAGM2"
],
"DisabledHostFeatures": []
},
"Instructions": {
"blsr eax, ebx": {
"ExpectedInstructionCount": 10,
"Optimal": "No",
"Comment": [
"Map group 17 0b001 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"sub x21, x20, #0x1 (1)",
"and x21, x21, x20",
"mov w4, w21",
"tst w21, w21",
"mrs x21, nzcv",
"cmp x20, #0x0 (0)",
"cset x20, ne",
"orr w20, w21, w20, lsl #29",
"str w20, [x28, #728]"
]
},
"blsr rax, rbx": {
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"Map group 17 0b001 64-bit"
],
"ExpectedArm64ASM": [
"sub x20, x7, #0x1 (1)",
"and x4, x20, x7",
"tst x4, x4",
"mrs x20, nzcv",
"cmp x7, #0x0 (0)",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
},
"blsmsk eax, ebx": {
"ExpectedInstructionCount": 13,
"Optimal": "No",
"Comment": [
"Map group 17 0b010 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"sub x21, x20, #0x1 (1)",
"eor x21, x21, x20",
"mov w4, w21",
"mov w21, #0x50000000",
"ldr w22, [x28, #728]",
"bic x21, x22, x21",
"cmp x20, #0x0 (0)",
"cset x20, ne",
"mov w0, w21",
"bfi w0, w20, #29, #1",
"mov w20, w0",
"str w20, [x28, #728]"
]
},
"blsmsk rax, rbx": {
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"Map group 17 0b010 64-bit"
],
"ExpectedArm64ASM": [
"sub x20, x7, #0x1 (1)",
"eor x4, x20, x7",
"mov w20, #0x50000000",
"ldr w21, [x28, #728]",
"bic x20, x21, x20",
"cmp x7, #0x0 (0)",
"cset x21, ne",
"bfi w20, w21, #29, #1",
"str w20, [x28, #728]"
]
},
"blsi eax, ebx": {
"ExpectedInstructionCount": 9,
"Optimal": "No",
"Comment": [
"Map group 17 0b011 32-bit"
],
"ExpectedArm64ASM": [
"mov w20, w7",
"neg w21, w20",
"and w4, w20, w21",
"tst w4, w4",
"mrs x20, nzcv",
"cmp x4, #0x0 (0)",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
},
"blsi rax, rbx": {
"ExpectedInstructionCount": 8,
"Optimal": "No",
"Comment": [
"Map group 17 0b011 64-bit"
],
"ExpectedArm64ASM": [
"neg x20, x7",
"and x4, x7, x20",
"tst x4, x4",
"mrs x20, nzcv",
"cmp x4, #0x0 (0)",
"cset x21, ne",
"orr w20, w20, w21, lsl #29",
"str w20, [x28, #728]"
]
}
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff