InstcountCI: Update for wide gathers

This commit is contained in:
Ryan Houdek 2024-07-05 23:54:03 -07:00
parent ec7c8fd922
commit 007c07e612
No known key found for this signature in database
2 changed files with 294 additions and 616 deletions

View File

@ -3199,136 +3199,128 @@
]
},
"vpgatherqd xmm0, [ymm1*1 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x91 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vpgatherqd xmm0, [ymm1*2 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x91 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vpgatherqd xmm0, [ymm1*4 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x91 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vpgatherqd xmm0, [ymm1*8 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x91 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
@ -4203,136 +4195,128 @@
]
},
"vgatherqps xmm0, [ymm1*1 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x93 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vgatherqps xmm0, [ymm1*2 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x93 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #1",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vgatherqps xmm0, [ymm1*4 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x93 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #2",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"
]
},
"vgatherqps xmm0, [ymm1*8 + rax], xmm2": {
"ExpectedInstructionCount": 26,
"ExpectedInstructionCount": 24,
"Comment": [
"Map 2 0b01 0x93 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #32]",
"mov v3.16b, v16.16b",
"mov w0, v18.s[0]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[0]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[0], [x1]",
"ld1 {v16.s}[0], [x1]",
"mov w0, v18.s[1]",
"tbz w0, #31, #+0x10",
"mov x0, v17.d[1]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[1], [x1]",
"ld1 {v16.s}[1], [x1]",
"mov w0, v18.s[2]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[0]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[2], [x1]",
"ld1 {v16.s}[2], [x1]",
"mov w0, v18.s[3]",
"tbz w0, #31, #+0x10",
"mov x0, v2.d[1]",
"add x1, x4, x0, lsl #3",
"ld1 {v3.s}[3], [x1]",
"mov v16.16b, v3.16b",
"ld1 {v16.s}[3], [x1]",
"movi v18.2d, #0x0",
"str q18, [x28, #16]",
"str q18, [x28, #48]"

File diff suppressed because it is too large Load Diff