InstcountCI: Update for 32-bit gather sign extend optimization

This commit is contained in:
Ryan Houdek 2024-07-05 19:09:19 -07:00
parent df40515087
commit 81165f0c40
No known key found for this signature in database
2 changed files with 224 additions and 520 deletions

View File

@ -463,34 +463,22 @@
]
},
"vpgatherdq ymm0, [xmm1*1 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -498,34 +486,22 @@
]
},
"vpgatherdq ymm0, [xmm1*2 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #1",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #1",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -533,34 +509,22 @@
]
},
"vpgatherdq ymm0, [xmm1*4 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #2",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #2",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -568,34 +532,22 @@
]
},
"vpgatherdq ymm0, [xmm1*8 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #3",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #3",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -1351,34 +1303,22 @@
]
},
"vgatherdpd ymm0, [xmm1*1 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -1386,34 +1326,22 @@
]
},
"vgatherdpd ymm0, [xmm1*2 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #1",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #1",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -1421,34 +1349,22 @@
]
},
"vgatherdpd ymm0, [xmm1*4 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #2",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #2",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -1456,34 +1372,22 @@
]
},
"vgatherdpd ymm0, [xmm1*8 + rax], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #3",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #3",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -2226,34 +2130,22 @@
]
},
"vpgatherdq ymm0, [xmm1*1], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sxtw x1, w0",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sxtw x1, w0",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sxtw x1, w0",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sxtw x1, w0",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -2261,34 +2153,22 @@
]
},
"vpgatherdq ymm0, [xmm1*2], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #1, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #1, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #1, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #1, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -2296,34 +2176,22 @@
]
},
"vpgatherdq ymm0, [xmm1*4], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #2, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #2, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #2, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #2, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -2331,34 +2199,22 @@
]
},
"vpgatherdq ymm0, [xmm1*8], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #3, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #3, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #3, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #3, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -3120,34 +2976,22 @@
]
},
"vgatherdpd ymm0, [xmm1*1], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sxtw x1, w0",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sxtw x1, w0",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sxtw x1, w0",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sxtw x1, w0",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -3155,34 +2999,22 @@
]
},
"vgatherdpd ymm0, [xmm1*2], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #1, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #1, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #1, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #1, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -3190,34 +3022,22 @@
]
},
"vgatherdpd ymm0, [xmm1*4], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #2, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #2, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #2, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #2, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",
@ -3225,34 +3045,22 @@
]
},
"vgatherdpd ymm0, [xmm1*8], ymm2": {
"ExpectedInstructionCount": 27,
"ExpectedInstructionCount": 15,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #16]",
"ldr q3, [x28, #48]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"sbfiz x1, x0, #3, #32",
"ld1 {v16.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"sbfiz x1, x0, #3, #32",
"ld1 {v16.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"sbfiz x1, x0, #3, #32",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"sbfiz x1, x0, #3, #32",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [z5.d]",
"mov z16.d, p0/m, z0.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [z4.d]",
"mov z2.d, p0/m, z0.d",
"str q2, [x28, #16]",
"movi v18.2d, #0x0",
"str q18, [x28, #48]",

View File

@ -2339,37 +2339,24 @@
]
},
"vpgatherdq ymm0, [xmm1*1 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -2377,37 +2364,24 @@
]
},
"vpgatherdq ymm0, [xmm1*2 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #1",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #1",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -2415,37 +2389,24 @@
]
},
"vpgatherdq ymm0, [xmm1*4 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #2",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #2",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -2453,37 +2414,24 @@
]
},
"vpgatherdq ymm0, [xmm1*8 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x90 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #3",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #3",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -3251,37 +3199,24 @@
]
},
"vgatherdpd ymm0, [xmm1*1 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sxtl2 v4.2d, v17.4s",
"sxtl v5.2d, v17.2s",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -3289,37 +3224,24 @@
]
},
"vgatherdpd ymm0, [xmm1*2 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #1",
"sshll v5.2d, v17.2s, #1",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #1",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #1",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #1",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -3327,37 +3249,24 @@
]
},
"vgatherdpd ymm0, [xmm1*4 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #2",
"sshll v5.2d, v17.2s, #2",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #2",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #2",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #2",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",
@ -3365,37 +3274,24 @@
]
},
"vgatherdpd ymm0, [xmm1*8 + rax], ymm2": {
"ExpectedInstructionCount": 30,
"ExpectedInstructionCount": 17,
"Comment": [
"Map 2 0b01 0x92 256-bit"
],
"ExpectedArm64ASM": [
"mov z2.q, z16.q[1]",
"mov z3.q, z18.q[1]",
"sshll2 v4.2d, v17.4s, #3",
"sshll v5.2d, v17.2s, #3",
"mrs x20, nzcv",
"mov v4.16b, v16.16b",
"mov x0, v18.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[0]",
"add x1, x4, w0, sxtw #3",
"ld1 {v4.d}[0], [x1]",
"mov x0, v18.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[1]",
"add x1, x4, w0, sxtw #3",
"ld1 {v4.d}[1], [x1]",
"mov x0, v3.d[0]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[2]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[0], [x1]",
"mov x0, v3.d[1]",
"tbz x0, #63, #+0x10",
"smov x0, v17.s[3]",
"add x1, x4, w0, sxtw #3",
"ld1 {v2.d}[1], [x1]",
"cmplt p0.d, p6/z, z18.d, #0",
"ld1d {z0.d}, p0/z, [x4, z5.d]",
"sel z5.d, p0, z0.d, z16.d",
"cmplt p0.d, p6/z, z3.d, #0",
"ld1d {z0.d}, p0/z, [x4, z4.d]",
"mov z2.d, p0/m, z0.d",
"mov z1.q, q2",
"mov z16.d, z4.d",
"mov z16.d, z5.d",
"not p0.b, p7/z, p6.b",
"mov z16.b, p0/m, z1.b",
"movi v18.2d, #0x0",