Update InstCountCI

This commit is contained in:
Billy Laws 2024-03-18 23:30:19 +00:00
parent 8d4d8fe3e5
commit 29b05f6b90
2 changed files with 792 additions and 56 deletions

View File

@ -216,7 +216,7 @@
]
},
"positive rep movsb": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 42,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -232,6 +232,29 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x34",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x14",
"ldrb w3, [x2], #1",
"strb w3, [x1], #1",
@ -248,7 +271,7 @@
]
},
"positive rep movsw": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 42,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -264,6 +287,29 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x34",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x14",
"ldrh w3, [x2], #2",
"strh w3, [x1], #2",
@ -280,7 +326,7 @@
]
},
"positive rep movsd": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 42,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -296,6 +342,29 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x34",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x14",
"ldr w3, [x2], #4",
"str w3, [x1], #4",
@ -312,7 +381,7 @@
]
},
"positive rep movsq": {
"ExpectedInstructionCount": 19,
"ExpectedInstructionCount": 42,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -328,6 +397,29 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x34",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x14",
"ldr x3, [x2], #8",
"str x3, [x1], #8",
@ -344,7 +436,7 @@
]
},
"negative rep movsb": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 45,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -359,7 +451,34 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x14",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1f (31)",
"sub x2, x2, #0x1f (31)",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1f (31)",
"add x2, x2, #0x1f (31)",
"ldrb w3, [x2], #-1",
"strb w3, [x1], #-1",
"sub x0, x0, #0x1 (1)",
@ -375,7 +494,7 @@
]
},
"negative rep movsw": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 45,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -390,7 +509,34 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x14",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1e (30)",
"sub x2, x2, #0x1e (30)",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1e (30)",
"add x2, x2, #0x1e (30)",
"ldrh w3, [x2], #-2",
"strh w3, [x1], #-2",
"sub x0, x0, #0x1 (1)",
@ -406,7 +552,7 @@
]
},
"negative rep movsd": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 45,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -421,7 +567,34 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x14",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1c (28)",
"sub x2, x2, #0x1c (28)",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1c (28)",
"add x2, x2, #0x1c (28)",
"ldr w3, [x2], #-4",
"str w3, [x1], #-4",
"sub x0, x0, #0x1 (1)",
@ -437,7 +610,7 @@
]
},
"negative rep movsq": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 45,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -452,7 +625,34 @@
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"cbz x0, #+0x14",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x18 (24)",
"sub x2, x2, #0x18 (24)",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x1c",
"add x1, x1, #0x18 (24)",
"add x2, x2, #0x18 (24)",
"ldr x3, [x2], #-8",
"str x3, [x1], #-8",
"sub x0, x0, #0x1 (1)",
@ -468,7 +668,7 @@
]
},
"positive rep stosb": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 32,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -484,6 +684,26 @@
"uxtb w21, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x3c",
"dup v1.16b, w21",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x10",
"strb w21, [x1], #1",
"sub x0, x0, #0x1 (1)",
@ -493,7 +713,7 @@
]
},
"positive rep stosw": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 32,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -509,6 +729,26 @@
"uxth w21, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x3c",
"dup v1.8h, w21",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x10",
"strh w21, [x1], #2",
"sub x0, x0, #0x1 (1)",
@ -518,7 +758,7 @@
]
},
"positive rep stosd": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 32,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -534,6 +774,26 @@
"mov w21, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x3c",
"dup v1.4s, w21",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x10",
"str w21, [x1], #4",
"sub x0, x0, #0x1 (1)",
@ -543,7 +803,7 @@
]
},
"positive rep stosq": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 31,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -558,6 +818,26 @@
"strb w21, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x3c",
"dup v1.2d, x4",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x10",
"str x4, [x1], #8",
"sub x0, x0, #0x1 (1)",
@ -567,7 +847,7 @@
]
},
"negative rep stosb": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 33,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -582,7 +862,29 @@
"uxtb w20, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x10",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1f (31)",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x3c",
"dup v1.16b, w20",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x30",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x14",
"add x1, x1, #0x1f (31)",
"strb w20, [x1], #-1",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -591,7 +893,7 @@
]
},
"negative rep stosw": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 33,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -606,7 +908,29 @@
"uxth w20, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x10",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1e (30)",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x3c",
"dup v1.8h, w20",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x30",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x14",
"add x1, x1, #0x1e (30)",
"strh w20, [x1], #-2",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -615,7 +939,7 @@
]
},
"negative rep stosd": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 33,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -630,7 +954,29 @@
"mov w20, w4",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x10",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1c (28)",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x3c",
"dup v1.4s, w20",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x30",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x14",
"add x1, x1, #0x1c (28)",
"str w20, [x1], #-4",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -639,7 +985,7 @@
]
},
"negative rep stosq": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 32,
"Comment": [
"When direction flag is a compile time constant we can optimize",
"loads and stores can turn in to post-increment when known"
@ -653,7 +999,29 @@
"strb w20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"cbz x0, #+0x10",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x18 (24)",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x3c",
"dup v1.2d, x4",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x30",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x14",
"add x1, x1, #0x18 (24)",
"str x4, [x1], #-8",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",

View File

@ -2860,14 +2860,37 @@
]
},
"rep movsb": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 79,
"Comment": "0xa4",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"tbnz w20, #1, #+0x30",
"tbnz w20, #1, #+0x8c",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x34",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x14",
"ldrb w3, [x2], #1",
"strb w3, [x1], #1",
@ -2878,8 +2901,35 @@
"mov x2, x5",
"add x20, x0, x2",
"add x21, x1, x2",
"b #+0x2c",
"cbz x0, #+0x14",
"b #+0x98",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1f (31)",
"sub x2, x2, #0x1f (31)",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1f (31)",
"add x2, x2, #0x1f (31)",
"ldrb w3, [x2], #-1",
"strb w3, [x1], #-1",
"sub x0, x0, #0x1 (1)",
@ -2895,14 +2945,37 @@
]
},
"rep movsw": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 79,
"Comment": "0xa5",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"tbnz w20, #1, #+0x30",
"tbnz w20, #1, #+0x8c",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x34",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x14",
"ldrh w3, [x2], #2",
"strh w3, [x1], #2",
@ -2913,8 +2986,35 @@
"mov x2, x5",
"add x20, x0, x2, lsl #1",
"add x21, x1, x2, lsl #1",
"b #+0x2c",
"cbz x0, #+0x14",
"b #+0x98",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1e (30)",
"sub x2, x2, #0x1e (30)",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1e (30)",
"add x2, x2, #0x1e (30)",
"ldrh w3, [x2], #-2",
"strh w3, [x1], #-2",
"sub x0, x0, #0x1 (1)",
@ -2930,14 +3030,37 @@
]
},
"rep movsd": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 79,
"Comment": "0xa5",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"tbnz w20, #1, #+0x30",
"tbnz w20, #1, #+0x8c",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x34",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x14",
"ldr w3, [x2], #4",
"str w3, [x1], #4",
@ -2948,8 +3071,35 @@
"mov x2, x5",
"add x20, x0, x2, lsl #2",
"add x21, x1, x2, lsl #2",
"b #+0x2c",
"cbz x0, #+0x14",
"b #+0x98",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x1c (28)",
"sub x2, x2, #0x1c (28)",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x1c",
"add x1, x1, #0x1c (28)",
"add x2, x2, #0x1c (28)",
"ldr w3, [x2], #-4",
"str w3, [x1], #-4",
"sub x0, x0, #0x1 (1)",
@ -2965,14 +3115,37 @@
]
},
"rep movsq": {
"ExpectedInstructionCount": 29,
"ExpectedInstructionCount": 79,
"Comment": "0xa5",
"ExpectedArm64ASM": [
"ldrsb x20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"mov x2, x10",
"tbnz w20, #1, #+0x30",
"tbnz w20, #1, #+0x8c",
"cbz x0, #+0x70",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x54",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x34",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #32",
"stp q0, q1, [x1], #32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x14",
"ldr x3, [x2], #8",
"str x3, [x1], #8",
@ -2983,8 +3156,35 @@
"mov x2, x5",
"add x20, x0, x2, lsl #3",
"add x21, x1, x2, lsl #3",
"b #+0x2c",
"cbz x0, #+0x14",
"b #+0x98",
"cbz x0, #+0x80",
"orr x3, x1, x2",
"and x3, x3, #0x3",
"cbnz x3, #+0x64",
"sub x1, x1, #0x18 (24)",
"sub x2, x2, #0x18 (24)",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x44",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x1c",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x14",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x3c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"ldp q0, q1, [x2], #-32",
"stp q0, q1, [x1], #-32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x1c",
"add x1, x1, #0x18 (24)",
"add x2, x2, #0x18 (24)",
"ldr x3, [x2], #-8",
"str x3, [x1], #-8",
"sub x0, x0, #0x1 (1)",
@ -3341,21 +3541,63 @@
]
},
"rep stosb": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 59,
"Comment": "0xaa",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"ldrsb x21, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"tbnz w21, #1, #+0x1c",
"tbnz w21, #1, #+0x6c",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x3c",
"dup v1.16b, w20",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x10",
"strb w20, [x1], #1",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
"add x11, x11, x5",
"b #+0x18",
"cbz x0, #+0x10",
"b #+0x70",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1f (31)",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x3c",
"dup v1.16b, w20",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x40 (64)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x40 (64)",
"cbz x0, #+0x30",
"sub x0, x0, #0x20 (32)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x14",
"add x1, x1, #0x1f (31)",
"strb w20, [x1], #-1",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -3364,21 +3606,63 @@
]
},
"rep stosw": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 59,
"Comment": "0xab",
"ExpectedArm64ASM": [
"uxth w20, w4",
"ldrsb x21, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"tbnz w21, #1, #+0x1c",
"tbnz w21, #1, #+0x6c",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x3c",
"dup v1.8h, w20",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x10",
"strh w20, [x1], #2",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
"add x11, x11, x5, lsl #1",
"b #+0x18",
"cbz x0, #+0x10",
"b #+0x70",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1e (30)",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x3c",
"dup v1.8h, w20",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x20 (32)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x20 (32)",
"cbz x0, #+0x30",
"sub x0, x0, #0x10 (16)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x14",
"add x1, x1, #0x1e (30)",
"strh w20, [x1], #-2",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -3387,21 +3671,63 @@
]
},
"rep stosd": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 59,
"Comment": "0xab",
"ExpectedArm64ASM": [
"mov w20, w4",
"ldrsb x21, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"tbnz w21, #1, #+0x1c",
"tbnz w21, #1, #+0x6c",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x3c",
"dup v1.4s, w20",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x10",
"str w20, [x1], #4",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
"add x11, x11, x5, lsl #2",
"b #+0x18",
"cbz x0, #+0x10",
"b #+0x70",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x1c (28)",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x3c",
"dup v1.4s, w20",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x10 (16)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x10 (16)",
"cbz x0, #+0x30",
"sub x0, x0, #0x8 (8)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x14",
"add x1, x1, #0x1c (28)",
"str w20, [x1], #-4",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
@ -3410,7 +3736,7 @@
]
},
"rep stosq": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 58,
"Comment": [
"Unrolling the loop for faster memset can be done.",
"Taking advantage of ARM MOPs instructions can be done",
@ -3420,14 +3746,56 @@
"ldrsb x20, [x28, #714]",
"mov x0, x5",
"mov x1, x11",
"tbnz w20, #1, #+0x1c",
"tbnz w20, #1, #+0x6c",
"cbz x0, #+0x60",
"and x3, x1, #0x3",
"cbnz x3, #+0x4c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x3c",
"dup v1.2d, x4",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #32",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x2c",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x10",
"str x4, [x1], #8",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",
"add x11, x11, x5, lsl #3",
"b #+0x18",
"cbz x0, #+0x10",
"b #+0x70",
"cbz x0, #+0x68",
"and x3, x1, #0x3",
"cbnz x3, #+0x54",
"sub x1, x1, #0x18 (24)",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x3c",
"dup v1.2d, x4",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x14",
"stp q1, q1, [x1], #-32",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x8 (8)",
"tbz x0, #63, #-0xc",
"add x0, x0, #0x8 (8)",
"cbz x0, #+0x30",
"sub x0, x0, #0x4 (4)",
"tbnz x0, #63, #+0x10",
"stp q1, q1, [x1], #-32",
"sub x0, x0, #0x4 (4)",
"tbz x0, #63, #-0x8",
"add x0, x0, #0x4 (4)",
"cbz x0, #+0x14",
"add x1, x1, #0x18 (24)",
"str x4, [x1], #-8",
"sub x0, x0, #0x1 (1)",
"cbnz x0, #-0x8",