Merge pull request #3843 from Sonicadvance1/remove_half_moves_fma3

Arm64: Remove one move if possible in FMA operations
This commit is contained in:
Ryan Houdek 2024-07-09 00:25:07 -07:00 committed by GitHub
commit 3bea08da5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 344 additions and 372 deletions

View File

@ -4047,12 +4047,16 @@ DEF_OP(VFMLA) {
const auto Mask = PRED_TMP_32B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else {
@ -4068,7 +4072,11 @@ DEF_OP(VFMLA) {
}
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Q(), VectorAddend.Q());
}
if (OpSize == 16) {
@ -4077,7 +4085,7 @@ DEF_OP(VFMLA) {
fmla(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
}
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Q(), DestTmp.Q());
}
}
@ -4105,24 +4113,32 @@ DEF_OP(VFMLS) {
const auto Mask = PRED_TMP_32B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fnmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else if (HostSupportsSVE128 && Is128Bit) {
const auto Mask = PRED_TMP_16B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fnmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else {
@ -4138,15 +4154,29 @@ DEF_OP(VFMLS) {
}
// Addend needs to get negated to match correct behaviour here.
ARMEmitter::VRegister DestTmp = VTMP1;
ARMEmitter::VRegister DestTmp = Dst;
if (Dst == Vector1 || Dst == Vector2) {
DestTmp = VTMP1;
}
if (Is128Bit) {
fneg(SubRegSize, DestTmp.Q(), VectorAddend.Q());
fmla(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
mov(Dst.Q(), DestTmp.Q());
} else {
fneg(SubRegSize, DestTmp.D(), VectorAddend.D());
}
if (Is128Bit) {
fmla(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
} else {
fmla(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
mov(Dst.D(), DestTmp.D());
}
if (DestTmp != Dst) {
if (Is128Bit) {
mov(Dst.Q(), DestTmp.Q());
} else {
mov(Dst.D(), DestTmp.D());
}
}
}
}
@ -4172,12 +4202,16 @@ DEF_OP(VFNMLA) {
const auto Mask = PRED_TMP_32B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else {
@ -4194,7 +4228,11 @@ DEF_OP(VFNMLA) {
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Q(), VectorAddend.Q());
}
if (OpSize == 16) {
@ -4203,7 +4241,7 @@ DEF_OP(VFNMLA) {
fmls(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
}
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Q(), DestTmp.Q());
}
}
@ -4232,24 +4270,32 @@ DEF_OP(VFNMLS) {
const auto Mask = PRED_TMP_32B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fnmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else if (HostSupportsSVE128 && Is128Bit) {
const auto Mask = PRED_TMP_16B.Merging();
ARMEmitter::VRegister DestTmp = Dst;
if (Dst != VectorAddend) {
DestTmp = VTMP1;
if (Dst != Vector1 && Dst != Vector2) {
DestTmp = Dst;
} else {
DestTmp = VTMP1;
}
mov(DestTmp.Z(), VectorAddend.Z());
}
fnmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
if (Dst != VectorAddend) {
if (Dst != DestTmp) {
mov(Dst.Z(), DestTmp.Z());
}
} else {
@ -4265,15 +4311,29 @@ DEF_OP(VFNMLS) {
}
// Addend needs to get negated to match correct behaviour here.
ARMEmitter::VRegister DestTmp = VTMP1;
ARMEmitter::VRegister DestTmp = Dst;
if (Dst == Vector1 || Dst == Vector2) {
DestTmp = VTMP1;
}
if (Is128Bit) {
fneg(SubRegSize, DestTmp.Q(), VectorAddend.Q());
fmls(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
mov(Dst.Q(), DestTmp.Q());
} else {
fneg(SubRegSize, DestTmp.D(), VectorAddend.D());
}
if (Is128Bit) {
fmls(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
} else {
fmls(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
mov(Dst.D(), DestTmp.D());
}
if (DestTmp != Dst) {
if (Is128Bit) {
mov(Dst.Q(), DestTmp.Q());
} else {
mov(Dst.D(), DestTmp.D());
}
}
}
}

View File

@ -4774,7 +4774,7 @@
]
},
"vfmsub132ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0x9a 256-bit"
],
@ -4785,9 +4785,8 @@
"fneg v0.4s, v17.4s",
"fmla v0.4s, v16.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v3.4s",
"fmla v0.4s, v2.4s, v4.4s",
"mov v3.16b, v0.16b",
"fneg v3.4s, v3.4s",
"fmla v3.4s, v2.4s, v4.4s",
"str q3, [x28, #16]"
]
},
@ -4805,7 +4804,7 @@
]
},
"vfmsub132pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0x9a 256-bit"
],
@ -4816,9 +4815,8 @@
"fneg v0.2d, v17.2d",
"fmla v0.2d, v16.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v3.2d",
"fmla v0.2d, v2.2d, v4.2d",
"mov v3.16b, v0.16b",
"fneg v3.2d, v3.2d",
"fmla v3.2d, v2.2d, v4.2d",
"str q3, [x28, #16]"
]
},
@ -4942,7 +4940,7 @@
]
},
"vfnmsub132ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0x9e 256-bit"
],
@ -4953,9 +4951,8 @@
"fneg v0.4s, v17.4s",
"fmls v0.4s, v16.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v3.4s",
"fmls v0.4s, v2.4s, v4.4s",
"mov v3.16b, v0.16b",
"fneg v3.4s, v3.4s",
"fmls v3.4s, v2.4s, v4.4s",
"str q3, [x28, #16]"
]
},
@ -4973,7 +4970,7 @@
]
},
"vfnmsub132pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0x9e 256-bit"
],
@ -4984,9 +4981,8 @@
"fneg v0.2d, v17.2d",
"fmls v0.2d, v16.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v3.2d",
"fmls v0.2d, v2.2d, v4.2d",
"mov v3.16b, v0.16b",
"fneg v3.2d, v3.2d",
"fmls v3.2d, v2.2d, v4.2d",
"str q3, [x28, #16]"
]
},
@ -5110,7 +5106,7 @@
]
},
"vfmsub213ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0xaa 256-bit"
],
@ -5121,9 +5117,8 @@
"fneg v0.4s, v18.4s",
"fmla v0.4s, v17.4s, v16.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v4.4s",
"fmla v0.4s, v3.4s, v2.4s",
"mov v4.16b, v0.16b",
"fneg v4.4s, v4.4s",
"fmla v4.4s, v3.4s, v2.4s",
"str q4, [x28, #16]"
]
},
@ -5141,7 +5136,7 @@
]
},
"vfmsub213pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0xaa 256-bit"
],
@ -5152,9 +5147,8 @@
"fneg v0.2d, v18.2d",
"fmla v0.2d, v17.2d, v16.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v4.2d",
"fmla v0.2d, v3.2d, v2.2d",
"mov v4.16b, v0.16b",
"fneg v4.2d, v4.2d",
"fmla v4.2d, v3.2d, v2.2d",
"str q4, [x28, #16]"
]
},
@ -5278,7 +5272,7 @@
]
},
"vfnmsub213ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0xae 256-bit"
],
@ -5289,9 +5283,8 @@
"fneg v0.4s, v18.4s",
"fmls v0.4s, v17.4s, v16.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v4.4s",
"fmls v0.4s, v3.4s, v2.4s",
"mov v4.16b, v0.16b",
"fneg v4.4s, v4.4s",
"fmls v4.4s, v3.4s, v2.4s",
"str q4, [x28, #16]"
]
},
@ -5309,7 +5302,7 @@
]
},
"vfnmsub213pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": [
"Map 2 0b01 0xae 256-bit"
],
@ -5320,9 +5313,8 @@
"fneg v0.2d, v18.2d",
"fmls v0.2d, v17.2d, v16.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v4.2d",
"fmls v0.2d, v3.2d, v2.2d",
"mov v4.16b, v0.16b",
"fneg v4.2d, v4.2d",
"fmls v4.2d, v3.2d, v2.2d",
"str q4, [x28, #16]"
]
},
@ -5425,20 +5417,19 @@
]
},
"vfmsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": [
"Map 2 0b01 0xba 128-bit"
],
"ExpectedArm64ASM": [
"fneg v0.4s, v16.4s",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v16.4s, v16.4s",
"fmla v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsub231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Comment": [
"Map 2 0b01 0xba 256-bit"
],
@ -5446,30 +5437,27 @@
"ldr q2, [x28, #16]",
"ldr q3, [x28, #32]",
"ldr q4, [x28, #48]",
"fneg v0.4s, v16.4s",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v2.4s",
"fmla v0.4s, v3.4s, v4.4s",
"mov v2.16b, v0.16b",
"fneg v16.4s, v16.4s",
"fmla v16.4s, v17.4s, v18.4s",
"fneg v2.4s, v2.4s",
"fmla v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfmsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": [
"Map 2 0b01 0xba 128-bit"
],
"ExpectedArm64ASM": [
"fneg v0.2d, v16.2d",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v16.2d, v16.2d",
"fmla v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsub231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Comment": [
"Map 2 0b01 0xba 256-bit"
],
@ -5477,12 +5465,10 @@
"ldr q2, [x28, #16]",
"ldr q3, [x28, #32]",
"ldr q4, [x28, #48]",
"fneg v0.2d, v16.2d",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v2.2d",
"fmla v0.2d, v3.2d, v4.2d",
"mov v2.16b, v0.16b",
"fneg v16.2d, v16.2d",
"fmla v16.2d, v17.2d, v18.2d",
"fneg v2.2d, v2.2d",
"fmla v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"
]
},
@ -5585,20 +5571,19 @@
]
},
"vfnmsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": [
"Map 2 0b01 0xbe 128-bit"
],
"ExpectedArm64ASM": [
"fneg v0.4s, v16.4s",
"fmls v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v16.4s, v16.4s",
"fmls v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfnmsub231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Comment": [
"Map 2 0b01 0xbe 256-bit"
],
@ -5606,30 +5591,27 @@
"ldr q2, [x28, #16]",
"ldr q3, [x28, #32]",
"ldr q4, [x28, #48]",
"fneg v0.4s, v16.4s",
"fmls v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"fneg v0.4s, v2.4s",
"fmls v0.4s, v3.4s, v4.4s",
"mov v2.16b, v0.16b",
"fneg v16.4s, v16.4s",
"fmls v16.4s, v17.4s, v18.4s",
"fneg v2.4s, v2.4s",
"fmls v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfnmsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": [
"Map 2 0b01 0xbe 128-bit"
],
"ExpectedArm64ASM": [
"fneg v0.2d, v16.2d",
"fmls v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v16.2d, v16.2d",
"fmls v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfnmsub231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 8,
"Comment": [
"Map 2 0b01 0xbe 256-bit"
],
@ -5637,12 +5619,10 @@
"ldr q2, [x28, #16]",
"ldr q3, [x28, #32]",
"ldr q4, [x28, #48]",
"fneg v0.2d, v16.2d",
"fmls v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"fneg v0.2d, v2.2d",
"fmls v0.2d, v3.2d, v4.2d",
"mov v2.16b, v0.16b",
"fneg v16.2d, v16.2d",
"fmls v16.2d, v17.2d, v18.2d",
"fneg v2.2d, v2.2d",
"fmls v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"
]
},
@ -5807,22 +5787,21 @@
]
},
"vfmaddsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb6 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2384]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmaddsub231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -5832,31 +5811,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2384]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.4s, v17.4s, v18.4s",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfmaddsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb6 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2416]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmaddsub231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -5866,31 +5843,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2416]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.2d, v17.2d, v18.2d",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"
]
},
"vfmsubadd231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb7 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2448]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsubadd231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -5900,31 +5875,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2448]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.4s, v17.4s, v18.4s",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfmsubadd231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb7 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2480]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsubadd231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -5934,9 +5907,8 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2480]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.2d, v17.2d, v18.2d",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"

View File

@ -4047,22 +4047,21 @@
]
},
"vfmaddsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb6 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2384]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmaddsub231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -4072,31 +4071,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2384]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.4s, v17.4s, v18.4s",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfmaddsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb6 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2416]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmaddsub231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -4106,31 +4103,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2416]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.2d, v17.2d, v18.2d",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"
]
},
"vfmsubadd231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb7 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2448]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.4s, v17.4s, v18.4s",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsubadd231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -4140,31 +4135,29 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2448]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.4s, v17.4s, v18.4s",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.4s, v3.4s, v4.4s",
"str q2, [x28, #16]"
]
},
"vfmsubadd231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb7 128-bit"
],
"ExpectedArm64ASM": [
"ldr q2, [x28, #2480]",
"eor v2.16b, v16.16b, v2.16b",
"mov v0.16b, v2.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v2.16b",
"fmla v16.2d, v17.2d, v18.2d",
"movi v2.2d, #0x0",
"str q2, [x28, #16]"
]
},
"vfmsubadd231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -4174,9 +4167,8 @@
"ldr q4, [x28, #48]",
"ldr q5, [x28, #2480]",
"eor v6.16b, v16.16b, v5.16b",
"mov v0.16b, v6.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v16.16b, v0.16b",
"mov v16.16b, v6.16b",
"fmla v16.2d, v17.2d, v18.2d",
"eor v2.16b, v2.16b, v5.16b",
"fmla v2.2d, v3.2d, v4.2d",
"str q2, [x28, #16]"

View File

@ -3755,14 +3755,13 @@
]
},
"vfmadd132ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x98 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmla v0.4s, v16.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmla v2.4s, v16.4s, v18.4s",
"mov v16.16b, v2.16b"
]
},
@ -3778,14 +3777,13 @@
]
},
"vfmadd132pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x98 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmla v0.2d, v16.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmla v2.2d, v16.2d, v18.2d",
"mov v16.16b, v2.16b"
]
},
@ -3801,14 +3799,13 @@
]
},
"vfmadd132ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x99 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmla v0.4s, v16.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmla v2.4s, v16.4s, v18.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -3816,14 +3813,13 @@
]
},
"vfmadd132sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x99 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmla v0.2d, v16.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmla v2.2d, v16.2d, v18.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -3831,14 +3827,13 @@
]
},
"vfmsub132ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9a 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmls z0.s, p6/m, z16.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmls z2.s, p6/m, z16.s, z18.s",
"mov v16.16b, v2.16b"
]
},
@ -3854,14 +3849,13 @@
]
},
"vfmsub132pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9a 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmls z0.d, p6/m, z16.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmls z2.d, p6/m, z16.d, z18.d",
"mov v16.16b, v2.16b"
]
},
@ -3877,14 +3871,13 @@
]
},
"vfmsub132ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9b 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmls z0.s, p6/m, z16.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmls z2.s, p6/m, z16.s, z18.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -3892,14 +3885,13 @@
]
},
"vfmsub132sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9b 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmls z0.d, p6/m, z16.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmls z2.d, p6/m, z16.d, z18.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -3907,14 +3899,13 @@
]
},
"vfnmadd132ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9c 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmls v0.4s, v16.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmls v2.4s, v16.4s, v18.4s",
"mov v16.16b, v2.16b"
]
},
@ -3930,14 +3921,13 @@
]
},
"vfnmadd132pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9c 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmls v0.2d, v16.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmls v2.2d, v16.2d, v18.2d",
"mov v16.16b, v2.16b"
]
},
@ -3953,14 +3943,13 @@
]
},
"vfnmadd132ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9d 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmls v0.4s, v16.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmls v2.4s, v16.4s, v18.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -3968,14 +3957,13 @@
]
},
"vfnmadd132sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9d 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v17.16b",
"fmls v0.2d, v16.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v17.16b",
"fmls v2.2d, v16.2d, v18.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -3983,14 +3971,13 @@
]
},
"vfnmsub132ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9e 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmla z0.s, p6/m, z16.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmla z2.s, p6/m, z16.s, z18.s",
"mov v16.16b, v2.16b"
]
},
@ -4006,14 +3993,13 @@
]
},
"vfnmsub132pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0x9e 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmla z0.d, p6/m, z16.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmla z2.d, p6/m, z16.d, z18.d",
"mov v16.16b, v2.16b"
]
},
@ -4029,14 +4015,13 @@
]
},
"vfnmsub132ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9f 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmla z0.s, p6/m, z16.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmla z2.s, p6/m, z16.s, z18.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4044,14 +4029,13 @@
]
},
"vfnmsub132sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0x9f 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z17.d",
"fnmla z0.d, p6/m, z16.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z17.d",
"fnmla z2.d, p6/m, z16.d, z18.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4059,14 +4043,13 @@
]
},
"vfmadd213ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xa8 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmla v0.4s, v17.4s, v16.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmla v2.4s, v17.4s, v16.4s",
"mov v16.16b, v2.16b"
]
},
@ -4082,14 +4065,13 @@
]
},
"vfmadd213pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xa8 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmla v0.2d, v17.2d, v16.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmla v2.2d, v17.2d, v16.2d",
"mov v16.16b, v2.16b"
]
},
@ -4105,14 +4087,13 @@
]
},
"vfmadd213ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xa9 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmla v0.4s, v17.4s, v16.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmla v2.4s, v17.4s, v16.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4120,14 +4101,13 @@
]
},
"vfmadd213sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xa9 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmla v0.2d, v17.2d, v16.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmla v2.2d, v17.2d, v16.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4135,14 +4115,13 @@
]
},
"vfmsub213ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xaa 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmls z0.s, p6/m, z17.s, z16.s",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmls z2.s, p6/m, z17.s, z16.s",
"mov v16.16b, v2.16b"
]
},
@ -4158,14 +4137,13 @@
]
},
"vfmsub213pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xaa 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmls z0.d, p6/m, z17.d, z16.d",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmls z2.d, p6/m, z17.d, z16.d",
"mov v16.16b, v2.16b"
]
},
@ -4181,14 +4159,13 @@
]
},
"vfmsub213ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xab 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmls z0.s, p6/m, z17.s, z16.s",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmls z2.s, p6/m, z17.s, z16.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4196,14 +4173,13 @@
]
},
"vfmsub213sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xab 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmls z0.d, p6/m, z17.d, z16.d",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmls z2.d, p6/m, z17.d, z16.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4211,14 +4187,13 @@
]
},
"vfnmadd213ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xac 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmls v0.4s, v17.4s, v16.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmls v2.4s, v17.4s, v16.4s",
"mov v16.16b, v2.16b"
]
},
@ -4234,14 +4209,13 @@
]
},
"vfnmadd213pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xac 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmls v0.2d, v17.2d, v16.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmls v2.2d, v17.2d, v16.2d",
"mov v16.16b, v2.16b"
]
},
@ -4257,14 +4231,13 @@
]
},
"vfnmadd213ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xad 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmls v0.4s, v17.4s, v16.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmls v2.4s, v17.4s, v16.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4272,14 +4245,13 @@
]
},
"vfnmadd213sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xad 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v18.16b",
"fmls v0.2d, v17.2d, v16.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v18.16b",
"fmls v2.2d, v17.2d, v16.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4287,14 +4259,13 @@
]
},
"vfnmsub213ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xae 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmla z0.s, p6/m, z17.s, z16.s",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmla z2.s, p6/m, z17.s, z16.s",
"mov v16.16b, v2.16b"
]
},
@ -4310,14 +4281,13 @@
]
},
"vfnmsub213pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xae 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmla z0.d, p6/m, z17.d, z16.d",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmla z2.d, p6/m, z17.d, z16.d",
"mov v16.16b, v2.16b"
]
},
@ -4333,14 +4303,13 @@
]
},
"vfnmsub213ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xaf 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmla z0.s, p6/m, z17.s, z16.s",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmla z2.s, p6/m, z17.s, z16.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4348,14 +4317,13 @@
]
},
"vfnmsub213sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xaf 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z18.d",
"fnmla z0.d, p6/m, z17.d, z16.d",
"mov z2.d, z0.d",
"mov z2.d, z18.d",
"fnmla z2.d, p6/m, z17.d, z16.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4363,14 +4331,13 @@
]
},
"vfmadd231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xb8 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmla v2.4s, v17.4s, v18.4s",
"mov v16.16b, v2.16b"
]
},
@ -4384,14 +4351,13 @@
]
},
"vfmadd231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xb8 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmla v2.2d, v17.2d, v18.2d",
"mov v16.16b, v2.16b"
]
},
@ -4405,14 +4371,13 @@
]
},
"vfmadd231ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb9 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmla v0.4s, v17.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmla v2.4s, v17.4s, v18.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4420,14 +4385,13 @@
]
},
"vfmadd231sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xb9 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmla v0.2d, v17.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmla v2.2d, v17.2d, v18.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4435,14 +4399,13 @@
]
},
"vfmsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xba 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmls z0.s, p6/m, z17.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmls z2.s, p6/m, z17.s, z18.s",
"mov v16.16b, v2.16b"
]
},
@ -4456,14 +4419,13 @@
]
},
"vfmsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xba 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmls z0.d, p6/m, z17.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmls z2.d, p6/m, z17.d, z18.d",
"mov v16.16b, v2.16b"
]
},
@ -4477,14 +4439,13 @@
]
},
"vfmsub231ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbb 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmls z0.s, p6/m, z17.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmls z2.s, p6/m, z17.s, z18.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4492,14 +4453,13 @@
]
},
"vfmsub231sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbb 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmls z0.d, p6/m, z17.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmls z2.d, p6/m, z17.d, z18.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4507,14 +4467,13 @@
]
},
"vfnmadd231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xbc 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmls v0.4s, v17.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmls v2.4s, v17.4s, v18.4s",
"mov v16.16b, v2.16b"
]
},
@ -4528,14 +4487,13 @@
]
},
"vfnmadd231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xbc 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmls v0.2d, v17.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmls v2.2d, v17.2d, v18.2d",
"mov v16.16b, v2.16b"
]
},
@ -4549,14 +4507,13 @@
]
},
"vfnmadd231ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbd 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmls v0.4s, v17.4s, v18.4s",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmls v2.4s, v17.4s, v18.4s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4564,14 +4521,13 @@
]
},
"vfnmadd231sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbd 128-bit"
],
"ExpectedArm64ASM": [
"mov v0.16b, v16.16b",
"fmls v0.2d, v17.2d, v18.2d",
"mov v2.16b, v0.16b",
"mov v2.16b, v16.16b",
"fmls v2.2d, v17.2d, v18.2d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4579,14 +4535,13 @@
]
},
"vfnmsub231ps xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xbe 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmla z0.s, p6/m, z17.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmla z2.s, p6/m, z17.s, z18.s",
"mov v16.16b, v2.16b"
]
},
@ -4600,14 +4555,13 @@
]
},
"vfnmsub231pd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": [
"Map 2 0b01 0xbe 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmla z0.d, p6/m, z17.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmla z2.d, p6/m, z17.d, z18.d",
"mov v16.16b, v2.16b"
]
},
@ -4621,14 +4575,13 @@
]
},
"vfnmsub231ss xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbf 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmla z0.s, p6/m, z17.s, z18.s",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmla z2.s, p6/m, z17.s, z18.s",
"mov v0.16b, v16.16b",
"mov v0.s[0], v2.s[0]",
"mov v2.16b, v0.16b",
@ -4636,14 +4589,13 @@
]
},
"vfnmsub231sd xmm0, xmm1, xmm2": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": [
"Map 2 0b01 0xbf 128-bit"
],
"ExpectedArm64ASM": [
"mov z0.d, z16.d",
"fnmla z0.d, p6/m, z17.d, z18.d",
"mov z2.d, z0.d",
"mov z2.d, z16.d",
"fnmla z2.d, p6/m, z17.d, z18.d",
"mov v0.16b, v16.16b",
"mov v0.d[0], v2.d[0]",
"mov v2.16b, v0.16b",
@ -4767,7 +4719,7 @@
]
},
"vfmaddsub231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 5,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -4775,9 +4727,8 @@
"ldr x0, [x28, #1840]",
"ld1b {z2.b}, p7/z, [x0]",
"eor z2.d, z16.d, z2.d",
"mov z0.d, z2.d",
"fmla z0.s, p7/m, z17.s, z18.s",
"mov z16.d, z0.d"
"mov z16.d, z2.d",
"fmla z16.s, p7/m, z17.s, z18.s"
]
},
"vfmaddsub231pd xmm0, xmm1, xmm2": {
@ -4793,7 +4744,7 @@
]
},
"vfmaddsub231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 5,
"Comment": [
"Map 2 0b01 0xb6 256-bit"
],
@ -4801,9 +4752,8 @@
"ldr x0, [x28, #1856]",
"ld1b {z2.b}, p7/z, [x0]",
"eor z2.d, z16.d, z2.d",
"mov z0.d, z2.d",
"fmla z0.d, p7/m, z17.d, z18.d",
"mov z16.d, z0.d"
"mov z16.d, z2.d",
"fmla z16.d, p7/m, z17.d, z18.d"
]
},
"vfmsubadd231ps xmm0, xmm1, xmm2": {
@ -4819,7 +4769,7 @@
]
},
"vfmsubadd231ps ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 5,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -4827,9 +4777,8 @@
"ldr x0, [x28, #1872]",
"ld1b {z2.b}, p7/z, [x0]",
"eor z2.d, z16.d, z2.d",
"mov z0.d, z2.d",
"fmla z0.s, p7/m, z17.s, z18.s",
"mov z16.d, z0.d"
"mov z16.d, z2.d",
"fmla z16.s, p7/m, z17.s, z18.s"
]
},
"vfmsubadd231pd xmm0, xmm1, xmm2": {
@ -4845,7 +4794,7 @@
]
},
"vfmsubadd231pd ymm0, ymm1, ymm2": {
"ExpectedInstructionCount": 6,
"ExpectedInstructionCount": 5,
"Comment": [
"Map 2 0b01 0xb7 256-bit"
],
@ -4853,9 +4802,8 @@
"ldr x0, [x28, #1888]",
"ld1b {z2.b}, p7/z, [x0]",
"eor z2.d, z16.d, z2.d",
"mov z0.d, z2.d",
"fmla z0.d, p7/m, z17.d, z18.d",
"mov z16.d, z0.d"
"mov z16.d, z2.d",
"fmla z16.d, p7/m, z17.d, z18.d"
]
},
"vaesimc xmm0, xmm1": {