diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index 0e5ebe7d5..2410549bd 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -4182,13 +4182,13 @@ "Optimal": "Yes", "Comment": "0x0f 0xd1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "ushl v2.8h, v3.8h, v0.8h", + "ushl v2.8h, v2.8h, v0.8h", "str d2, [x28, #752]" ] }, @@ -4197,13 +4197,13 @@ "Optimal": "Yes", "Comment": "0x0f 0xd2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "ushl v2.4s, v3.4s, v0.4s", + "ushl v2.4s, v2.4s, v0.4s", "str d2, [x28, #752]" ] }, @@ -4212,13 +4212,13 @@ "Optimal": "Yes", "Comment": "0x0f 0xd3", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", "neg v0.2d, v0.2d", - "ushl v2.2d, v3.2d, v0.2d", + "ushl v2.2d, v2.2d, v0.2d", "str d2, [x28, #752]" ] }, @@ -4367,13 +4367,13 @@ "Optimal": "Yes", "Comment": "0x0f 0xe1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "sshl v2.8h, v3.8h, v0.8h", + "sshl v2.8h, v2.8h, v0.8h", "str d2, [x28, #752]" ] }, @@ -4382,13 +4382,13 @@ "Optimal": "Yes", "Comment": "0x0f 0xe2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "sshl v2.4s, v3.4s, v0.4s", + "sshl v2.4s, v2.4s, v0.4s", "str d2, [x28, #752]" ] }, @@ -4529,12 +4529,12 @@ "Optimal": "Yes", "Comment": "0x0f 0xf1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", - "ushl v2.8h, v3.8h, v0.8h", + "ushl v2.8h, v2.8h, v0.8h", "str d2, [x28, #752]" ] }, @@ -4543,12 +4543,12 @@ "Optimal": "Yes", "Comment": "0x0f 0xf2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", - "ushl v2.4s, v3.4s, v0.4s", + "ushl v2.4s, v2.4s, v0.4s", "str d2, [x28, #752]" ] }, @@ -4557,12 +4557,12 @@ "Optimal": "Yes", "Comment": "0x0f 0xf3", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "uqshl d0, d2, #57", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", - "ushl v2.2d, v3.2d, v0.2d", + "ushl v2.2d, v2.2d, v0.2d", "str d2, [x28, #752]" ] }, diff --git a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json index 16b0f75a2..fe05ed8d4 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json @@ -9,7 +9,7 @@ "Instructions": { "psrlw xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xd1", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -18,7 +18,7 @@ }, "psrld xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xd2", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -27,7 +27,7 @@ }, "psrlq xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xd3", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -36,7 +36,7 @@ }, "psraw xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xe1", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -45,7 +45,7 @@ }, "psrad xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xe2", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -70,7 +70,7 @@ }, "psllw xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xf1", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -79,7 +79,7 @@ }, "pslld xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xf2", "ExpectedArm64ASM": [ "mov z0.d, d17", @@ -88,7 +88,7 @@ }, "psllq xmm0, xmm1": { "ExpectedInstructionCount": 2, - "Optimal": "No", + "Optimal": "Yes", "Comment": "0x66 0x0f 0xf3", "ExpectedArm64ASM": [ "mov z0.d, d17", diff --git a/unittests/InstructionCountCI/Secondary_SVE128.json b/unittests/InstructionCountCI/Secondary_SVE128.json index 01caeb897..4c423cd41 100644 --- a/unittests/InstructionCountCI/Secondary_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_SVE128.json @@ -34,106 +34,90 @@ ] }, "psrlw mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xd1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsr z2.h, p6/m, z2.h, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsr z2.h, p6/m, z2.h, z3.d", "str d2, [x28, #752]" ] }, "psrld mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xd2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsr z2.s, p6/m, z2.s, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsr z2.s, p6/m, z2.s, z3.d", "str d2, [x28, #752]" ] }, "psrlq mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xd3", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsr z2.d, p6/m, z2.d, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsr z2.d, p6/m, z2.d, z3.d", "str d2, [x28, #752]" ] }, "psraw mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xe1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "asr z2.h, p6/m, z2.h, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "asr z2.h, p6/m, z2.h, z3.d", "str d2, [x28, #752]" ] }, "psrad mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xe2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "asr z2.s, p6/m, z2.s, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "asr z2.s, p6/m, z2.s, z3.d", "str d2, [x28, #752]" ] }, "psllw mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xf1", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsl z2.h, p6/m, z2.h, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsl z2.h, p6/m, z2.h, z3.d", "str d2, [x28, #752]" ] }, "pslld mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xf2", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsl z2.s, p6/m, z2.s, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsl z2.s, p6/m, z2.s, z3.d", "str d2, [x28, #752]" ] }, "psllq mm0, mm1": { - "ExpectedInstructionCount": 6, - "Optimal": "No", + "ExpectedInstructionCount": 4, + "Optimal": "Yes", "Comment": "0x0f 0xf3", "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "ldr d3, [x28, #752]", - "mov z0.d, d2", - "movprfx z2, z3", - "lsl z2.d, p6/m, z2.d, z0.d", + "ldr d2, [x28, #752]", + "ldr d3, [x28, #768]", + "lsl z2.d, p6/m, z2.d, z3.d", "str d2, [x28, #752]" ] }