mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-02-14 19:47:59 +00:00
OpcodeDispatcher: Eliminate unnecessary moves in {AVX}VFCMPOp
When dealing with scalar vector sources, we don't need to zero-extend the vector; we can just use it as is.
This commit is contained in:
parent
db60a2fd4b
commit
343b00818d
@ -2384,9 +2384,12 @@ OrderedNode* OpDispatchBuilder::VFCMPOpImpl(OpcodeArgs, size_t ElementSize, bool
|
||||
|
||||
template<size_t ElementSize, bool Scalar>
|
||||
void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
|
||||
// No need for zero-extending in the scalar case, since
|
||||
// all we need is an insert at the end of the operation.
|
||||
const auto SrcSize = Scalar && Op->Src[0].IsGPR() ? 16U : GetSrcSize(Op);
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
|
||||
OrderedNode *Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, -1);
|
||||
OrderedNode *Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags, -1);
|
||||
OrderedNode *Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags, -1);
|
||||
const uint8_t CompType = Op->Src[1].Data.Literal.Value;
|
||||
|
||||
@ -2406,6 +2409,9 @@ void OpDispatchBuilder::VFCMPOp<8, true>(OpcodeArgs);
|
||||
|
||||
template <size_t ElementSize, bool Scalar>
|
||||
void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) {
|
||||
// No need for zero-extending in the scalar case, since
|
||||
// all we need is an insert at the end of the operation.
|
||||
const auto SrcSize = Scalar && Op->Src[1].IsGPR() ? 16U : GetSrcSize(Op);
|
||||
const auto DstSize = GetDstSize(Op);
|
||||
const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
|
||||
|
||||
@ -2413,7 +2419,7 @@ void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) {
|
||||
const uint8_t CompType = Op->Src[2].Data.Literal.Value;
|
||||
|
||||
OrderedNode *Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags, -1);
|
||||
OrderedNode *Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, -1);
|
||||
OrderedNode *Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags, -1);
|
||||
OrderedNode *Result = VFCMPOpImpl(Op, ElementSize, Scalar, Src1, Src2, CompType);
|
||||
|
||||
if (Is128Bit) {
|
||||
|
@ -600,36 +600,42 @@
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 0": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmeq s4, s16, s4",
|
||||
"fcmeq s4, s16, s17",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 1": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmgt s4, s4, s16",
|
||||
"fcmgt s4, s17, s16",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 2": {
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmge s4, s17, s16",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 3": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
@ -637,92 +643,62 @@
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmge s4, s4, s16",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 3": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmge s0, s16, s4",
|
||||
"fcmgt s1, s4, s16",
|
||||
"fcmge s0, s16, s17",
|
||||
"fcmgt s1, s17, s16",
|
||||
"orr v4.8b, v0.8b, v1.8b",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 4": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmeq s4, s16, s4",
|
||||
"fcmeq s4, s16, s17",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 5": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmgt s4, s4, s16",
|
||||
"fcmgt s4, s17, s16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 6": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmge s4, s4, s16",
|
||||
"fcmge s4, s17, s16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
},
|
||||
"cmpss xmm0, xmm1, 7": {
|
||||
"ExpectedInstructionCount": 7,
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf3 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v17.s[0]",
|
||||
"mov v4.16b, v0.16b",
|
||||
"fcmge s0, s16, s4",
|
||||
"fcmgt s1, s4, s16",
|
||||
"fcmge s0, s16, s17",
|
||||
"fcmgt s1, s17, s16",
|
||||
"orr v4.8b, v0.8b, v1.8b",
|
||||
"mov v16.s[0], v4.s[0]"
|
||||
]
|
||||
|
@ -413,103 +413,42 @@
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 0": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmeq d4, d16, d4",
|
||||
"fcmeq d4, d16, d17",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 1": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmgt d4, d4, d16",
|
||||
"fcmgt d4, d17, d16",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 2": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"ExpectedInstructionCount": 2,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmge d4, d4, d16",
|
||||
"fcmge d4, d17, d16",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 3": {
|
||||
"ExpectedInstructionCount": 6,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmge d0, d16, d4",
|
||||
"fcmgt d1, d4, d16",
|
||||
"orr v4.8b, v0.8b, v1.8b",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 4": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmeq d4, d16, d4",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 5": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmgt d4, d4, d16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 6": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmge d4, d4, d16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 7": {
|
||||
"ExpectedInstructionCount": 5,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
@ -517,9 +456,62 @@
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"mov v4.8b, v17.8b",
|
||||
"fcmge d0, d16, d4",
|
||||
"fcmgt d1, d4, d16",
|
||||
"fcmge d0, d16, d17",
|
||||
"fcmgt d1, d17, d16",
|
||||
"orr v4.8b, v0.8b, v1.8b",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 4": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmeq d4, d16, d17",
|
||||
"mvn v4.8b, v4.8b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 5": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmgt d4, d17, d16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 6": {
|
||||
"ExpectedInstructionCount": 3,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmge d4, d17, d16",
|
||||
"mvn v4.16b, v4.16b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
},
|
||||
"cmpsd xmm0, xmm1, 7": {
|
||||
"ExpectedInstructionCount": 4,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"With AFP mode FEX can remove an insert after the operation.",
|
||||
"0xf2 0x0f 0xc2"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"fcmge d0, d16, d17",
|
||||
"fcmgt d1, d17, d16",
|
||||
"orr v4.8b, v0.8b, v1.8b",
|
||||
"mov v16.d[0], v4.d[0]"
|
||||
]
|
||||
|
@ -3136,7 +3136,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x00": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3144,9 +3144,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmeq s5, s4, s5",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3155,7 +3152,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x01": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3163,9 +3160,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmgt s5, s5, s4",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3174,7 +3168,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x02": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3182,9 +3176,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmge s5, s5, s4",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3193,7 +3184,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x03": {
|
||||
"ExpectedInstructionCount": 13,
|
||||
"ExpectedInstructionCount": 10,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3201,9 +3192,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmge s0, s4, s5",
|
||||
"fcmgt s1, s5, s4",
|
||||
"orr v5.8b, v0.8b, v1.8b",
|
||||
@ -3215,7 +3203,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x04": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3223,9 +3211,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmeq s5, s4, s5",
|
||||
"mvn v5.8b, v5.8b",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
@ -3235,7 +3220,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x05": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3243,9 +3228,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmgt s5, s5, s4",
|
||||
"mvn v5.16b, v5.16b",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
@ -3255,7 +3237,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x06": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3263,9 +3245,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmge s5, s5, s4",
|
||||
"mvn v5.16b, v5.16b",
|
||||
"mov v4.s[0], v5.s[0]",
|
||||
@ -3275,7 +3254,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpss xmm0, xmm1, xmm2, 0x07": {
|
||||
"ExpectedInstructionCount": 12,
|
||||
"ExpectedInstructionCount": 9,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b10 0xC2 128-bit"
|
||||
@ -3283,9 +3262,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"movi v0.2d, #0x0",
|
||||
"mov v0.s[0], v5.s[0]",
|
||||
"mov v5.16b, v0.16b",
|
||||
"fcmge s0, s4, s5",
|
||||
"fcmgt s1, s5, s4",
|
||||
"orr v5.8b, v0.8b, v1.8b",
|
||||
@ -3296,7 +3272,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x00": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3304,7 +3280,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmeq d5, d4, d5",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3313,7 +3288,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x01": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3321,7 +3296,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmgt d5, d5, d4",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3330,7 +3304,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x02": {
|
||||
"ExpectedInstructionCount": 8,
|
||||
"ExpectedInstructionCount": 7,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3338,7 +3312,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmge d5, d5, d4",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
"mov v4.16b, v4.16b",
|
||||
@ -3347,7 +3320,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x03": {
|
||||
"ExpectedInstructionCount": 11,
|
||||
"ExpectedInstructionCount": 10,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3355,7 +3328,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmge d0, d4, d5",
|
||||
"fcmgt d1, d5, d4",
|
||||
"orr v5.8b, v0.8b, v1.8b",
|
||||
@ -3367,7 +3339,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x04": {
|
||||
"ExpectedInstructionCount": 9,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3375,7 +3347,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmeq d5, d4, d5",
|
||||
"mvn v5.8b, v5.8b",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
@ -3385,7 +3356,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x05": {
|
||||
"ExpectedInstructionCount": 9,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3393,7 +3364,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmgt d5, d5, d4",
|
||||
"mvn v5.16b, v5.16b",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
@ -3403,7 +3373,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x06": {
|
||||
"ExpectedInstructionCount": 9,
|
||||
"ExpectedInstructionCount": 8,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3411,7 +3381,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmge d5, d5, d4",
|
||||
"mvn v5.16b, v5.16b",
|
||||
"mov v4.d[0], v5.d[0]",
|
||||
@ -3421,7 +3390,7 @@
|
||||
]
|
||||
},
|
||||
"vcmpsd xmm0, xmm1, xmm2, 0x07": {
|
||||
"ExpectedInstructionCount": 10,
|
||||
"ExpectedInstructionCount": 9,
|
||||
"Optimal": "No",
|
||||
"Comment": [
|
||||
"Map 1 0b11 0xC2 128-bit"
|
||||
@ -3429,7 +3398,6 @@
|
||||
"ExpectedArm64ASM": [
|
||||
"mov z4.d, p7/m, z17.d",
|
||||
"mov z5.d, p7/m, z18.d",
|
||||
"mov v5.8b, v5.8b",
|
||||
"fcmge d0, d4, d5",
|
||||
"fcmgt d1, d5, d4",
|
||||
"orr v5.8b, v0.8b, v1.8b",
|
||||
|
Loading…
x
Reference in New Issue
Block a user