mirror of
https://github.com/FEX-Emu/FEX.git
synced 2024-12-13 17:15:41 +00:00
Merge pull request #3836 from Sonicadvance1/optimize_sve_vpgatherdd
AVX128: Optimize the vpgatherdd/vgatherdps cases that would fall back to ASIMD
This commit is contained in:
commit
22b26696ba
@ -2742,8 +2742,42 @@ void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs) {
|
||||
auto VSIB = AVX128_LoadVSIB(Op, Op->Src[0], Op->Flags, NeedsHighAddrBytes);
|
||||
auto Mask = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !Is128Bit);
|
||||
|
||||
bool NeedsSVEScale = (VSIB.Scale == 2 || VSIB.Scale == 8) || (VSIB.BaseAddr == Invalid() && VSIB.Scale != 1);
|
||||
|
||||
const bool NeedsExplicitSVEPath =
|
||||
CTX->HostFeatures.SupportsSVE128 && AddrElementSize == OpSize::i32Bit && ElementLoadSize == OpSize::i32Bit && NeedsSVEScale;
|
||||
|
||||
RefPair Result {};
|
||||
if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
|
||||
if (NeedsExplicitSVEPath) {
|
||||
// Special case for VGATHERDPS/VPGATHERDD (32-bit addresses loading 32-bit elements) that can't use the SVE codepath.
|
||||
// The problem is due to the scale not matching SVE limitations, we need to prescale the addresses to be 64-bit.
|
||||
auto ScaleVSIBHalf = [this](Ref VSIB, Ref BaseAddr, int32_t Displacement, uint8_t Scale) -> RefVSIB {
|
||||
RefVSIB Result {};
|
||||
Result.High = _VSSHLL2(OpSize::i128Bit, OpSize::i32Bit, VSIB, FEXCore::ilog2(Scale));
|
||||
Result.Low = _VSSHLL(OpSize::i128Bit, OpSize::i32Bit, VSIB, FEXCore::ilog2(Scale));
|
||||
|
||||
Result.Displacement = Displacement;
|
||||
Result.BaseAddr = BaseAddr;
|
||||
|
||||
///< Set the scale to one now that it has been prescaled as well.
|
||||
Result.Scale = 1;
|
||||
return Result;
|
||||
};
|
||||
|
||||
RefVSIB VSIBLow = ScaleVSIBHalf(VSIB.Low, VSIB.BaseAddr, VSIB.Displacement, VSIB.Scale);
|
||||
RefVSIB VSIBHigh {};
|
||||
|
||||
if (NeedsHighAddrBytes) {
|
||||
VSIBHigh = ScaleVSIBHalf(VSIB.High, VSIB.BaseAddr, VSIB.Displacement, VSIB.Scale);
|
||||
}
|
||||
|
||||
///< AddressElementSize is now OpSize::i64Bit
|
||||
Result = AVX128_VPGatherQPSImpl(Dest.Low, Mask.Low, VSIBLow);
|
||||
if (NeedsHighAddrBytes) {
|
||||
auto Res = AVX128_VPGatherQPSImpl(Dest.High, Mask.High, VSIBHigh);
|
||||
Result.High = Res.Low;
|
||||
}
|
||||
} else if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
|
||||
Result = AVX128_VPGatherQPSImpl(Dest.Low, Mask.Low, VSIB);
|
||||
} else {
|
||||
Result = AVX128_VPGatherImpl(SizeToOpSize(Size), ElementLoadSize, AddrElementSize, Dest, Mask, VSIB);
|
||||
|
@ -149,32 +149,21 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd xmm0, [xmm1*2 + rax], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #1",
|
||||
"sshll v3.2d, v17.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -198,32 +187,21 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd xmm0, [xmm1*8 + rax], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #3",
|
||||
"sshll v3.2d, v17.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -253,7 +231,7 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd ymm0, [ymm1*2 + rax], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 256-bit"
|
||||
],
|
||||
@ -261,49 +239,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #1",
|
||||
"sshll v6.2d, v17.2s, #1",
|
||||
"sshll2 v7.2d, v3.4s, #1",
|
||||
"sshll v3.2d, v3.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -331,7 +287,7 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd ymm0, [ymm1*8 + rax], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 256-bit"
|
||||
],
|
||||
@ -339,49 +295,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #3",
|
||||
"sshll v6.2d, v17.2s, #3",
|
||||
"sshll2 v7.2d, v3.4s, #3",
|
||||
"sshll v3.2d, v3.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -911,32 +845,21 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps xmm0, [xmm1*2 + rax], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #1",
|
||||
"sshll v3.2d, v17.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -960,32 +883,21 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps xmm0, [xmm1*8 + rax], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #3",
|
||||
"sshll v3.2d, v17.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -1015,7 +927,7 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps ymm0, [ymm1*2 + rax], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 256-bit"
|
||||
],
|
||||
@ -1023,49 +935,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #1",
|
||||
"sshll v6.2d, v17.2s, #1",
|
||||
"sshll2 v7.2d, v3.4s, #1",
|
||||
"sshll v3.2d, v3.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"add x1, x4, w0, sxtw #1",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -1093,7 +983,7 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps ymm0, [ymm1*8 + rax], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 256-bit"
|
||||
],
|
||||
@ -1101,49 +991,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #3",
|
||||
"sshll v6.2d, v17.2s, #3",
|
||||
"sshll2 v7.2d, v3.4s, #3",
|
||||
"sshll v3.2d, v3.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"add x1, x4, w0, sxtw #3",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [x4, z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [x4, z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -1657,32 +1525,21 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd xmm0, [xmm1*2], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #1",
|
||||
"sshll v3.2d, v17.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -1707,32 +1564,21 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd xmm0, [xmm1*8], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #3",
|
||||
"sshll v3.2d, v17.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -1762,7 +1608,7 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd ymm0, [ymm1*2], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 256-bit"
|
||||
],
|
||||
@ -1770,49 +1616,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #1",
|
||||
"sshll v6.2d, v17.2s, #1",
|
||||
"sshll2 v7.2d, v3.4s, #1",
|
||||
"sshll v3.2d, v3.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -1842,7 +1666,7 @@
|
||||
]
|
||||
},
|
||||
"vpgatherdd ymm0, [ymm1*8], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x90 256-bit"
|
||||
],
|
||||
@ -1850,49 +1674,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #3",
|
||||
"sshll v6.2d, v17.2s, #3",
|
||||
"sshll2 v7.2d, v3.4s, #3",
|
||||
"sshll v3.2d, v3.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -2428,32 +2230,21 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps xmm0, [xmm1*2], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #1",
|
||||
"sshll v3.2d, v17.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -2478,32 +2269,21 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps xmm0, [xmm1*8], xmm2": {
|
||||
"ExpectedInstructionCount": 25,
|
||||
"ExpectedInstructionCount": 14,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 128-bit"
|
||||
],
|
||||
"ExpectedArm64ASM": [
|
||||
"sshll2 v2.2d, v17.4s, #3",
|
||||
"sshll v3.2d, v17.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z2.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"str q18, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
@ -2533,7 +2313,7 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps ymm0, [ymm1*2], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 256-bit"
|
||||
],
|
||||
@ -2541,49 +2321,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #1",
|
||||
"sshll v6.2d, v17.2s, #1",
|
||||
"sshll2 v7.2d, v3.4s, #1",
|
||||
"sshll v3.2d, v3.2s, #1",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"sbfiz x1, x0, #1, #32",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
@ -2613,7 +2371,7 @@
|
||||
]
|
||||
},
|
||||
"vgatherdps ymm0, [ymm1*8], ymm2": {
|
||||
"ExpectedInstructionCount": 48,
|
||||
"ExpectedInstructionCount": 26,
|
||||
"Comment": [
|
||||
"Map 2 0b01 0x92 256-bit"
|
||||
],
|
||||
@ -2621,49 +2379,27 @@
|
||||
"ldr q2, [x28, #16]",
|
||||
"ldr q3, [x28, #32]",
|
||||
"ldr q4, [x28, #48]",
|
||||
"sshll2 v5.2d, v17.4s, #3",
|
||||
"sshll v6.2d, v17.2s, #3",
|
||||
"sshll2 v7.2d, v3.4s, #3",
|
||||
"sshll v3.2d, v3.2s, #3",
|
||||
"mrs x20, nzcv",
|
||||
"mov w0, v18.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[0], [x1]",
|
||||
"mov w0, v18.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[1], [x1]",
|
||||
"mov w0, v18.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[2], [x1]",
|
||||
"mov w0, v18.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v17.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v16.s}[3], [x1]",
|
||||
"mov w0, v4.s[0]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[0]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[0], [x1]",
|
||||
"mov w0, v4.s[1]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[1]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[1], [x1]",
|
||||
"mov w0, v4.s[2]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[2]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[2], [x1]",
|
||||
"mov w0, v4.s[3]",
|
||||
"tbz w0, #31, #+0x10",
|
||||
"smov x0, v3.s[3]",
|
||||
"sbfiz x1, x0, #3, #32",
|
||||
"ld1 {v2.s}[3], [x1]",
|
||||
"str q2, [x28, #16]",
|
||||
"cmplt p0.s, p6/z, z18.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z6.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z5.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z16.s, p0/m, z0.s",
|
||||
"movi v18.2d, #0x0",
|
||||
"cmplt p0.s, p6/z, z4.s, #0",
|
||||
"punpklo p1.h, p0.b",
|
||||
"ld1w {z0.d}, p1/z, [z3.d]",
|
||||
"punpkhi p1.h, p0.b",
|
||||
"ld1w {z1.d}, p1/z, [z7.d]",
|
||||
"uzp1 v0.4s, v0.4s, v1.4s",
|
||||
"mov z2.s, p0/m, z0.s",
|
||||
"str q2, [x28, #16]",
|
||||
"str q18, [x28, #48]",
|
||||
"msr nzcv, x20"
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user