mirror of
https://github.com/FEX-Emu/FEX.git
synced 2025-01-31 11:32:07 +00:00
Merge pull request #3770 from alyssarosenzweig/opt/vzeroall
Tiny opt for vzeroall
This commit is contained in:
commit
ad4d4c9e67
@ -780,15 +780,15 @@ void OpDispatchBuilder::AVX128_VZERO(OpcodeArgs) {
|
||||
if (IsVZEROALL) {
|
||||
// NOTE: Despite the name being VZEROALL, this will still only ever
|
||||
// zero out up to the first 16 registers (even on AVX-512, where we have 32 registers)
|
||||
Ref ZeroVector;
|
||||
|
||||
for (uint32_t i = 0; i < NumRegs; i++) {
|
||||
// Explicitly not caching named vector zero. This ensures that every register gets movi #0.0 directly.
|
||||
Ref ZeroVector = LoadUncachedZeroVector(OpSize::i128Bit);
|
||||
ZeroVector = LoadUncachedZeroVector(OpSize::i128Bit);
|
||||
AVX128_StoreXMMRegister(i, ZeroVector, false);
|
||||
}
|
||||
|
||||
// More efficient for non-SRA upper-halves to cache the constant and store directly.
|
||||
const auto ZeroVector = LoadZeroVector(OpSize::i128Bit);
|
||||
// More efficient for non-SRA upper-halves to use a cached constant and store directly.
|
||||
for (uint32_t i = 0; i < NumRegs; i++) {
|
||||
AVX128_StoreXMMRegister(i, ZeroVector, true);
|
||||
}
|
||||
|
@ -1655,7 +1655,7 @@
|
||||
]
|
||||
},
|
||||
"vzeroall": {
|
||||
"ExpectedInstructionCount": 33,
|
||||
"ExpectedInstructionCount": 32,
|
||||
"Comment": [
|
||||
"Might be able to use DZ ZVA",
|
||||
"Map 1 0b01 0x77 L=1"
|
||||
@ -1677,23 +1677,22 @@
|
||||
"movi v29.2d, #0x0",
|
||||
"movi v30.2d, #0x0",
|
||||
"movi v31.2d, #0x0",
|
||||
"movi v2.2d, #0x0",
|
||||
"str q2, [x28, #256]",
|
||||
"str q2, [x28, #240]",
|
||||
"str q2, [x28, #224]",
|
||||
"str q2, [x28, #208]",
|
||||
"str q2, [x28, #192]",
|
||||
"str q2, [x28, #176]",
|
||||
"str q2, [x28, #160]",
|
||||
"str q2, [x28, #144]",
|
||||
"str q2, [x28, #128]",
|
||||
"str q2, [x28, #112]",
|
||||
"str q2, [x28, #96]",
|
||||
"str q2, [x28, #80]",
|
||||
"str q2, [x28, #64]",
|
||||
"str q2, [x28, #48]",
|
||||
"str q2, [x28, #32]",
|
||||
"str q2, [x28, #16]"
|
||||
"str q31, [x28, #256]",
|
||||
"str q31, [x28, #240]",
|
||||
"str q31, [x28, #224]",
|
||||
"str q31, [x28, #208]",
|
||||
"str q31, [x28, #192]",
|
||||
"str q31, [x28, #176]",
|
||||
"str q31, [x28, #160]",
|
||||
"str q31, [x28, #144]",
|
||||
"str q31, [x28, #128]",
|
||||
"str q31, [x28, #112]",
|
||||
"str q31, [x28, #96]",
|
||||
"str q31, [x28, #80]",
|
||||
"str q31, [x28, #64]",
|
||||
"str q31, [x28, #48]",
|
||||
"str q31, [x28, #32]",
|
||||
"str q31, [x28, #16]"
|
||||
]
|
||||
},
|
||||
"vcmpps xmm0, xmm1, xmm2, 0x00": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user