From 337f0b2948de65650d3b4ccaa45f6c13516b0a8f Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Thu, 3 Feb 2022 13:19:33 -0800 Subject: [PATCH] [x64] Add AVX512 optimization for `VECTOR_ROTATE_LEFT(Int32)` `vprolvd` is a nearly 1:1 analog of this opcode and can be conditionally emitted when the host supports AVX512{F,VL}. The AltiVec docs say that `vrl{bhw}` uses only the low log2(n) bits of each rotate count, where n is the element size in bits. [vprolvd](https://www.felixcloutier.com/x86/vprold:vprolvd:vprolq:vprolvq) takes the rotate count modulo the element size in bits, which is the same as masking to the low log2(n) bits. So `vrlw` maps exactly to `vprolvd`. --- src/xenia/cpu/backend/x64/x64_seq_vector.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_seq_vector.cc b/src/xenia/cpu/backend/x64/x64_seq_vector.cc index 4daea260b..7cf4650b5 100644 --- a/src/xenia/cpu/backend/x64/x64_seq_vector.cc +++ b/src/xenia/cpu/backend/x64/x64_seq_vector.cc @@ -2,7 +2,7 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * ****************************************************************************** - * Copyright 2018 Xenia Developers. All rights reserved. * + * Copyright 2022 Xenia Developers. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ @@ -1287,7 +1287,6 @@ static __m128i EmulateVectorRotateLeft(void*, __m128i src1, __m128i src2) { return _mm_load_si128(reinterpret_cast<__m128i*>(value)); } -// TODO(benvanik): AVX512 has a native variable rotate (rolv). 
struct VECTOR_ROTATE_LEFT_V128 : Sequence> { @@ -1318,7 +1317,9 @@ struct VECTOR_ROTATE_LEFT_V128 e.vmovaps(i.dest, e.xmm0); break; case INT32_TYPE: { - if (e.IsFeatureEnabled(kX64EmitAVX2)) { + if (e.IsFeatureEnabled(kX64EmitAVX512Ortho)) { + e.vprolvd(i.dest, i.src1, i.src2); + } else if (e.IsFeatureEnabled(kX64EmitAVX2)) { Xmm temp = i.dest; if (i.dest == i.src1 || i.dest == i.src2) { temp = e.xmm2; @@ -2683,4 +2684,4 @@ EMITTER_OPCODE_TABLE(OPCODE_UNPACK, UNPACK); } // namespace x64 } // namespace backend } // namespace cpu -} // namespace xe \ No newline at end of file +} // namespace xe