From 4e73f842e8b4d90776fb25a5b699b807902f1881 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 17 Dec 2021 22:02:57 -0800 Subject: [PATCH] tcg/i386: Implement avx512 immediate rotate AVX512VL has VPROLD and VPROLQ, layered onto the same opcode as PSHIFTD, but requires EVEX encoding and W1. Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 15 +++++++++++++-- tcg/i386/tcg-target.h | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index de01fbf40c..3a9f6a3360 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -362,7 +362,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ -#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) @@ -3000,6 +3000,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, insn = shift_imm_insn[vece]; } sub = 4; + goto gen_shift; + case INDEX_op_rotli_vec: + insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ + if (vece == MO_64) { + insn |= P_VEXW; + } + sub = 1; + goto gen_shift; gen_shift: tcg_debug_assert(vece != MO_8); if (type == TCG_TYPE_V256) { @@ -3289,6 +3297,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: case INDEX_op_x86_psrldq_vec: return C_O1_I1(x, x); @@ -3310,11 +3319,13 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_xor_vec: case INDEX_op_andc_vec: return 1; - case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; + case INDEX_op_rotli_vec: + return have_avx512vl && vece >= MO_32 ? 1 : -1; + case INDEX_op_shli_vec: case INDEX_op_shri_vec: /* We must expand the operation for MO_8. */ diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 79af353860..23a8b2a8c8 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -195,7 +195,7 @@ extern bool have_movbe; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 -#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_roti_vec have_avx512vl #define TCG_TARGET_HAS_rots_vec 0 #define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1