From 02926157aa83b5255ae437c10ac610ce33f44582 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 26 Dec 2016 07:26:07 +0000
Subject: [PATCH] [AVX-512] Fix some patterns to use extended register classes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290536 91177308-0d34-0410-b5e6-96231b3b80d8
---
Note: this patch text was re-flowed from a whitespace-collapsed copy.
Template arguments that had been stripped from context lines (the `!cast<I>`
casts and the multiclass parameter lists) are restored. Indentation and the
placement of blank context lines are reconstructed to satisfy the hunk line
counts; `git apply --ignore-whitespace` may be needed if the context does not
match byte-for-byte. The author e-mail address was lost and is not restored.

 lib/Target/X86/X86InstrAVX512.td | 137 ++++++++++++++++---------------
 1 file changed, 73 insertions(+), 64 deletions(-)

diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 534068f9a13..bd6ae0cb731 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3001,24 +3001,24 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
 
 let Predicates = [HasVLX, NoBWI] in {
   // 128-bit load/store without BWI.
-  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
-            (VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
-  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
-            (VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
-            (VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
-  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
-            (VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
+  def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
+  def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
+  def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
+  def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
 
   // 256-bit load/store without BWI.
-  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
-            (VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
-  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
-            (VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
-            (VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
-  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
-            (VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
+  def : Pat<(alignedstore256 (v16i16 VR256X:$src), addr:$dst),
+            (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
+  def : Pat<(alignedstore256 (v32i8 VR256X:$src), addr:$dst),
+            (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
+  def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
+            (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
+  def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
+            (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
 }
 
 let Predicates = [HasVLX] in {
@@ -6618,16 +6618,16 @@ let Predicates = [HasAVX512, HasVLX] in {
   let AddedComplexity = 15 in {
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-            (VCVTPD2DQZ128rr VR128:$src)>;
+            (VCVTPD2DQZ128rr VR128X:$src)>;
   def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
-            (VCVTPD2UDQZ128rr VR128:$src)>;
+            (VCVTPD2UDQZ128rr VR128X:$src)>;
   def : Pat<(X86vzmovl (v2i64 (bitconvert
                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-            (VCVTTPD2DQZ128rr VR128:$src)>;
+            (VCVTTPD2DQZ128rr VR128X:$src)>;
   def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
                                (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))),
-            (VCVTTPD2UDQZ128rr VR128:$src)>;
+            (VCVTTPD2UDQZ128rr VR128X:$src)>;
   }
 }
 
@@ -6642,10 +6642,10 @@ let Predicates = [HasDQI, HasVLX] in {
   let AddedComplexity = 15 in {
   def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
-            (VCVTQQ2PSZ128rr VR128:$src)>;
+            (VCVTQQ2PSZ128rr VR128X:$src)>;
   def : Pat<(X86vzmovl (v2f64 (bitconvert
                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
-            (VCVTUQQ2PSZ128rr VR128:$src)>;
+            (VCVTUQQ2PSZ128rr VR128X:$src)>;
   }
 }
 
@@ -8552,33 +8552,42 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
 
 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
 
+def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
+                                                      VR128X:$src))>;
+def avx512_v8i1sextv8i16  : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
+def avx512_v4i1sextv4i32  : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
+def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
+                                                      VR256X:$src))>;
+def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
+def avx512_v8i1sextv8i32  : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
+
 let Predicates = [HasBWI, HasVLX] in {
   def : Pat<(xor
-            (bc_v2i64 (v16i1sextv16i8)),
-            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
-            (VPABSBZ128rr VR128:$src)>;
+            (bc_v2i64 (avx512_v16i1sextv16i8)),
+            (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
+            (VPABSBZ128rr VR128X:$src)>;
   def : Pat<(xor
-            (bc_v2i64 (v8i1sextv8i16)),
-            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
-            (VPABSWZ128rr VR128:$src)>;
+            (bc_v2i64 (avx512_v8i1sextv8i16)),
+            (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
+            (VPABSWZ128rr VR128X:$src)>;
   def : Pat<(xor
-            (bc_v4i64 (v32i1sextv32i8)),
-            (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
-            (VPABSBZ256rr VR256:$src)>;
+            (bc_v4i64 (avx512_v32i1sextv32i8)),
+            (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
+            (VPABSBZ256rr VR256X:$src)>;
   def : Pat<(xor
-            (bc_v4i64 (v16i1sextv16i16)),
-            (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
-            (VPABSWZ256rr VR256:$src)>;
+            (bc_v4i64 (avx512_v16i1sextv16i16)),
+            (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
+            (VPABSWZ256rr VR256X:$src)>;
 }
 let Predicates = [HasAVX512, HasVLX] in {
   def : Pat<(xor
-            (bc_v2i64 (v4i1sextv4i32)),
-            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
-            (VPABSDZ128rr VR128:$src)>;
+            (bc_v2i64 (avx512_v4i1sextv4i32)),
+            (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
+            (VPABSDZ128rr VR128X:$src)>;
   def : Pat<(xor
-            (bc_v4i64 (v8i1sextv8i32)),
-            (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
-            (VPABSDZ256rr VR256:$src)>;
+            (bc_v4i64 (avx512_v8i1sextv8i32)),
+            (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
+            (VPABSDZ256rr VR256X:$src)>;
 }
 
 let Predicates = [HasAVX512] in {
@@ -9086,27 +9095,27 @@ defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
   let Predicates = [HasAVX512] in {
     // extracted scalar math op with insert via movss
-    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
-          (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
-          FR32:$src))))),
+    def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
+          (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
+          FR32X:$src))))),
       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
-          (COPY_TO_REGCLASS FR32:$src, VR128))>;
+          (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
 
     // extracted scalar math op with insert via blend
-    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
-          (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
-          FR32:$src))), (i8 1))),
+    def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
+          (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
+          FR32X:$src))), (i8 1))),
       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
-          (COPY_TO_REGCLASS FR32:$src, VR128))>;
+          (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
 
     // vector math op with insert via movss
-    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
-          (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
+    def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
+          (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
 
     // vector math op with insert via blend
-    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
-          (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
+    def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
+          (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
       (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
   }
 }
@@ -9119,27 +9128,27 @@ defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
   let Predicates = [HasAVX512] in {
     // extracted scalar math op with insert via movsd
-    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
-          (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
-          FR64:$src))))),
+    def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
+          (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
+          FR64X:$src))))),
       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
-          (COPY_TO_REGCLASS FR64:$src, VR128))>;
+          (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
 
     // extracted scalar math op with insert via blend
-    def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
-          (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
-          FR64:$src))), (i8 1))),
+    def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
+          (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
+          FR64X:$src))), (i8 1))),
       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
-          (COPY_TO_REGCLASS FR64:$src, VR128))>;
+          (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
 
     // vector math op with insert via movsd
-    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
-          (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
+    def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
+          (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
 
     // vector math op with insert via blend
-    def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
-          (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
+    def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
+          (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
       (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
   }
 }