From 7bef62dc08a1af436c4b281fc81e70cb704300cc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 29 Nov 2017 11:35:45 +0000 Subject: [PATCH] [X86][SSE] Merged sse2_pack and sse2_pack_y PACKSS/PACKUS instruction templates. NFCI. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319308 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 124 +++++++++++++--------------------- 1 file changed, 46 insertions(+), 78 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 83b8a2eb5e3..17e728e02a3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3946,126 +3946,94 @@ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, let ExeDomain = SSEPackedInt in { multiclass sse2_pack opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, OpndItins itins, - PatFrag ld_frag, bit Is2Addr = 1> { + ValueType ArgVT, SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop, OpndItins itins, PatFrag ld_frag, + bit Is2Addr = 1> { def rr : PDI, Sched<[itins.Sched]>; def rm : PDI, Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass sse2_pack_y opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, OpndItins itins> { - def Yrr : PDI, Sched<[itins.Sched]>; - def Yrm : PDI, Sched<[itins.Sched.Folded, ReadAfterLd]>; -} - multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, OpndItins itins, - PatFrag ld_frag, bit Is2Addr = 1> { + ValueType ArgVT, SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop, OpndItins itins, PatFrag ld_frag, + bit Is2Addr = 1> { def rr : SS48I, Sched<[itins.Sched]>; def rm : SS48I, Sched<[itins.Sched.Folded, ReadAfterLd]>; } -multiclass sse4_pack_y opc, string OpcodeStr, ValueType OutVT, - ValueType ArgVT, SDNode OpNode, OpndItins itins> { - def Yrr : SS48I, Sched<[itins.Sched]>; - def Yrm : SS48I, Sched<[itins.Sched.Folded, ReadAfterLd]>; -} - let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, - SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, - SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, - SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; - defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, - SSE_PACK, loadv2i64, 0>, VEX_4V; + defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V, VEX_WIG; + defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, + i128mem, SSE_PACK, loadv2i64, 0>, VEX_4V; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss, SSE_PACK>, - VEX_4V, VEX_L, VEX_WIG; - defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, SSE_PACK>, - VEX_4V, VEX_L, VEX_WIG; + defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus, SSE_PACK>, - VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, SSE_PACK>, - VEX_4V, VEX_L; + defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, + VR256,i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L, VEX_WIG; + defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, + VR256, i256mem, SSE_PACK, loadv4i64, 0>, + VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { - defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, SSE_PACK, - memopv2i64>; - defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, SSE_PACK, - memopv2i64>; + defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, + i128mem, SSE_PACK, memopv2i64>; + defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, + i128mem, SSE_PACK, memopv2i64>; - defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, SSE_PACK, - memopv2i64>; + defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, + i128mem, SSE_PACK, memopv2i64>; - defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, SSE_PACK, - memopv2i64>; + defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, + i128mem, SSE_PACK, memopv2i64>; } } // ExeDomain = SSEPackedInt