From eebc8a1bc505ebb5c702e63c248a8956d88fda77 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 26 Mar 2007 07:53:08 +0000 Subject: [PATCH] Add support for the v1i64 type. This makes better code for this: #include <mmintrin.h> extern __m64 C; void baz(__v2si *A, __v2si *B) { *A = C; _mm_empty(); } We get this: _baz: call "L1$pb" "L1$pb": popl %eax movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax movq (%eax), %mm0 movl 4(%esp), %eax movq %mm0, (%eax) emms ret GCC gives us this: _baz: pushl %ebx call L3 "L00000000001$pb": L3: popl %ebx subl $8, %esp movl L_C$non_lazy_ptr-"L00000000001$pb"(%ebx), %eax movl (%eax), %edx movl 4(%eax), %ecx movl 16(%esp), %eax movl %edx, (%eax) movl %ecx, 4(%eax) emms addl $8, %esp popl %ebx ret git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35351 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Intrinsics.td | 1 + lib/Target/X86/X86ISelLowering.cpp | 9 ++-- lib/Target/X86/X86InstrMMX.td | 75 ++++++++++++++++++++---------- lib/Target/X86/X86RegisterInfo.td | 2 +- utils/TableGen/CodeGenTarget.cpp | 2 + 5 files changed, 61 insertions(+), 28 deletions(-) diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 0f3dfb6e08e..b0aca080211 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -94,6 +94,7 @@ def llvm_v16i8_ty : LLVMVectorType; // 16 x i8 def llvm_v8i16_ty : LLVMVectorType; // 8 x i16 def llvm_v2i64_ty : LLVMVectorType; // 2 x i64 def llvm_v2i32_ty : LLVMVectorType; // 2 x i32 +def llvm_v1i64_ty : LLVMVectorType; // 1 x i64 def llvm_v4i32_ty : LLVMVectorType; // 4 x i32 def llvm_v4f32_ty : LLVMVectorType; // 4 x float def llvm_v2f64_ty : LLVMVectorType;// 2 x double diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1686c25ed09..2d97065c684 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -314,6 +314,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) addRegisterClass(MVT::v8i8, 
X86::VR64RegisterClass); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); // FIXME: add MMX packed arithmetics @@ -347,10 +348,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::XOR, MVT::v2i32, Legal); setOperationAction(ISD::LOAD, MVT::v8i8, Promote); - AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v2i32); + AddPromotedToType (ISD::LOAD, MVT::v8i8, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v4i16, Promote); - AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v2i32); - setOperationAction(ISD::LOAD, MVT::v2i32, Legal); + AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v2i32, Promote); + AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); + setOperationAction(ISD::LOAD, MVT::v1i64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand); diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 71fea05a7f7..08efcc11985 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -37,12 +37,13 @@ def IMPLICIT_DEF_VR64 : I<0, Pseudo, (ops VR64:$dst), def : Pat<(v8i8 (undef)), (IMPLICIT_DEF_VR64)>; def : Pat<(v4i16 (undef)), (IMPLICIT_DEF_VR64)>; def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>; +def : Pat<(v1i64 (undef)), (IMPLICIT_DEF_VR64)>; //===----------------------------------------------------------------------===// // MMX Pattern Fragments //===----------------------------------------------------------------------===// -def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>; +def loadv1i64 : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>; def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>; def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>; @@ -65,7 +66,7 @@ let isTwoAddress = 1 in { !strconcat(OpcodeStr, 
" {$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (OpVT (OpNode VR64:$src1, (bitconvert - (loadv2i32 addr:$src2)))))]>; + (loadv1i64 addr:$src2)))))]>; } multiclass MMXI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, @@ -78,25 +79,25 @@ let isTwoAddress = 1 in { def rm : MMXI; + (bitconvert (loadv1i64 addr:$src2))))]>; } - // MMXI_binop_rm_v2i32 - Simple MMX binary operator whose type is v2i32. + // MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64. // // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew // to collapse (bitconvert VT to VT) into its operand. // - multiclass MMXI_binop_rm_v2i32 opc, string OpcodeStr, SDNode OpNode, + multiclass MMXI_binop_rm_v1i64 opc, string OpcodeStr, SDNode OpNode, bit Commutable = 0> { def rr : MMXI { + [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> { let isCommutable = Commutable; } def rm : MMXI; + (OpNode VR64:$src1,(loadv1i64 addr:$src2)))]>; } multiclass MMXI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, @@ -107,7 +108,7 @@ let isTwoAddress = 1 in { def rm : MMXI; + (bitconvert (loadv1i64 addr:$src2))))]>; def ri : MMXIi8; def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2), @@ -190,38 +191,38 @@ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, "punpckhwd {$src2, $dst|$dst, $src2}", [(set VR64:$dst, (v4i16 (vector_shuffle VR64:$src1, - (bc_v4i16 (loadv2i32 addr:$src2)), + (bc_v4i16 (loadv1i64 addr:$src2)), MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2), "punpckhdq {$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, VR64:$src2, + (v1i64 (vector_shuffle VR64:$src1, VR64:$src2, MMX_UNPCKH_shuffle_mask)))]>; def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2), "punpckhdq {$src2, $dst|$dst, $src2}", [(set VR64:$dst, - (v2i32 (vector_shuffle VR64:$src1, - (loadv2i32 addr:$src2), + (v1i64 (vector_shuffle 
VR64:$src1, + (loadv1i64 addr:$src2), MMX_UNPCKH_shuffle_mask)))]>; } // Logical Instructions -defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>; -defm MMX_POR : MMXI_binop_rm_v2i32<0xEB, "por" , or, 1>; -defm MMX_PXOR : MMXI_binop_rm_v2i32<0xEF, "pxor", xor, 1>; +defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>; +defm MMX_POR : MMXI_binop_rm_v1i64<0xEB, "por" , or, 1>; +defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>; let isTwoAddress = 1 in { def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2), "pandn {$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1), + [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), VR64:$src2)))]>; def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2), "pandn {$src2, $dst|$dst, $src2}", - [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1), + [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1), (load addr:$src2))))]>; } @@ -262,10 +263,10 @@ def MOVQ64rr : MMXI<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", []>; def MOVQ64rm : MMXI<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src), "movq {$src, $dst|$dst, $src}", - [(set VR64:$dst, (loadv2i32 addr:$src))]>; + [(set VR64:$dst, (loadv1i64 addr:$src))]>; def MOVQ64mr : MMXI<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src), "movq {$src, $dst|$dst, $src}", - [(store (v2i32 VR64:$src), addr:$dst)]>; + [(store (v1i64 VR64:$src), addr:$dst)]>; // Conversion instructions def CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src), @@ -308,6 +309,18 @@ def MASKMOVQ : I<0xF7, MRMDestMem, (ops VR64:$src, VR64:$mask), "maskmovq {$mask, $src|$src, $mask}", []>, TB, Requires<[HasMMX]>; +//===----------------------------------------------------------------------===// +// Alias Instructions +//===----------------------------------------------------------------------===// + +// Alias instructions that map zero vector to pxor. 
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok. +let isReMaterializable = 1 in { +def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst), + "pxor $dst, $dst", + [(set VR64:$dst, (v1i64 immAllZerosV))]>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -317,21 +330,35 @@ def : Pat<(store (v8i8 VR64:$src), addr:$dst), (MOVQ64mr addr:$dst, VR64:$src)>; def : Pat<(store (v4i16 VR64:$src), addr:$dst), (MOVQ64mr addr:$dst, VR64:$src)>; +def : Pat<(store (v2i32 VR64:$src), addr:$dst), + (MOVQ64mr addr:$dst, VR64:$src)>; + +// 64-bit vector all zeros. +def : Pat<(v8i8 immAllZerosV), (MMX_V_SET0)>; +def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>; +def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>; +def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>; // Bit convert. +def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; -// Splat v2i32 +// Splat v1i64 let AddedComplexity = 10 in { - def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef), + def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef), MMX_splat_mask:$sm), (MMX_PUNPCKHDQrr VR64:$src, 
VR64:$src)>; - def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef), + def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef), MMX_UNPCKH_shuffle_mask:$sm), (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } @@ -340,5 +367,5 @@ def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>; // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or // 16-bits matter. +def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>; def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>; -def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 696068e7591..eb602ec1d7f 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -441,7 +441,7 @@ def RST : RegisterClass<"X86", [f64], 32, } // Generic vector registers: VR64 and VR128. -def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64, +def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index a261871861d..7cf80b02abe 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -52,6 +52,7 @@ std::string llvm::getName(MVT::ValueType T) { case MVT::v8i8: return "MVT::v8i8"; case MVT::v4i16: return "MVT::v4i16"; case MVT::v2i32: return "MVT::v2i32"; + case MVT::v1i64: return "MVT::v1i64"; case MVT::v16i8: return "MVT::v16i8"; case MVT::v8i16: return "MVT::v8i16"; case MVT::v4i32: return "MVT::v4i32"; @@ -82,6 +83,7 @@ std::string llvm::getEnumName(MVT::ValueType T) { case MVT::v8i8: return "MVT::v8i8"; case MVT::v4i16: return "MVT::v4i16"; case MVT::v2i32: return "MVT::v2i32"; + case MVT::v1i64: return "MVT::v1i64"; case MVT::v16i8: return "MVT::v16i8"; case MVT::v8i16: 
return "MVT::v8i16"; case MVT::v4i32: return "MVT::v4i32";