Add support for the v1i64 type. This makes better code for this:
#include <mmintrin.h>
extern __m64 C;
void baz(__v2si *A, __v2si *B)
{
  *A = C;
  _mm_empty();
}

We get this:

_baz:
        call "L1$pb"
"L1$pb":
        popl %eax
        movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
        movq (%eax), %mm0
        movl 4(%esp), %eax
        movq %mm0, (%eax)
        emms
        ret

GCC gives us this:

_baz:
        pushl %ebx
        call L3
"L00000000001$pb":
L3:
        popl %ebx
        subl $8, %esp
        movl L_C$non_lazy_ptr-"L00000000001$pb"(%ebx), %eax
        movl (%eax), %edx
        movl 4(%eax), %ecx
        movl 16(%esp), %eax
        movl %edx, (%eax)
        movl %ecx, 4(%eax)
        emms
        addl $8, %esp
        popl %ebx
        ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35351 91177308-0d34-0410-b5e6-96231b3b80d8
parent 15213b77cf
commit eebc8a1bc5
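Illustrative sketch (not part of the commit): with v1i64 legal in the VR64
register class, the C snippet above corresponds to IR roughly like the
following, and the 64-bit copy selects to one movq load and one movq store.
The global, the bitcast, and the intrinsic call are assumptions, written in
2007-era IR syntax.

    @C = external global <1 x i64>

    define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
    entry:
      %t = load <1 x i64>* @C                   ; movq (%eax), %mm0
      %p = bitcast <2 x i32>* %A to <1 x i64>*
      store <1 x i64> %t, <1 x i64>* %p         ; movq %mm0, (%eax)
      call void @llvm.x86.mmx.emms()            ; emms
      ret void
    }

    declare void @llvm.x86.mmx.emms()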
@@ -94,6 +94,7 @@ def llvm_v16i8_ty : LLVMVectorType<v16i8,16, llvm_i8_ty>; // 16 x i8
 def llvm_v8i16_ty : LLVMVectorType<v8i16, 8, llvm_i16_ty>; // 8 x i16
 def llvm_v2i64_ty : LLVMVectorType<v2i64, 2, llvm_i64_ty>; // 2 x i64
 def llvm_v2i32_ty : LLVMVectorType<v2i32, 2, llvm_i32_ty>; // 2 x i32
+def llvm_v1i64_ty : LLVMVectorType<v1i64, 1, llvm_i64_ty>; // 1 x i64
 def llvm_v4i32_ty : LLVMVectorType<v4i32, 4, llvm_i32_ty>; // 4 x i32
 def llvm_v4f32_ty : LLVMVectorType<v4f32, 4, llvm_float_ty>; // 4 x float
 def llvm_v2f64_ty : LLVMVectorType<v2f64, 2, llvm_double_ty>;// 2 x double
@@ -314,6 +314,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
     addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
     addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);
 
     // FIXME: add MMX packed arithmetics
 
@@ -347,10 +348,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::XOR,  MVT::v2i32, Legal);
 
     setOperationAction(ISD::LOAD, MVT::v8i8,  Promote);
-    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v2i32);
+    AddPromotedToType (ISD::LOAD, MVT::v8i8,  MVT::v1i64);
     setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
-    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v2i32);
-    setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
+    AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
+    setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
+    AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
+    setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
 
     setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
     setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
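Illustrative aside (not part of the commit): a Promote action plus
AddPromotedToType tells the legalizer to rewrite a load of the narrow vector
type as a load of the wider type followed by a bitconvert, so every 64-bit
MMX load funnels into the single loadv1i64 pattern. A sketch in assumed IR:

    ;; before legalization
    %v = load <2 x i32>* %p

    ;; after promotion to v1i64 (replaces the load above)
    %q = bitcast <2 x i32>* %p to <1 x i64>*
    %w = load <1 x i64>* %q
    %v = bitcast <1 x i64> %w to <2 x i32>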
@@ -37,12 +37,13 @@ def IMPLICIT_DEF_VR64 : I<0, Pseudo, (ops VR64:$dst),
 def : Pat<(v8i8  (undef)), (IMPLICIT_DEF_VR64)>;
 def : Pat<(v4i16 (undef)), (IMPLICIT_DEF_VR64)>;
 def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>;
+def : Pat<(v1i64 (undef)), (IMPLICIT_DEF_VR64)>;
 
 //===----------------------------------------------------------------------===//
 // MMX Pattern Fragments
 //===----------------------------------------------------------------------===//
 
-def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
+def loadv1i64 : PatFrag<(ops node:$ptr), (v1i64 (load node:$ptr))>;
 
 def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
 def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
@@ -65,7 +66,7 @@ let isTwoAddress = 1 in {
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                 [(set VR64:$dst, (OpVT (OpNode VR64:$src1,
                                         (bitconvert
-                                         (loadv2i32 addr:$src2)))))]>;
+                                         (loadv1i64 addr:$src2)))))]>;
 }
 
 multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
@@ -78,25 +79,25 @@ let isTwoAddress = 1 in {
   def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                 [(set VR64:$dst, (IntId VR64:$src1,
-                                  (bitconvert (loadv2i32 addr:$src2))))]>;
+                                  (bitconvert (loadv1i64 addr:$src2))))]>;
 }
 
-// MMXI_binop_rm_v2i32 - Simple MMX binary operator whose type is v2i32.
+// MMXI_binop_rm_v1i64 - Simple MMX binary operator whose type is v1i64.
 //
 // FIXME: we could eliminate this and use MMXI_binop_rm instead if tblgen knew
 // to collapse (bitconvert VT to VT) into its operand.
 //
-multiclass MMXI_binop_rm_v2i32<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass MMXI_binop_rm_v1i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                bit Commutable = 0> {
   def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
-                [(set VR64:$dst, (v2i32 (OpNode VR64:$src1, VR64:$src2)))]> {
+                [(set VR64:$dst, (v1i64 (OpNode VR64:$src1, VR64:$src2)))]> {
     let isCommutable = Commutable;
   }
   def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                 [(set VR64:$dst,
-                  (OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
+                  (OpNode VR64:$src1,(loadv1i64 addr:$src2)))]>;
 }
 
 multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
@@ -107,7 +108,7 @@ let isTwoAddress = 1 in {
   def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                 !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                 [(set VR64:$dst, (IntId VR64:$src1,
-                                  (bitconvert (loadv2i32 addr:$src2))))]>;
+                                  (bitconvert (loadv1i64 addr:$src2))))]>;
   def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
                   [(set VR64:$dst, (IntId VR64:$src1,
@@ -177,7 +178,7 @@ def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
                            "punpckhbw {$src2, $dst|$dst, $src2}",
                            [(set VR64:$dst,
                              (v8i8 (vector_shuffle VR64:$src1,
-                                    (bc_v8i8 (loadv2i32 addr:$src2)),
+                                    (bc_v8i8 (loadv1i64 addr:$src2)),
                                     MMX_UNPCKH_shuffle_mask)))]>;
 def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
                            (ops VR64:$dst, VR64:$src1, VR64:$src2),
@@ -190,38 +191,38 @@ def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
                            "punpckhwd {$src2, $dst|$dst, $src2}",
                            [(set VR64:$dst,
                              (v4i16 (vector_shuffle VR64:$src1,
-                                     (bc_v4i16 (loadv2i32 addr:$src2)),
+                                     (bc_v4i16 (loadv1i64 addr:$src2)),
                                      MMX_UNPCKH_shuffle_mask)))]>;
 def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
                            (ops VR64:$dst, VR64:$src1, VR64:$src2),
                            "punpckhdq {$src2, $dst|$dst, $src2}",
                            [(set VR64:$dst,
-                             (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+                             (v1i64 (vector_shuffle VR64:$src1, VR64:$src2,
                                      MMX_UNPCKH_shuffle_mask)))]>;
 def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
                            (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                            "punpckhdq {$src2, $dst|$dst, $src2}",
                            [(set VR64:$dst,
-                             (v2i32 (vector_shuffle VR64:$src1,
-                                     (loadv2i32 addr:$src2),
+                             (v1i64 (vector_shuffle VR64:$src1,
+                                     (loadv1i64 addr:$src2),
                                      MMX_UNPCKH_shuffle_mask)))]>;
 }
 
 // Logical Instructions
-defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
-defm MMX_POR  : MMXI_binop_rm_v2i32<0xEB, "por" , or,  1>;
-defm MMX_PXOR : MMXI_binop_rm_v2i32<0xEF, "pxor", xor, 1>;
+defm MMX_PAND : MMXI_binop_rm_v1i64<0xDB, "pand", and, 1>;
+defm MMX_POR  : MMXI_binop_rm_v1i64<0xEB, "por" , or,  1>;
+defm MMX_PXOR : MMXI_binop_rm_v1i64<0xEF, "pxor", xor, 1>;
 
 let isTwoAddress = 1 in {
   def MMX_PANDNrr : MMXI<0xDF, MRMSrcReg,
                          (ops VR64:$dst, VR64:$src1, VR64:$src2),
                          "pandn {$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1),
+                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                                   VR64:$src2)))]>;
   def MMX_PANDNrm : MMXI<0xDF, MRMSrcMem,
                          (ops VR64:$dst, VR64:$src1, i64mem:$src2),
                          "pandn {$src2, $dst|$dst, $src2}",
-                         [(set VR64:$dst, (v2i32 (and (vnot VR64:$src1),
+                         [(set VR64:$dst, (v1i64 (and (vnot VR64:$src1),
                                                   (load addr:$src2))))]>;
 }
 
@@ -262,10 +263,10 @@ def MOVQ64rr : MMXI<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
                     "movq {$src, $dst|$dst, $src}", []>;
 def MOVQ64rm : MMXI<0x6F, MRMSrcMem, (ops VR64:$dst, i64mem:$src),
                     "movq {$src, $dst|$dst, $src}",
-                    [(set VR64:$dst, (loadv2i32 addr:$src))]>;
+                    [(set VR64:$dst, (loadv1i64 addr:$src))]>;
 def MOVQ64mr : MMXI<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                     "movq {$src, $dst|$dst, $src}",
-                    [(store (v2i32 VR64:$src), addr:$dst)]>;
+                    [(store (v1i64 VR64:$src), addr:$dst)]>;
 
 // Conversion instructions
 def CVTPI2PSrr : MMXI<0x2A, MRMSrcReg, (ops VR128:$dst, VR64:$src),
@@ -308,6 +309,18 @@ def MASKMOVQ : I<0xF7, MRMDestMem, (ops VR64:$src, VR64:$mask),
                  "maskmovq {$mask, $src|$src, $mask}", []>, TB,
                  Requires<[HasMMX]>;
 
+//===----------------------------------------------------------------------===//
+// Alias Instructions
+//===----------------------------------------------------------------------===//
+
+// Alias instructions that map zero vector to pxor.
+// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+let isReMaterializable = 1 in {
+def MMX_V_SET0 : MMXI<0xEF, MRMInitReg, (ops VR64:$dst),
+                      "pxor $dst, $dst",
+                      [(set VR64:$dst, (v1i64 immAllZerosV))]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
@@ -317,21 +330,35 @@ def : Pat<(store (v8i8 VR64:$src), addr:$dst),
           (MOVQ64mr addr:$dst, VR64:$src)>;
 def : Pat<(store (v4i16 VR64:$src), addr:$dst),
           (MOVQ64mr addr:$dst, VR64:$src)>;
+def : Pat<(store (v2i32 VR64:$src), addr:$dst),
+          (MOVQ64mr addr:$dst, VR64:$src)>;
 
+// 128-bit vector all zero's.
+def : Pat<(v8i8  immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v4i16 immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v2i32 immAllZerosV), (MMX_V_SET0)>;
+def : Pat<(v1i64 immAllZerosV), (MMX_V_SET0)>;
+
 // Bit convert.
+def : Pat<(v8i8  (bitconvert (v1i64 VR64:$src))), (v8i8  VR64:$src)>;
 def : Pat<(v8i8  (bitconvert (v2i32 VR64:$src))), (v8i8  VR64:$src)>;
 def : Pat<(v8i8  (bitconvert (v4i16 VR64:$src))), (v8i8  VR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8  VR64:$src))), (v1i64 VR64:$src)>;
 
-// Splat v2i32
+// Splat v1i64
 let AddedComplexity = 10 in {
-  def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+  def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef),
              MMX_splat_mask:$sm),
             (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
-  def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+  def : Pat<(vector_shuffle (v1i64 VR64:$src), (undef),
              MMX_UNPCKH_shuffle_mask:$sm),
             (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
 }
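Illustrative aside (not part of the commit): the bitconvert patterns above
are free at selection time, since all four types occupy the same MM register.
An IR cast such as

    %b = bitcast <2 x i32> %a to <1 x i64>

matches (v1i64 (bitconvert (v2i32 VR64:$src))) and emits no instruction,
which is what keeps the promoted v8i8/v4i16/v2i32 loads from costing anything.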
@@ -340,5 +367,5 @@ def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
 
 // Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
 // 16-bits matter.
-def : Pat<(v8i8  (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
 def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
+def : Pat<(v8i8  (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
@@ -441,7 +441,7 @@ def RST : RegisterClass<"X86", [f64], 32,
 }
 
 // Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32], 64,
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
                          [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
 def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
                           [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -52,6 +52,7 @@ std::string llvm::getName(MVT::ValueType T) {
   case MVT::v8i8:  return "MVT::v8i8";
   case MVT::v4i16: return "MVT::v4i16";
   case MVT::v2i32: return "MVT::v2i32";
+  case MVT::v1i64: return "MVT::v1i64";
   case MVT::v16i8: return "MVT::v16i8";
   case MVT::v8i16: return "MVT::v8i16";
   case MVT::v4i32: return "MVT::v4i32";
@@ -82,6 +83,7 @@ std::string llvm::getEnumName(MVT::ValueType T) {
   case MVT::v8i8:  return "MVT::v8i8";
   case MVT::v4i16: return "MVT::v4i16";
   case MVT::v2i32: return "MVT::v2i32";
+  case MVT::v1i64: return "MVT::v1i64";
   case MVT::v16i8: return "MVT::v16i8";
   case MVT::v8i16: return "MVT::v8i16";
   case MVT::v4i32: return "MVT::v4i32";