mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-17 00:50:58 +00:00
Support added for shifts and unpacking MMX instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35266 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ecb7a77885
commit
a348c56fde
@ -585,3 +585,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v4i16_ty,
|
||||
llvm_v4i16_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Integer shift ops.
|
||||
// MMX packed-integer shift intrinsics. Each type list names three types; the
// selection patterns that use these intrinsics pass two operands and produce
// one result. The first two types give the element layout of the shifted
// vector and must match the instruction's lane width: _w = 4 x i16,
// _d = 2 x i32, _q uses v2i32. The last type (always v2i32) is the shift
// count operand.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// Shift left logical
|
||||
def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
|
||||
// Shift right logical
def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
|
||||
// Shift right arithmetic (word and doubleword forms only are defined here)
def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
|
||||
Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector pack/unpack ops.
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
// Unpack-high doubleword intrinsic, tied to GCC's __builtin_ia32_punpckhdq.
// All three listed types are v2i32 and the intrinsic is marked IntrNoMem.
def int_x86_mmx_punpckh_dq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
|
||||
Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
|
||||
llvm_v2i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
59
lib/Target/X86/README-MMX.txt
Normal file
59
lib/Target/X86/README-MMX.txt
Normal file
@ -0,0 +1,59 @@
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Random ideas for the X86 backend: MMX-specific stuff.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We should compile
|
||||
|
||||
#include <mmintrin.h>
|
||||
|
||||
extern __m64 C;
|
||||
|
||||
void baz(__v2si *A, __v2si *B)
|
||||
{
|
||||
*A = __builtin_ia32_psllq(*B, C);
|
||||
_mm_empty();
|
||||
}
|
||||
|
||||
to:
|
||||
|
||||
.globl _baz
|
||||
_baz:
|
||||
call L3
|
||||
"L00000000001$pb":
|
||||
L3:
|
||||
popl %ecx
|
||||
subl $12, %esp
|
||||
movl 20(%esp), %eax
|
||||
movq (%eax), %mm0
|
||||
movl L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax
|
||||
movq (%eax), %mm1
|
||||
movl 16(%esp), %eax
|
||||
psllq %mm1, %mm0
|
||||
movq %mm0, (%eax)
|
||||
emms
|
||||
addl $12, %esp
|
||||
ret
|
||||
|
||||
not:
|
||||
|
||||
_baz:
|
||||
subl $12, %esp
|
||||
call "L1$pb"
|
||||
"L1$pb":
|
||||
popl %eax
|
||||
movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
|
||||
movl (%eax), %ecx
|
||||
movl %ecx, (%esp)
|
||||
movl 4(%eax), %eax
|
||||
movl %eax, 4(%esp)
|
||||
movl 20(%esp), %eax
|
||||
movq (%eax), %mm0
|
||||
movq (%esp), %mm1
|
||||
psllq %mm1, %mm0
|
||||
movl 16(%esp), %eax
|
||||
movq %mm0, (%eax)
|
||||
emms
|
||||
addl $12, %esp
|
||||
ret
|
@ -571,4 +571,44 @@ swizzle:
|
||||
movaps %xmm0, (%eax)
|
||||
ret
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We should compile this:
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
void foo(__m128i *A, __m128i *B) {
|
||||
*A = _mm_sll_epi16 (*A, *B);
|
||||
}
|
||||
|
||||
to:
|
||||
|
||||
_foo:
|
||||
subl $12, %esp
|
||||
movl 16(%esp), %edx
|
||||
movl 20(%esp), %eax
|
||||
movdqa (%edx), %xmm1
|
||||
movdqa (%eax), %xmm0
|
||||
psllw %xmm0, %xmm1
|
||||
movdqa %xmm1, (%edx)
|
||||
addl $12, %esp
|
||||
ret
|
||||
|
||||
not:
|
||||
|
||||
_foo:
|
||||
movl 8(%esp), %eax
|
||||
movdqa (%eax), %xmm0
|
||||
#IMPLICIT_DEF %eax
|
||||
pinsrw $2, %eax, %xmm0
|
||||
xorl %ecx, %ecx
|
||||
pinsrw $3, %ecx, %xmm0
|
||||
pinsrw $4, %eax, %xmm0
|
||||
pinsrw $5, %ecx, %xmm0
|
||||
pinsrw $6, %eax, %xmm0
|
||||
pinsrw $7, %ecx, %xmm0
|
||||
movl 4(%esp), %eax
|
||||
movdqa (%eax), %xmm1
|
||||
psllw %xmm0, %xmm1
|
||||
movdqa %xmm1, (%eax)
|
||||
ret
|
||||
|
@ -355,6 +355,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
|
||||
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE1()) {
|
||||
@ -2312,7 +2316,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
|
||||
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
|
||||
}
|
||||
|
||||
/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
|
||||
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
|
||||
///
|
||||
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
|
||||
unsigned NumNonZero, unsigned NumZero,
|
||||
|
@ -44,6 +44,10 @@ def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>;
|
||||
|
||||
def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
|
||||
|
||||
// Pattern fragments that bitconvert their single input node to the named
// 64-bit vector type (v8i8, v4i16 or v2i32 respectively).
def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
|
||||
def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
|
||||
def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX Multiclasses
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -94,13 +98,28 @@ let isTwoAddress = 1 in {
|
||||
[(set VR64:$dst,
|
||||
(OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
|
||||
}
|
||||
|
||||
// MMXI_binop_rmi_int - Defines three forms of an intrinsic-based binary MMX
// operation: register-register (rr), register-memory (rm) and
// register-immediate (ri).
//   opc       - opcode byte used by the rr and rm forms.
//   opc2      - opcode byte used by the ri form.
//   ImmForm   - instruction Format used by the ri form.
//   OpcodeStr - mnemonic; the operand string is concatenated onto it.
//   IntId     - intrinsic matched by all three selection patterns.
multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
||||
string OpcodeStr, Intrinsic IntId> {
|
||||
// Register form: both intrinsic operands are VR64 registers.
def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
|
||||
// Memory form: the second operand is a v2i32 load passed through bitconvert.
def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
(bitconvert (loadv2i32 addr:$src2))))]>;
|
||||
// Immediate form: the i32 immediate is wrapped in scalar_to_vector to form
// the second intrinsic operand.
def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
|
||||
[(set VR64:$dst, (IntId VR64:$src1,
|
||||
(scalar_to_vector (i32 imm:$src2))))]>;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX EMMS Instruction
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
|
||||
def MMX_EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX Scalar Instructions
|
||||
@ -132,6 +151,53 @@ defm MMX_PMULLW : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
|
||||
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw" , int_x86_mmx_pmulh_w , 1>;
|
||||
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
|
||||
|
||||
|
||||
// Pattern leaf that matches a build_vector node (used as a shuffle mask) for
// which the C++ predicate X86::isUNPCKHMask returns true.
def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
|
||||
return X86::isUNPCKHMask(N);
|
||||
}]>;
|
||||
|
||||
// Unpack-high instructions, selected from vector_shuffle nodes whose mask
// satisfies MMX_UNPCKH_shuffle_mask. Each width has a register (rr) and a
// memory (rm) form; all are two-address ($src1 is tied to $dst).
let isTwoAddress = 1 in {
|
||||
// punpckhbw (opcode 0x68): v8i8 shuffle.
def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
|
||||
(ops VR64:$dst, VR64:$src1, VR64:$src2),
|
||||
"punpckhbw {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
// Memory form: the v2i32 load is reinterpreted as v8i8 via bc_v8i8.
def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
|
||||
(ops VR64:$dst, VR64:$src1, i64mem:$src2),
|
||||
"punpckhbw {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v8i8 (vector_shuffle VR64:$src1,
|
||||
(bc_v8i8 (loadv2i32 addr:$src2)),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
// punpckhwd (opcode 0x69): v4i16 shuffle.
def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
|
||||
(ops VR64:$dst, VR64:$src1, VR64:$src2),
|
||||
"punpckhwd {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
// Memory form: the v2i32 load is reinterpreted as v4i16 via bc_v4i16.
def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
|
||||
(ops VR64:$dst, VR64:$src1, i64mem:$src2),
|
||||
"punpckhwd {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v4i16 (vector_shuffle VR64:$src1,
|
||||
(bc_v4i16 (loadv2i32 addr:$src2)),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
// punpckhdq (opcode 0x6A): v2i32 shuffle.
def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
|
||||
(ops VR64:$dst, VR64:$src1, VR64:$src2),
|
||||
"punpckhdq {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
// Memory form: the load is already v2i32, so no bitconvert fragment is used.
def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
|
||||
(ops VR64:$dst, VR64:$src1, i64mem:$src2),
|
||||
"punpckhdq {$src2, $dst|$dst, $src2}",
|
||||
[(set VR64:$dst,
|
||||
(v2i32 (vector_shuffle VR64:$src1,
|
||||
(loadv2i32 addr:$src2),
|
||||
MMX_UNPCKH_shuffle_mask)))]>;
|
||||
}
|
||||
|
||||
// Logical Instructions
|
||||
defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
|
||||
defm MMX_POR : MMXI_binop_rm_v2i32<0xEB, "por" , or, 1>;
|
||||
@ -150,6 +216,26 @@ let isTwoAddress = 1 in {
|
||||
(load addr:$src2))))]>;
|
||||
}
|
||||
|
||||
// Shift Instructions
|
||||
// Each defm below passes, in order: the opcode for the register/memory forms,
// the opcode for the immediate form, the Format of the immediate form, the
// mnemonic, and the intrinsic to match.

// Logical right shifts; immediate forms use the MRM2r format.
defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
|
||||
int_x86_mmx_psrl_w>;
|
||||
defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
|
||||
int_x86_mmx_psrl_d>;
|
||||
defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
|
||||
int_x86_mmx_psrl_q>;
|
||||
|
||||
// Logical left shifts; immediate forms use the MRM6r format.
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
|
||||
int_x86_mmx_psll_w>;
|
||||
defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
|
||||
int_x86_mmx_psll_d>;
|
||||
defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
|
||||
int_x86_mmx_psll_q>;
|
||||
|
||||
// Arithmetic right shifts; immediate forms use the MRM4r format. Only the
// word and doubleword variants are instantiated here.
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
|
||||
int_x86_mmx_psra_w>;
|
||||
defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
|
||||
int_x86_mmx_psra_d>;
|
||||
|
||||
// Move Instructions
|
||||
def MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
|
||||
"movd {$src, $dst|$dst, $src}", []>;
|
||||
@ -225,3 +311,23 @@ def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
|
||||
|
||||
// Splat v2i32
|
||||
// A shuffle of a single v2i32 register with undef, whose mask satisfies
// MMX_UNPCKH_shuffle_mask, is selected as punpckhdq with the same register
// used for both operands. AddedComplexity = 10 raises this pattern's matching
// priority.
let AddedComplexity = 10 in {
|
||||
def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
|
||||
MMX_UNPCKH_shuffle_mask:$sm),
|
||||
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
|
||||
}
|
||||
|
||||
// FIXME: Temporary workaround because 2-wide shuffle is broken.
// Selects the punpckh_dq intrinsic directly to the PUNPCKHDQ instruction:
// the register form when both operands are VR64, and the memory form when
// the second operand is a load.
|
||||
def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, VR64:$src2),
|
||||
(v2i32 (MMX_PUNPCKHDQrr VR64:$src1, VR64:$src2))>;
|
||||
def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, (load addr:$src2)),
|
||||
(v2i32 (MMX_PUNPCKHDQrm VR64:$src1, addr:$src2))>;
|
||||
|
||||
// Selection-DAG node for X86ISD::S2VEC: one result, one operand, with an
// empty type-constraint list and no node properties.
def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
|
||||
|
||||
// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
|
||||
// 16-bits matter.
// Both result types are selected as MOVD64rr from the GR32 source.
|
||||
def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
|
||||
def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
|
||||
|
Loading…
Reference in New Issue
Block a user