mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-14 13:57:51 +00:00
Move remaining MMX instructions from SSE to MMX.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113501 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
082bf2a977
commit
f73c5587fa
@ -148,6 +148,25 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
|
|||||||
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
|
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||||
|
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
||||||
|
string asm, Domain d> {
|
||||||
|
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
||||||
|
[(set DstRC:$dst, (Int SrcRC:$src))], d>;
|
||||||
|
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
||||||
|
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
|
||||||
|
}
|
||||||
|
|
||||||
|
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||||
|
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
||||||
|
PatFrag ld_frag, string asm, Domain d> {
|
||||||
|
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
|
||||||
|
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
|
||||||
|
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
|
||||||
|
(ins DstRC:$src1, x86memop:$src2), asm,
|
||||||
|
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
|
||||||
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// MMX EMMS & FEMMS Instructions
|
// MMX EMMS & FEMMS Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -533,13 +552,6 @@ def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
|
|||||||
|
|
||||||
// -- Conversion Instructions
|
// -- Conversion Instructions
|
||||||
let neverHasSideEffects = 1 in {
|
let neverHasSideEffects = 1 in {
|
||||||
def MMX_CVTPD2PIrr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|
||||||
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
|
|
||||||
let mayLoad = 1 in
|
|
||||||
def MMX_CVTPD2PIrm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
|
|
||||||
(ins f128mem:$src),
|
|
||||||
"cvtpd2pi\t{$src, $dst|$dst, $src}", []>;
|
|
||||||
|
|
||||||
def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
|
def MMX_CVTPI2PDrr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
|
||||||
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
|
"cvtpi2pd\t{$src, $dst|$dst, $src}", []>;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
@ -554,12 +566,6 @@ def MMX_CVTPI2PSrm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
|
|||||||
(ins i64mem:$src),
|
(ins i64mem:$src),
|
||||||
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
|
"cvtpi2ps\t{$src, $dst|$dst, $src}", []>;
|
||||||
|
|
||||||
def MMX_CVTPS2PIrr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|
||||||
"cvtps2pi\t{$src, $dst|$dst, $src}", []>;
|
|
||||||
let mayLoad = 1 in
|
|
||||||
def MMX_CVTPS2PIrm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
|
||||||
"cvtps2pi\t{$src, $dst|$dst, $src}", []>;
|
|
||||||
|
|
||||||
def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
def MMX_CVTTPD2PIrr : MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
||||||
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
|
"cvttpd2pi\t{$src, $dst|$dst, $src}", []>;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
@ -574,57 +580,33 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
|||||||
"cvttps2pi\t{$src, $dst|$dst, $src}", []>;
|
"cvttps2pi\t{$src, $dst|$dst, $src}", []>;
|
||||||
} // end neverHasSideEffects
|
} // end neverHasSideEffects
|
||||||
|
|
||||||
// Intrinsic versions.
|
// Intrinsic forms.
|
||||||
def MMX_CVTPD2PIirr : MMX2I<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
|
||||||
"cvtpd2pi\t{$src, $dst|$dst, $src}",
|
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
|
SSEPackedSingle>, TB;
|
||||||
def MMX_CVTPD2PIirm : MMX2I<0x2D, MRMSrcMem, (outs VR64:$dst),
|
defm MMX_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
|
||||||
(ins f128mem:$src),
|
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
|
||||||
"cvtpd2pi\t{$src, $dst|$dst, $src}",
|
SSEPackedDouble>, TB, OpSize;
|
||||||
[(set VR64:$dst,
|
defm MMX_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
|
||||||
(int_x86_sse_cvtpd2pi
|
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
|
||||||
(bitconvert (loadv2i64 addr:$src))))]>;
|
SSEPackedSingle>, TB;
|
||||||
def MMX_CVTPI2PDirr : MMX2I<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
|
defm MMX_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
|
||||||
"cvtpi2pd\t{$src, $dst|$dst, $src}",
|
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
|
SSEPackedDouble>, TB, OpSize;
|
||||||
|
defm MMX_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
|
||||||
|
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
|
||||||
|
SSEPackedDouble>, TB, OpSize;
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def MMX_CVTPI2PSirr : MMXI<0x2A, MRMSrcReg, (outs VR128:$dst),
|
defm MMX_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
|
||||||
(ins VR128:$src1, VR64:$src2),
|
int_x86_sse_cvtpi2ps,
|
||||||
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
SSEPackedSingle>, TB;
|
||||||
(int_x86_sse_cvtpi2ps VR128:$src1, VR64:$src2))]>;
|
|
||||||
def MMX_CVTPI2PSirm : MMXI<0x2A, MRMSrcMem, (outs VR128:$dst),
|
|
||||||
(ins VR128:$src1, i64mem:$src2),
|
|
||||||
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
|
||||||
[(set VR128:$dst,
|
|
||||||
(int_x86_sse_cvtpi2ps VR128:$src1,
|
|
||||||
(bitconvert (load_mmx addr:$src2))))]>;
|
|
||||||
}
|
}
|
||||||
def MMX_CVTPS2PIirr : MMXI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
// MMX->MMX vector casts.
|
||||||
"cvtps2pi\t{$src, $dst|$dst, $src}",
|
def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
|
(MMX_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
|
||||||
def MMX_CVTPS2PIirm : MMXI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
|
||||||
"cvtps2pi\t{$src, $dst|$dst, $src}",
|
(MMX_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
[(set VR64:$dst,
|
|
||||||
(int_x86_sse_cvtps2pi
|
|
||||||
(bitconvert (load_mmx addr:$src))))]>;
|
|
||||||
def MMX_CVTTPD2PIirr: MMX2I<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|
||||||
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
|
|
||||||
def MMX_CVTTPD2PIirm: MMX2I<0x2C, MRMSrcMem, (outs VR64:$dst),
|
|
||||||
(ins f128mem:$src),
|
|
||||||
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
[(set VR64:$dst,
|
|
||||||
(int_x86_sse_cvtpd2pi
|
|
||||||
(bitconvert (loadv2i64 addr:$src))))]>;
|
|
||||||
def MMX_CVTTPS2PIirr: MMXI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|
||||||
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
|
|
||||||
def MMX_CVTTPS2PIirm: MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
|
||||||
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
[(set VR64:$dst,
|
|
||||||
(int_x86_sse_cvtpd2pi
|
|
||||||
(bitconvert (load_mmx addr:$src))))]>;
|
|
||||||
|
|
||||||
// Extract / Insert
|
// Extract / Insert
|
||||||
def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
|
def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
|
||||||
|
@ -598,14 +598,6 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
|
|||||||
|
|
||||||
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
|
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
|
||||||
// and/or XMM operand(s).
|
// and/or XMM operand(s).
|
||||||
multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
|
||||||
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
|
||||||
string asm, Domain d> {
|
|
||||||
def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
|
|
||||||
[(set DstRC:$dst, (Int SrcRC:$src))], d>;
|
|
||||||
def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
|
|
||||||
[(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
||||||
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
|
||||||
@ -618,16 +610,6 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
|
|||||||
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
|
[(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
|
|
||||||
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
|
||||||
PatFrag ld_frag, string asm, Domain d> {
|
|
||||||
def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
|
|
||||||
asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
|
|
||||||
def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
|
|
||||||
(ins DstRC:$src1, x86memop:$src2), asm,
|
|
||||||
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
|
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
|
||||||
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
|
||||||
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
|
PatFrag ld_frag, string asm, bit Is2Addr = 1> {
|
||||||
@ -705,29 +687,6 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
"cvtsi2sd">, XD, REX_W;
|
"cvtsi2sd">, XD, REX_W;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Instructions below don't have an AVX form.
|
|
||||||
defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
|
|
||||||
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
SSEPackedSingle>, TB;
|
|
||||||
defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
|
|
||||||
f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
SSEPackedDouble>, TB, OpSize;
|
|
||||||
defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
|
|
||||||
f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
SSEPackedSingle>, TB;
|
|
||||||
defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
|
|
||||||
f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
|
|
||||||
SSEPackedDouble>, TB, OpSize;
|
|
||||||
defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
|
|
||||||
i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
|
|
||||||
SSEPackedDouble>, TB, OpSize;
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
|
||||||
defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
|
|
||||||
int_x86_sse_cvtpi2ps,
|
|
||||||
i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
|
||||||
SSEPackedSingle>, TB;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// SSE 1 Only
|
/// SSE 1 Only
|
||||||
|
|
||||||
// Aliases for intrinsics
|
// Aliases for intrinsics
|
||||||
@ -3957,10 +3916,6 @@ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
|
|||||||
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
|
(Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
|
||||||
(Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
|
(Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))),
|
|
||||||
(Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>;
|
|
||||||
def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
|
|
||||||
(Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
|
|
||||||
|
|
||||||
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
||||||
let Predicates = [HasSSE1] in {
|
let Predicates = [HasSSE1] in {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user