mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-04 09:45:00 +00:00
Add sqrt, rsqrt and rcp AVX instructions
llvm-svn: 107166
This commit is contained in:
parent
151bec3c2d
commit
2dca1dd168
@ -1873,7 +1873,7 @@ let isCommutable = 0 in {
|
||||
|
||||
/// sse1_fp_unop_s - SSE1 unops in scalar form.
|
||||
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F32Int> {
|
||||
SDNode OpNode, Intrinsic F32Int> {
|
||||
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
|
||||
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
|
||||
[(set FR32:$dst, (OpNode FR32:$src))]>;
|
||||
@ -1906,6 +1906,26 @@ multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr,
|
||||
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
|
||||
}
|
||||
|
||||
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
///
/// Expands to four records per instantiation: SSr, SSm, SSr_Int and SSm_Int.
/// Each one takes two sources ($src1, $src2) and writes $dst, and each asm
/// string is built as "v" + OpcodeStr + "ss" followed by the operand list.
/// The operand list carries two alternative orders separated by '|'.
///
/// Every pattern list below is empty ([]), so the OpNode and F32Int
/// parameters are accepted but not referenced inside this multiclass.
|
||||
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F32Int> {
|
||||
// Register form on FR32: $src2 is a register operand.
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
// Memory form on FR32: $src2 is an f32mem operand. Unlike the other three
// records this one derives from I (not SSI) and spells out XS plus its own
// Requires<[HasAVX, HasSSE1, OptForSize]> list.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>, XS, Requires<[HasAVX, HasSSE1, OptForSize]>;
|
||||
// "_Int" register form: same asm string, but operands are VR128.
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
// "_Int" memory form: VR128 $src1 with an ssmem $src2.
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, ssmem:$src2),
|
||||
!strconcat(!strconcat("v", OpcodeStr),
|
||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_s - SSE2 unops in scalar form.
|
||||
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int> {
|
||||
@ -1940,6 +1960,52 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
|
||||
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
|
||||
}
|
||||
|
||||
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
///
/// Expands to four records per instantiation: SDr, SDm, SDr_Int and SDm_Int,
/// all derived from VSDI. Each one takes two sources ($src1, $src2) and
/// writes $dst; the asm string is OpcodeStr + "sd" followed by the operand
/// list, which carries two alternative orders separated by '|'.
///
/// No "v" prefix is concatenated here, unlike sse1_fp_unop_s_avx.
/// NOTE(review): the prefix is presumably supplied by the VSDI class, which
/// is not visible in this hunk - confirm against its definition.
///
/// Every pattern list below is empty ([]), so the OpNode and F64Int
/// parameters are accepted but not referenced inside this multiclass.
|
||||
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
|
||||
SDNode OpNode, Intrinsic F64Int> {
|
||||
// Register form on FR64: $src2 is a register operand.
def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
// Memory form on FR64: $src2 is an f64mem operand.
def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
|
||||
(ins FR64:$src1, f64mem:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
|
||||
// "_Int" register form: same asm string, but operands are VR128.
def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
// "_Int" memory form: VR128 $src1 with an sdmem $src2.
def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, sdmem:$src2),
|
||||
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[]>;
|
||||
}
|
||||
|
||||
// AVX instantiations of sqrt (0x51), rsqrt (0x52) and rcp (0x53).
// Everything in this block is defined with isAsmParserOnly = 1.
// NOTE(review): presumably this limits the records to the assembler/encoder
// path - confirm against the flag's definition.
//
// Naming convention used below: the scalar *_s_avx multiclasses receive the
// bare mnemonic ("sqrt", "rsqrt", "rcp"), while the packed *_p multiclasses
// receive the already-prefixed one ("vsqrt", "vrsqrt", "vrcp").
// Scalar defms are tagged VEX_4V and packed defms are tagged VEX.
let isAsmParserOnly = 1 in {
|
||||
// Square root.
|
||||
// Double-precision forms, gated on [HasAVX, HasSSE2].
let Predicates = [HasAVX, HasSSE2] in {
|
||||
defm VSQRT : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
|
||||
VEX_4V;
|
||||
|
||||
defm VSQRT : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX;
|
||||
}
|
||||
|
||||
// Single-precision forms, gated on [HasAVX, HasSSE1].
let Predicates = [HasAVX, HasSSE1] in {
|
||||
defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
|
||||
VEX_4V;
|
||||
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX;
|
||||
// Reciprocal approximations. Note that these typically require refinement
|
||||
// in order to obtain suitable precision.
|
||||
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
|
||||
int_x86_sse_rsqrt_ss>, VEX_4V;
|
||||
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>,
|
||||
VEX;
|
||||
defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
|
||||
VEX_4V;
|
||||
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>,
|
||||
VEX;
|
||||
}
|
||||
}
|
||||
|
||||
// Square root.
|
||||
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
|
||||
sse1_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ps>,
|
||||
|
@ -10873,3 +10873,68 @@
|
||||
// CHECK: vcvtpd2ps %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
|
||||
vcvtpd2ps %xmm2, %xmm3
|
||||
|
||||
// CHECK: vsqrtpd %xmm1, %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
|
||||
vsqrtpd %xmm1, %xmm2
|
||||
|
||||
// CHECK: vsqrtpd (%eax), %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
|
||||
vsqrtpd (%eax), %xmm2
|
||||
|
||||
// CHECK: vsqrtps %xmm1, %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
|
||||
vsqrtps %xmm1, %xmm2
|
||||
|
||||
// CHECK: vsqrtps (%eax), %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
|
||||
vsqrtps (%eax), %xmm2
|
||||
|
||||
// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
|
||||
vsqrtsd %xmm1, %xmm2, %xmm3
|
||||
|
||||
// CHECK: vsqrtsd (%eax), %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
|
||||
vsqrtsd (%eax), %xmm2, %xmm3
|
||||
|
||||
// CHECK: vsqrtss %xmm1, %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
|
||||
vsqrtss %xmm1, %xmm2, %xmm3
|
||||
|
||||
// CHECK: vsqrtss (%eax), %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x51,0x18]
|
||||
vsqrtss (%eax), %xmm2, %xmm3
|
||||
|
||||
// CHECK: vrsqrtps %xmm1, %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
|
||||
vrsqrtps %xmm1, %xmm2
|
||||
|
||||
// CHECK: vrsqrtps (%eax), %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
|
||||
vrsqrtps (%eax), %xmm2
|
||||
|
||||
// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
|
||||
vrsqrtss %xmm1, %xmm2, %xmm3
|
||||
|
||||
// CHECK: vrsqrtss (%eax), %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x52,0x18]
|
||||
vrsqrtss (%eax), %xmm2, %xmm3
|
||||
|
||||
// CHECK: vrcpps %xmm1, %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
|
||||
vrcpps %xmm1, %xmm2
|
||||
|
||||
// CHECK: vrcpps (%eax), %xmm2
|
||||
// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
|
||||
vrcpps (%eax), %xmm2
|
||||
|
||||
// CHECK: vrcpss %xmm1, %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
|
||||
vrcpss %xmm1, %xmm2, %xmm3
|
||||
|
||||
// CHECK: vrcpss (%eax), %xmm2, %xmm3
|
||||
// CHECK: encoding: [0xc5,0xea,0x53,0x18]
|
||||
vrcpss (%eax), %xmm2, %xmm3
|
||||
|
||||
|
@ -922,3 +922,67 @@ pshufb CPI1_0(%rip), %xmm1
|
||||
// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
|
||||
vcvtpd2ps %xmm12, %xmm11
|
||||
|
||||
// CHECK: vsqrtpd %xmm11, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
|
||||
vsqrtpd %xmm11, %xmm12
|
||||
|
||||
// CHECK: vsqrtpd (%rax), %xmm12
|
||||
// CHECK: encoding: [0xc5,0x79,0x51,0x20]
|
||||
vsqrtpd (%rax), %xmm12
|
||||
|
||||
// CHECK: vsqrtps %xmm11, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
|
||||
vsqrtps %xmm11, %xmm12
|
||||
|
||||
// CHECK: vsqrtps (%rax), %xmm12
|
||||
// CHECK: encoding: [0xc5,0x78,0x51,0x20]
|
||||
vsqrtps (%rax), %xmm12
|
||||
|
||||
// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
|
||||
vsqrtsd %xmm11, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vsqrtsd (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
|
||||
vsqrtsd (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vsqrtss %xmm11, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
|
||||
vsqrtss %xmm11, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vsqrtss (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
|
||||
vsqrtss (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vrsqrtps %xmm11, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
|
||||
vrsqrtps %xmm11, %xmm12
|
||||
|
||||
// CHECK: vrsqrtps (%rax), %xmm12
|
||||
// CHECK: encoding: [0xc5,0x78,0x52,0x20]
|
||||
vrsqrtps (%rax), %xmm12
|
||||
|
||||
// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
|
||||
vrsqrtss %xmm11, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vrsqrtss (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
|
||||
vrsqrtss (%rax), %xmm12, %xmm10
|
||||
|
||||
// CHECK: vrcpps %xmm11, %xmm12
|
||||
// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
|
||||
vrcpps %xmm11, %xmm12
|
||||
|
||||
// CHECK: vrcpps (%rax), %xmm12
|
||||
// CHECK: encoding: [0xc5,0x78,0x53,0x20]
|
||||
vrcpps (%rax), %xmm12
|
||||
|
||||
// CHECK: vrcpss %xmm11, %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
|
||||
vrcpss %xmm11, %xmm12, %xmm10
|
||||
|
||||
// CHECK: vrcpss (%rax), %xmm12, %xmm10
|
||||
// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
|
||||
vrcpss (%rax), %xmm12, %xmm10
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user