Add sqrt, rsqrt and rcp AVX instructions

llvm-svn: 107166
This commit is contained in:
Bruno Cardoso Lopes 2010-06-29 17:26:30 +00:00
parent 151bec3c2d
commit 2dca1dd168
3 changed files with 196 additions and 1 deletions

View File

@ -1873,7 +1873,7 @@ let isCommutable = 0 in {
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
SDNode OpNode, Intrinsic F32Int> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
@ -1906,6 +1906,26 @@ multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
///
/// Each instantiation defines four records. All of them take two sources
/// ($src1, $src2) and one destination ($dst), and all are printed with the
/// mnemonic "v" # OpcodeStr # "ss":
///   SSr     - FR32 register sources.
///   SSm     - FR32 first source, f32mem second source.
///   SSr_Int - VR128 register sources.
///   SSm_Int - VR128 first source, ssmem second source.
/// The asm string carries two alternative operand orders inside {...|...}:
/// "$src2, $src1, $dst" and "$dst, $src1, $src2".
/// Every selection pattern is empty ([]), so OpNode and F32Int are accepted
/// as parameters but are not referenced by any definition below.
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
// Scalar register form on FR32.
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
// Scalar memory form. Unlike the other three defs this one derives from the
// plain I class and attaches XS and its own Requires list explicitly.
// NOTE(review): OptForSize is listed although the pattern is empty - confirm
// whether that predicate is actually wanted here.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, XS, Requires<[HasAVX, HasSSE1, OptForSize]>;
// Register form operating on full VR128 registers.
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
// Memory form with a VR128 first source and an ssmem second source.
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, ssmem:$src2),
!strconcat(!strconcat("v", OpcodeStr),
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
@ -1940,6 +1960,52 @@ multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
///
/// Each instantiation defines four records, all derived from VSDI and all
/// taking two sources ($src1, $src2) and one destination ($dst):
///   SDr     - FR64 register sources.
///   SDm     - FR64 first source, f64mem second source.
///   SDr_Int - VR128 register sources.
///   SDm_Int - VR128 first source, sdmem second source.
/// The asm string is OpcodeStr # "sd" followed by two alternative operand
/// orders inside {...|...}: "$src2, $src1, $dst" and "$dst, $src1, $src2".
/// NOTE(review): unlike sse1_fp_unop_s_avx, no "v" is prepended to OpcodeStr
/// here; presumably the VSDI class adds it - confirm against its definition.
/// Every selection pattern is empty ([]), so OpNode and F64Int are accepted
/// as parameters but are not referenced by any definition below.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
// Scalar register form on FR64.
def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
// Scalar memory form: second source is an f64mem operand.
def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
// Register form operating on full VR128 registers.
def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
// Memory form with a VR128 first source and an sdmem second source.
def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>;
}
// AVX instantiations of sqrt (opcode 0x51), rsqrt (0x52) and rcp (0x53).
// Every record defined inside this block gets isAsmParserOnly = 1.
// NOTE(review): presumably this keeps the records out of instruction
// selection and exposes them to the assembler only - confirm.
//
// Scalar forms come from the *_s_avx multiclasses: they are given the bare
// mnemonic ("sqrt", "rsqrt", "rcp") and are tagged VEX_4V. Packed forms
// reuse the existing sse1_fp_unop_p / sse2_fp_unop_p multiclasses with an
// already "v"-prefixed mnemonic and are tagged VEX.
let isAsmParserOnly = 1 in {
// Square root.
let Predicates = [HasAVX, HasSSE2] in {
defm VSQRT : sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
VEX_4V;
defm VSQRT : sse2_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_pd>, VEX;
}
let Predicates = [HasAVX, HasSSE1] in {
defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
VEX_4V;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ps>, VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
// Only the sse1 multiclasses are instantiated for rsqrt and rcp here.
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, int_x86_sse_rsqrt_ps>,
VEX;
defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ps>,
VEX;
}
}
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse1_fp_unop_p<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ps>,

View File

@ -10873,3 +10873,68 @@
// CHECK: vcvtpd2ps %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
vcvtpd2ps %xmm2, %xmm3
// AVX square root (opcode byte 0x51). The packed forms (vsqrtpd, vsqrtps)
// take two operands and the scalar forms (vsqrtsd, vsqrtss) take three; each
// is exercised with a register source and with a (%eax) memory source.
// Every expected encoding here starts with the two-byte 0xc5 VEX prefix.
// CHECK: vsqrtpd %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
vsqrtpd %xmm1, %xmm2
// CHECK: vsqrtpd (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
vsqrtpd (%eax), %xmm2
// CHECK: vsqrtps %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
vsqrtps %xmm1, %xmm2
// CHECK: vsqrtps (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
vsqrtps (%eax), %xmm2
// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
vsqrtsd %xmm1, %xmm2, %xmm3
// CHECK: vsqrtsd (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
vsqrtsd (%eax), %xmm2, %xmm3
// CHECK: vsqrtss %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
vsqrtss %xmm1, %xmm2, %xmm3
// CHECK: vsqrtss (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x51,0x18]
vsqrtss (%eax), %xmm2, %xmm3
// AVX reciprocal square root (opcode byte 0x52): packed vrsqrtps with two
// operands and scalar vrsqrtss with three, each with a register source and
// with a (%eax) memory source.
// CHECK: vrsqrtps %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
vrsqrtps %xmm1, %xmm2
// CHECK: vrsqrtps (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
vrsqrtps (%eax), %xmm2
// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
vrsqrtss %xmm1, %xmm2, %xmm3
// CHECK: vrsqrtss (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x52,0x18]
vrsqrtss (%eax), %xmm2, %xmm3
// AVX reciprocal (opcode byte 0x53): packed vrcpps with two operands and
// scalar vrcpss with three, each with a register source and with a (%eax)
// memory source.
// CHECK: vrcpps %xmm1, %xmm2
// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
vrcpps %xmm1, %xmm2
// CHECK: vrcpps (%eax), %xmm2
// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
vrcpps (%eax), %xmm2
// CHECK: vrcpss %xmm1, %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
vrcpss %xmm1, %xmm2, %xmm3
// CHECK: vrcpss (%eax), %xmm2, %xmm3
// CHECK: encoding: [0xc5,0xea,0x53,0x18]
vrcpss (%eax), %xmm2, %xmm3

View File

@ -922,3 +922,67 @@ pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
vcvtpd2ps %xmm12, %xmm11
// AVX square root (opcode byte 0x51) using the extended registers
// xmm10-xmm12. The packed forms (vsqrtpd, vsqrtps) take two operands and the
// scalar forms (vsqrtsd, vsqrtss) take three. In the expected encodings the
// register-source forms start with the three-byte 0xc4 VEX prefix, while the
// (%rax) memory-source forms start with the two-byte 0xc5 prefix.
// CHECK: vsqrtpd %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
vsqrtpd %xmm11, %xmm12
// CHECK: vsqrtpd (%rax), %xmm12
// CHECK: encoding: [0xc5,0x79,0x51,0x20]
vsqrtpd (%rax), %xmm12
// CHECK: vsqrtps %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
vsqrtps %xmm11, %xmm12
// CHECK: vsqrtps (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x51,0x20]
vsqrtps (%rax), %xmm12
// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10
// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
vsqrtsd %xmm11, %xmm12, %xmm10
// CHECK: vsqrtsd (%rax), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
vsqrtsd (%rax), %xmm12, %xmm10
// CHECK: vsqrtss %xmm11, %xmm12, %xmm10
// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
vsqrtss %xmm11, %xmm12, %xmm10
// CHECK: vsqrtss (%rax), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
vsqrtss (%rax), %xmm12, %xmm10
// AVX reciprocal square root (opcode byte 0x52) using xmm10-xmm12: packed
// vrsqrtps with two operands and scalar vrsqrtss with three. Register-source
// forms expect the three-byte 0xc4 VEX prefix; (%rax) memory-source forms
// expect the two-byte 0xc5 prefix.
// CHECK: vrsqrtps %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
vrsqrtps %xmm11, %xmm12
// CHECK: vrsqrtps (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x52,0x20]
vrsqrtps (%rax), %xmm12
// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10
// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
vrsqrtss %xmm11, %xmm12, %xmm10
// CHECK: vrsqrtss (%rax), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
vrsqrtss (%rax), %xmm12, %xmm10
// AVX reciprocal (opcode byte 0x53) using xmm10-xmm12: packed vrcpps with
// two operands and scalar vrcpss with three. Register-source forms expect
// the three-byte 0xc4 VEX prefix; (%rax) memory-source forms expect the
// two-byte 0xc5 prefix.
// CHECK: vrcpps %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
vrcpps %xmm11, %xmm12
// CHECK: vrcpps (%rax), %xmm12
// CHECK: encoding: [0xc5,0x78,0x53,0x20]
vrcpps (%rax), %xmm12
// CHECK: vrcpss %xmm11, %xmm12, %xmm10
// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
vrcpss %xmm11, %xmm12, %xmm10
// CHECK: vrcpss (%rax), %xmm12, %xmm10
// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
vrcpss (%rax), %xmm12, %xmm10