mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-24 20:30:06 +00:00
Add instruction selection for ffloor of vectors when SSE4.1 or AVX is enabled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163473 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
46b4b112d2
commit
12fb5c667f
@ -946,6 +946,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FRINT, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
|
||||
|
||||
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
|
||||
|
||||
// FIXME: Do we need to handle scalar-to-vector here?
|
||||
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||
|
||||
@ -1023,6 +1026,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v8f32, Custom);
|
||||
|
||||
@ -1031,6 +1035,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
|
||||
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
|
||||
|
||||
|
@ -6210,6 +6210,15 @@ let Predicates = [HasAVX] in {
|
||||
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
|
||||
def : Pat<(f64 (ftrunc FR64:$src)),
|
||||
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
|
||||
|
||||
def : Pat<(v4f32 (ffloor VR128:$src)),
|
||||
(VROUNDPSr VR128:$src, (i32 0x1))>;
|
||||
def : Pat<(v2f64 (ffloor VR128:$src)),
|
||||
(VROUNDPDr VR128:$src, (i32 0x1))>;
|
||||
def : Pat<(v8f32 (ffloor VR256:$src)),
|
||||
(VROUNDYPSr VR256:$src, (i32 0x1))>;
|
||||
def : Pat<(v4f64 (ffloor VR256:$src)),
|
||||
(VROUNDYPDr VR256:$src, (i32 0x1))>;
|
||||
}
|
||||
|
||||
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
|
||||
@ -6219,26 +6228,33 @@ let Constraints = "$src1 = $dst" in
|
||||
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
|
||||
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
|
||||
|
||||
// Scalar rounding selection patterns. Each pattern selects a floating-point
// rounding node on FR32/FR64 to ROUNDSSr/ROUNDSDr, passing IMPLICIT_DEF as the
// first operand and a rounding-control immediate:
//   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4, fnearbyint -> 0xC
// NOTE(review): these patterns have no Predicates guard in this view, and the
// same list appears again under `let Predicates = [UseSSE41]`; presumably
// this is the pre-change side of the diff hunk -- confirm against the commit.
def : Pat<(ffloor FR32:$src),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
|
||||
def : Pat<(f64 (ffloor FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
|
||||
def : Pat<(f32 (fnearbyint FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
|
||||
def : Pat<(f64 (fnearbyint FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
|
||||
def : Pat<(f32 (fceil FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
|
||||
def : Pat<(f64 (fceil FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
|
||||
def : Pat<(f32 (frint FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
|
||||
def : Pat<(f64 (frint FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
|
||||
def : Pat<(f32 (ftrunc FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
|
||||
def : Pat<(f64 (ftrunc FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
|
||||
// Rounding selection patterns guarded by the UseSSE41 predicate.
// Scalar nodes on FR32/FR64 select ROUNDSSr/ROUNDSDr with an IMPLICIT_DEF
// first operand; the immediate picks the rounding behaviour:
//   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4, fnearbyint -> 0xC
// NOTE(review): the immediates presumably follow the ROUNDSS/ROUNDPS imm8
// rounding-control encoding from the Intel SDM -- confirm before changing.
let Predicates = [UseSSE41] in {
|
||||
def : Pat<(ffloor FR32:$src),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
|
||||
def : Pat<(f64 (ffloor FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
|
||||
def : Pat<(f32 (fnearbyint FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
|
||||
def : Pat<(f64 (fnearbyint FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
|
||||
def : Pat<(f32 (fceil FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
|
||||
def : Pat<(f64 (fceil FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
|
||||
def : Pat<(f32 (frint FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
|
||||
def : Pat<(f64 (frint FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
|
||||
def : Pat<(f32 (ftrunc FR32:$src)),
|
||||
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
|
||||
def : Pat<(f64 (ftrunc FR64:$src)),
|
||||
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
|
||||
|
||||
// Vector ffloor: v4f32 / v2f64 values in VR128 select ROUNDPSr / ROUNDPDr
// with the same 0x1 immediate used by the scalar ffloor patterns above.
def : Pat<(v4f32 (ffloor VR128:$src)),
|
||||
(ROUNDPSr VR128:$src, (i32 0x1))>;
|
||||
def : Pat<(v2f64 (ffloor VR128:$src)),
|
||||
(ROUNDPDr VR128:$src, (i32 0x1))>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SSE4.1 - Packed Bit Test
|
||||
|
38
test/CodeGen/X86/vec_floor.ll
Normal file
38
test/CodeGen/X86/vec_floor.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
|
||||
; Calls llvm.floor on a <2 x double> argument and returns the result; the
; checks expect the function label followed by a vroundpd in the llc output.
define <2 x double> @floor_v2f64(<2 x double> %p)
|
||||
{
|
||||
; CHECK: floor_v2f64
|
||||
; CHECK: vroundpd
|
||||
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
}
|
||||
declare <2 x double> @llvm.floor.v2f64(<2 x double> %p)
|
||||
|
||||
; Calls llvm.floor on a <4 x float> argument and returns the result; the
; checks expect the function label followed by a vroundps in the llc output.
define <4 x float> @floor_v4f32(<4 x float> %p)
|
||||
{
|
||||
; CHECK: floor_v4f32
|
||||
; CHECK: vroundps
|
||||
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
}
|
||||
declare <4 x float> @llvm.floor.v4f32(<4 x float> %p)
|
||||
|
||||
; Calls llvm.floor on a <4 x double> argument and returns the result; the
; checks expect the function label followed by a vroundpd in the llc output.
; NOTE(review): only the mnemonic is matched, so this check alone does not
; distinguish a single 256-bit operation from split 128-bit ones -- consider
; also matching the register operand if that distinction matters.
define <4 x double> @floor_v4f64(<4 x double> %p)
|
||||
{
|
||||
; CHECK: floor_v4f64
|
||||
; CHECK: vroundpd
|
||||
%t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
}
|
||||
declare <4 x double> @llvm.floor.v4f64(<4 x double> %p)
|
||||
|
||||
; Calls llvm.floor on an <8 x float> argument and returns the result; the
; checks expect the function label followed by a vroundps in the llc output.
; NOTE(review): only the mnemonic is matched, so this check alone does not
; distinguish a single 256-bit operation from split 128-bit ones -- consider
; also matching the register operand if that distinction matters.
define <8 x float> @floor_v8f32(<8 x float> %p)
|
||||
{
|
||||
; CHECK: floor_v8f32
|
||||
; CHECK: vroundps
|
||||
%t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
}
|
||||
declare <8 x float> @llvm.floor.v8f32(<8 x float> %p)
|
Loading…
Reference in New Issue
Block a user