mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-11 13:46:13 +00:00
- Register v16i16 as valid VR256 register class
- Add more bitcasts for v16i16
- Since 135661 and 135662 already added the splat logic, just add one more splat test for v16i16

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135663 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
65b74e1d00
commit
dbd4fe2b0a
@ -970,11 +970,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
|
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
|
||||||
|
|
||||||
if (!UseSoftFloat && Subtarget->hasAVX()) {
|
if (!UseSoftFloat && Subtarget->hasAVX()) {
|
||||||
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
|
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
|
||||||
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
|
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
|
||||||
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
|
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
|
||||||
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
|
addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
|
||||||
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
|
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
|
||||||
|
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
|
||||||
|
|
||||||
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
|
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
|
||||||
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
|
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
|
||||||
|
@ -3668,20 +3668,22 @@ let Predicates = [HasXMMInt] in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX] in {
|
let Predicates = [HasAVX] in {
|
||||||
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
|
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
|
||||||
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
|
def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
|
||||||
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
|
def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
|
||||||
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
|
def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
|
||||||
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
|
def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
|
||||||
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
|
def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
|
||||||
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
|
def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
|
||||||
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
|
def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
|
||||||
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
|
def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
|
||||||
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
|
def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
|
||||||
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
|
def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
|
||||||
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
|
def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
|
||||||
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
|
def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
|
||||||
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
|
def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
|
||||||
|
def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
|
||||||
|
def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Move scalar to XMM zero-extended
|
// Move scalar to XMM zero-extended
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||||
|
|
||||||
; FIXME: use avx versions for punpcklbw and punpckhbw
|
; FIXME: use avx versions for punpcklbw, punpckhbw and punpckhwd
|
||||||
|
|
||||||
; CHECK: vextractf128 $0
|
; CHECK: vextractf128 $0
|
||||||
; CHECK-NEXT: punpcklbw
|
; CHECK-NEXT: punpcklbw
|
||||||
@ -14,3 +14,14 @@ entry:
|
|||||||
ret <32 x i8> %shuffle
|
ret <32 x i8> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: vextractf128 $0
|
||||||
|
; CHECK-NEXT: punpckhwd
|
||||||
|
; CHECK-NEXT: vinsertf128 $0
|
||||||
|
; CHECK-NEXT: vinsertf128 $1
|
||||||
|
; CHECK-NEXT: vpermilps $85
|
||||||
|
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
|
||||||
|
entry:
|
||||||
|
%shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
|
||||||
|
ret <16 x i16> %shuffle
|
||||||
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user