mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
[X86] Use vrndscaleps/pd for 128/256-bit ffloor/ftrunc/fceil/fnearbyint/frint when avx512vl is enabled.
This matches what we already do for scalar and 512-bit types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317991 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
04bd4860e0
commit
6e10efc50e
@ -8858,6 +8858,52 @@ def : Pat<(v8f64 (ftrunc VR512:$src)),
|
||||
(VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
|
||||
}
|
||||
|
||||
// Selection patterns for 128-bit and 256-bit vector rounding nodes, active
// only when the HasVLX predicate holds. Each pattern maps one generic rounding
// node (ffloor / fnearbyint / fceil / frint / ftrunc) on a register operand to
// the register-immediate form of VRNDSCALEPS/PD for that vector width.
//
// Immediate used for each node (identical across all four vector types below):
//   ffloor     -> 0x9
//   fceil      -> 0xA
//   ftrunc     -> 0xB
//   frint      -> 0x4
//   fnearbyint -> 0xC
// These are the same immediates the 512-bit VRNDSCALEPDZrri pattern and the
// VROUNDPSr pattern elsewhere in this change use for the corresponding nodes.
// NOTE(review): the immediate presumably packs the rounding-control bits plus
// an exception-suppress bit as described for the instruction's imm8 - confirm
// against the ISA reference before changing any value.
let Predicates = [HasVLX] in {
|
||||
// v4f32: 128-bit single-precision, operands in VR128X.
def : Pat<(v4f32 (ffloor VR128X:$src)),
|
||||
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
|
||||
def : Pat<(v4f32 (fnearbyint VR128X:$src)),
|
||||
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
|
||||
def : Pat<(v4f32 (fceil VR128X:$src)),
|
||||
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
|
||||
def : Pat<(v4f32 (frint VR128X:$src)),
|
||||
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
|
||||
def : Pat<(v4f32 (ftrunc VR128X:$src)),
|
||||
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
|
||||
|
||||
// v2f64: 128-bit double-precision, operands in VR128X.
def : Pat<(v2f64 (ffloor VR128X:$src)),
|
||||
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
|
||||
def : Pat<(v2f64 (fnearbyint VR128X:$src)),
|
||||
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
|
||||
def : Pat<(v2f64 (fceil VR128X:$src)),
|
||||
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
|
||||
def : Pat<(v2f64 (frint VR128X:$src)),
|
||||
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
|
||||
def : Pat<(v2f64 (ftrunc VR128X:$src)),
|
||||
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
|
||||
|
||||
// v8f32: 256-bit single-precision, operands in VR256X.
def : Pat<(v8f32 (ffloor VR256X:$src)),
|
||||
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
|
||||
def : Pat<(v8f32 (fnearbyint VR256X:$src)),
|
||||
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
|
||||
def : Pat<(v8f32 (fceil VR256X:$src)),
|
||||
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
|
||||
def : Pat<(v8f32 (frint VR256X:$src)),
|
||||
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
|
||||
def : Pat<(v8f32 (ftrunc VR256X:$src)),
|
||||
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
|
||||
|
||||
// v4f64: 256-bit double-precision, operands in VR256X.
def : Pat<(v4f64 (ffloor VR256X:$src)),
|
||||
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
|
||||
def : Pat<(v4f64 (fnearbyint VR256X:$src)),
|
||||
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
|
||||
def : Pat<(v4f64 (fceil VR256X:$src)),
|
||||
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
|
||||
def : Pat<(v4f64 (frint VR256X:$src)),
|
||||
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
|
||||
def : Pat<(v4f64 (ftrunc VR256X:$src)),
|
||||
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
|
||||
}
|
||||
|
||||
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
|
||||
bits<8> opc>{
|
||||
let Predicates = [HasAVX512] in {
|
||||
|
@ -5984,7 +5984,7 @@ let Predicates = [UseAVX] in {
|
||||
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def : Pat<(v4f32 (ffloor VR128:$src)),
|
||||
(VROUNDPSr VR128:$src, (i32 0x9))>;
|
||||
def : Pat<(v4f32 (fnearbyint VR128:$src)),
|
||||
|
@ -16,7 +16,7 @@ define <2 x double> @floor_v2f64(<2 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: floor_v2f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $9, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscalepd $9, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <2 x double> @llvm.floor.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -36,7 +36,7 @@ define <4 x float> @floor_v4f32(<4 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: floor_v4f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $9, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscaleps $9, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -57,7 +57,7 @@ define <4 x double> @floor_v4f64(<4 x double> %p){
|
||||
;
|
||||
; AVX512-LABEL: floor_v4f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $9, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscalepd $9, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x double> @llvm.floor.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -78,7 +78,7 @@ define <8 x float> @floor_v8f32(<8 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: floor_v8f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $9, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscaleps $9, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <8 x float> @llvm.floor.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
@ -146,7 +146,7 @@ define <2 x double> @ceil_v2f64(<2 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: ceil_v2f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $10, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscalepd $10, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <2 x double> @llvm.ceil.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -166,7 +166,7 @@ define <4 x float> @ceil_v4f32(<4 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: ceil_v4f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $10, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscaleps $10, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -187,7 +187,7 @@ define <4 x double> @ceil_v4f64(<4 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: ceil_v4f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $10, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscalepd $10, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x double> @llvm.ceil.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -208,7 +208,7 @@ define <8 x float> @ceil_v8f32(<8 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: ceil_v8f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $10, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscaleps $10, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <8 x float> @llvm.ceil.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
@ -276,7 +276,7 @@ define <2 x double> @trunc_v2f64(<2 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: trunc_v2f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $11, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscalepd $11, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <2 x double> @llvm.trunc.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -296,7 +296,7 @@ define <4 x float> @trunc_v4f32(<4 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: trunc_v4f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $11, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscaleps $11, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x float> @llvm.trunc.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -317,7 +317,7 @@ define <4 x double> @trunc_v4f64(<4 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: trunc_v4f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $11, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscalepd $11, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x double> @llvm.trunc.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -338,7 +338,7 @@ define <8 x float> @trunc_v8f32(<8 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: trunc_v8f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $11, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscaleps $11, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <8 x float> @llvm.trunc.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
@ -406,7 +406,7 @@ define <2 x double> @rint_v2f64(<2 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: rint_v2f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $4, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscalepd $4, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <2 x double> @llvm.rint.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -426,7 +426,7 @@ define <4 x float> @rint_v4f32(<4 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: rint_v4f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $4, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscaleps $4, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x float> @llvm.rint.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -447,7 +447,7 @@ define <4 x double> @rint_v4f64(<4 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: rint_v4f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $4, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscalepd $4, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x double> @llvm.rint.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -468,7 +468,7 @@ define <8 x float> @rint_v8f32(<8 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: rint_v8f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $4, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscaleps $4, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <8 x float> @llvm.rint.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
@ -536,7 +536,7 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: nearbyint_v2f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $12, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscalepd $12, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p)
|
||||
ret <2 x double> %t
|
||||
@ -556,7 +556,7 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: nearbyint_v4f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $12, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vrndscaleps $12, %xmm0, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %p)
|
||||
ret <4 x float> %t
|
||||
@ -577,7 +577,7 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %p) {
|
||||
;
|
||||
; AVX512-LABEL: nearbyint_v4f64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundpd $12, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscalepd $12, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %p)
|
||||
ret <4 x double> %t
|
||||
@ -598,7 +598,7 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %p) {
|
||||
;
|
||||
; AVX512-LABEL: nearbyint_v8f32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vroundps $12, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vrndscaleps $12, %ymm0, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%t = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
|
||||
ret <8 x float> %t
|
||||
|
Loading…
Reference in New Issue
Block a user