mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 21:00:29 +00:00
[X86][SSE] Fix memory folding of (v)roundsd / (v)roundss
We only had partial memory folding support for the intrinsic definitions, which (as noted on PR27481) was causing FR32/FR64/VR128 mismatch errors with the machine verifier. This patch adds the missing memory folding support for both the intrinsics and the ffloor/fnearbyint/fceil/frint/ftrunc patterns, and in doing so fixes the failing machine verifier stack folding tests from PR27481. Differential Revision: https://reviews.llvm.org/D23276 llvm-svn: 278106
This commit is contained in:
parent
96d3c1f66a
commit
d111e686cc
@ -1116,6 +1116,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
|
||||
{ X86::ROUNDSDr, X86::ROUNDSDm, 0 },
|
||||
{ X86::ROUNDSSr, X86::ROUNDSSm, 0 },
|
||||
{ X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, 0 },
|
||||
{ X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, 0 },
|
||||
{ X86::SBB32rr, X86::SBB32rm, 0 },
|
||||
{ X86::SBB64rr, X86::SBB64rm, 0 },
|
||||
{ X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
|
||||
@ -1412,6 +1414,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VPXORrr, X86::VPXORrm, 0 },
|
||||
{ X86::VROUNDSDr, X86::VROUNDSDm, 0 },
|
||||
{ X86::VROUNDSSr, X86::VROUNDSSm, 0 },
|
||||
{ X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, 0 },
|
||||
{ X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, 0 },
|
||||
{ X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
|
||||
{ X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
|
||||
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
|
||||
@ -6208,9 +6212,11 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
|
||||
case X86::ROUNDSDr:
|
||||
case X86::ROUNDSDm:
|
||||
case X86::ROUNDSDr_Int:
|
||||
case X86::ROUNDSDm_Int:
|
||||
case X86::ROUNDSSr:
|
||||
case X86::ROUNDSSm:
|
||||
case X86::ROUNDSSr_Int:
|
||||
case X86::ROUNDSSm_Int:
|
||||
case X86::RSQRTSSr:
|
||||
case X86::RSQRTSSm:
|
||||
case X86::RSQRTSSr_Int:
|
||||
@ -6289,9 +6295,11 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
|
||||
case X86::VROUNDSDr:
|
||||
case X86::VROUNDSDm:
|
||||
case X86::VROUNDSDr_Int:
|
||||
case X86::VROUNDSDm_Int:
|
||||
case X86::VROUNDSSr:
|
||||
case X86::VROUNDSSm:
|
||||
case X86::VROUNDSSr_Int:
|
||||
case X86::VROUNDSSm_Int:
|
||||
case X86::VRSQRTSSr:
|
||||
case X86::VRSQRTSSr_Int:
|
||||
case X86::VRSQRTSSm:
|
||||
|
@ -6409,6 +6409,17 @@ let ExeDomain = GenericDomain in {
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[]>, Sched<[WriteFAdd]>;
|
||||
|
||||
// Operation, mem.
|
||||
let mayLoad = 1 in
|
||||
def SSm : SS4AIi8<opcss, MRMSrcMem,
|
||||
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
|
||||
|
||||
// Intrinsic operation, reg.
|
||||
let isCodeGenOnly = 1 in
|
||||
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
||||
@ -6422,7 +6433,8 @@ let ExeDomain = GenericDomain in {
|
||||
Sched<[WriteFAdd]>;
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SSm : SS4AIi8<opcss, MRMSrcMem,
|
||||
let isCodeGenOnly = 1 in
|
||||
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
@ -6444,6 +6456,17 @@ let ExeDomain = GenericDomain in {
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[]>, Sched<[WriteFAdd]>;
|
||||
|
||||
// Operation, mem.
|
||||
let mayLoad = 1 in
|
||||
def SDm : SS4AIi8<opcsd, MRMSrcMem,
|
||||
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||
!strconcat(OpcodeStr,
|
||||
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
|
||||
[]>, Sched<[WriteFAddLd, ReadAfterLd]>;
|
||||
|
||||
// Intrinsic operation, reg.
|
||||
let isCodeGenOnly = 1 in
|
||||
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
||||
@ -6457,7 +6480,8 @@ let ExeDomain = GenericDomain in {
|
||||
Sched<[WriteFAdd]>;
|
||||
|
||||
// Intrinsic operation, mem.
|
||||
def SDm : SS4AIi8<opcsd, MRMSrcMem,
|
||||
let isCodeGenOnly = 1 in
|
||||
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
||||
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
|
||||
!if(Is2Addr,
|
||||
!strconcat(OpcodeStr,
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
|
||||
; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
@ -1559,7 +1559,13 @@ define double @stack_fold_roundsd(double %a0) optsize {
|
||||
}
|
||||
declare double @llvm.floor.f64(double) nounwind readnone
|
||||
|
||||
; TODO stack_fold_roundsd_int
|
||||
define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
|
||||
;CHECK-LABEL: stack_fold_roundsd_int
|
||||
;CHECK: vroundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||
|
||||
define float @stack_fold_roundss(float %a0) optsize {
|
||||
@ -1571,7 +1577,13 @@ define float @stack_fold_roundss(float %a0) optsize {
|
||||
}
|
||||
declare float @llvm.floor.f32(float) nounwind readnone
|
||||
|
||||
; TODO stack_fold_roundss_int
|
||||
define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
|
||||
;CHECK-LABEL: stack_fold_roundss_int
|
||||
;CHECK: vroundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||
|
||||
; TODO stack_fold_rsqrtps
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
|
||||
; RUN: llc -O3 -verify-machineinstrs -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
@ -987,7 +987,13 @@ define double @stack_fold_roundsd(double %a0) optsize {
|
||||
}
|
||||
declare double @llvm.floor.f64(double) nounwind readnone
|
||||
|
||||
; TODO stack_fold_roundsd_int
|
||||
define <2 x double> @stack_fold_roundsd_int(<2 x double> %a0, <2 x double> %a1) optsize {
|
||||
;CHECK-LABEL: stack_fold_roundsd_int
|
||||
;CHECK: roundsd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
|
||||
ret <2 x double> %2
|
||||
}
|
||||
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
|
||||
|
||||
define float @stack_fold_roundss(float %a0) minsize {
|
||||
@ -999,7 +1005,13 @@ define float @stack_fold_roundss(float %a0) minsize {
|
||||
}
|
||||
declare float @llvm.floor.f32(float) nounwind readnone
|
||||
|
||||
; TODO stack_fold_roundss_int
|
||||
define <4 x float> @stack_fold_roundss_int(<4 x float> %a0, <4 x float> %a1) optsize {
|
||||
;CHECK-LABEL: stack_fold_roundss_int
|
||||
;CHECK: roundss $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
|
||||
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
|
||||
%2 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
|
||||
ret <4 x float> %2
|
||||
}
|
||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||
|
||||
; TODO stack_fold_rsqrtps
|
||||
|
Loading…
Reference in New Issue
Block a user