mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 23:18:58 +00:00
[X86] Add scalar register class versions of VRNDSCALE instructions and rename the existing versions to _Int.
This is consistent with our normal implementation of scalar instructions. While there, disable load folding for the patterns with IMPLICIT_DEF unless optimizing for size, which is also our standard practice. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d009633224
commit
dbf8de9323
@ -7656,58 +7656,74 @@ multiclass
|
||||
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
|
||||
|
||||
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
|
||||
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
|
||||
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
|
||||
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
|
||||
|
||||
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
|
||||
OpcodeStr,
|
||||
"$src3, $src2, $src1", "$src1, $src2, $src3",
|
||||
(_.VT (X86RndScales (_.VT _.RC:$src1),
|
||||
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
|
||||
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
|
||||
}
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
|
||||
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
|
||||
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
|
||||
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
|
||||
def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
|
||||
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
|
||||
|
||||
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x9))), _.FRC)>;
|
||||
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xa))), _.FRC)>;
|
||||
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xb))), _.FRC)>;
|
||||
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x4))), _.FRC)>;
|
||||
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
|
||||
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xc))), _.FRC)>;
|
||||
let isCodeGenOnly = 1, hasSideEffects = 0 in {
|
||||
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
|
||||
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[]>;
|
||||
|
||||
let mayLoad = 1 in
|
||||
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
|
||||
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
|
||||
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[]>;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(ffloor _.FRC:$src),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src, (i32 0x9)))>;
|
||||
def : Pat<(fceil _.FRC:$src),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src, (i32 0xa)))>;
|
||||
def : Pat<(ftrunc _.FRC:$src),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src, (i32 0xb)))>;
|
||||
def : Pat<(frint _.FRC:$src),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src, (i32 0x4)))>;
|
||||
def : Pat<(fnearbyint _.FRC:$src),
|
||||
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
|
||||
_.FRC:$src, (i32 0xc)))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, OptForSize] in {
|
||||
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x9)))>;
|
||||
def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xa)))>;
|
||||
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xb)))>;
|
||||
def : Pat<(frint (_.ScalarLdFrag addr:$src)),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0x4)))>;
|
||||
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
|
||||
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
|
||||
addr:$src, (i32 0xc)))>;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8141,11 +8141,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
|
||||
case X86::VCVTSS2SDZrm:
|
||||
case X86::VCVTSS2SDZrm_Int:
|
||||
case X86::VRNDSCALESDr:
|
||||
case X86::VRNDSCALESDrb:
|
||||
case X86::VRNDSCALESDr_Int:
|
||||
case X86::VRNDSCALESDrb_Int:
|
||||
case X86::VRNDSCALESDm:
|
||||
case X86::VRNDSCALESDm_Int:
|
||||
case X86::VRNDSCALESSr:
|
||||
case X86::VRNDSCALESSrb:
|
||||
case X86::VRNDSCALESSr_Int:
|
||||
case X86::VRNDSCALESSrb_Int:
|
||||
case X86::VRNDSCALESSm:
|
||||
case X86::VRNDSCALESSm_Int:
|
||||
case X86::VRCP14SSrr:
|
||||
case X86::VRCP14SSrm:
|
||||
case X86::VRSQRT14SSrr:
|
||||
|
@ -124,6 +124,18 @@ declare float @llvm.floor.f32(float %p)
|
||||
define float @floor_f32m(float* %aptr) {
|
||||
; CHECK-LABEL: floor_f32m:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
|
||||
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
|
||||
; CHECK-NEXT: vrndscaless $9, %xmm0, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x09]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%a = load float, float* %aptr, align 4
|
||||
%res = call float @llvm.floor.f32(float %a)
|
||||
ret float %res
|
||||
}
|
||||
|
||||
define float @floor_f32m_optsize(float* %aptr) optsize {
|
||||
; CHECK-LABEL: floor_f32m_optsize:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vrndscaless $9, (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x09]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%a = load float, float* %aptr, align 4
|
||||
|
Loading…
Reference in New Issue
Block a user