[X86] Add scalar register class versions of VRNDSCALE instructions and rename the existing versions to _Int.

This is consistent with our normal implementation of scalar instructions.

While there, disable load folding for the patterns with IMPLICIT_DEF unless optimizing for size, which is also our standard practice.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2017-11-11 08:24:15 +00:00
parent d009633224
commit dbf8de9323
3 changed files with 69 additions and 37 deletions

View File

@ -7656,58 +7656,74 @@ multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
let Predicates = [HasAVX512] in {
def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
(_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x9))), _.FRC)>;
def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xa))), _.FRC)>;
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xb))), _.FRC)>;
def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0x4))), _.FRC)>;
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
(_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
addr:$src, (i32 0xc))), _.FRC)>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
let mayLoad = 1 in
def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
}
}
let Predicates = [HasAVX512] in {
def : Pat<(ffloor _.FRC:$src),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src, (i32 0x9)))>;
def : Pat<(fceil _.FRC:$src),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src, (i32 0xa)))>;
def : Pat<(ftrunc _.FRC:$src),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src, (i32 0xb)))>;
def : Pat<(frint _.FRC:$src),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src, (i32 0x4)))>;
def : Pat<(fnearbyint _.FRC:$src),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src, (i32 0xc)))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src, (i32 0x9)))>;
def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src, (i32 0xa)))>;
def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src, (i32 0xb)))>;
def : Pat<(frint (_.ScalarLdFrag addr:$src)),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src, (i32 0x4)))>;
def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src, (i32 0xc)))>;
}
}

View File

@ -8141,11 +8141,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTSS2SDZrm:
case X86::VCVTSS2SDZrm_Int:
case X86::VRNDSCALESDr:
case X86::VRNDSCALESDrb:
case X86::VRNDSCALESDr_Int:
case X86::VRNDSCALESDrb_Int:
case X86::VRNDSCALESDm:
case X86::VRNDSCALESDm_Int:
case X86::VRNDSCALESSr:
case X86::VRNDSCALESSrb:
case X86::VRNDSCALESSr_Int:
case X86::VRNDSCALESSrb_Int:
case X86::VRNDSCALESSm:
case X86::VRNDSCALESSm_Int:
case X86::VRCP14SSrr:
case X86::VRCP14SSrm:
case X86::VRSQRT14SSrr:

View File

@ -124,6 +124,18 @@ declare float @llvm.floor.f32(float %p)
; floor of a float loaded from memory, compiled without the optsize attribute.
; The CHECK lines expect the value to be loaded by a separate vmovss and then
; rounded by a register-to-register vrndscaless with immediate $9, i.e. the
; load is not folded into the rounding instruction.
define float @floor_f32m(float* %aptr) {
; CHECK-LABEL: floor_f32m:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; CHECK-NEXT: ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vrndscaless $9, %xmm0, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x09]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %aptr, align 4
%res = call float @llvm.floor.f32(float %a)
ret float %res
}
define float @floor_f32m_optsize(float* %aptr) optsize {
; CHECK-LABEL: floor_f32m_optsize:
; CHECK: ## BB#0:
; CHECK-NEXT: vrndscaless $9, (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x09]
; CHECK-NEXT: retq ## encoding: [0xc3]
%a = load float, float* %aptr, align 4