[X86] Remove another weird scalar sqrt/rcp/rsqrt pattern.

This pattern turned a vector sqrt/rcp/rsqrt operation of sse_load_f32/f64 into the scalar instruction for the operation and put undef into the upper bits. For correctness, the resulting code should still perform the sqrt/rcp/rsqrt on the upper bits after the load is extended, since that's what the operation asked for. In particular, when the upper bits are 0, we need to calculate the sqrt/rcp/rsqrt of the zeroes and keep the result in the upper bits. This implies we should still be using the packed instruction.

The only test case for this pattern is one I just added so there was no coverage of this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288784 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2016-12-06 08:08:12 +00:00
parent 10b8bf3510
commit 8368f754a7
2 changed files with 2 additions and 7 deletions

View File

@ -3414,9 +3414,6 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
let Predicates = [target] in {
def : Pat<(vt (OpNode mem_cpat:$src)),
(vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
(vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
def : Pat<(Intr VR128:$src),
@ -3490,9 +3487,6 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
def : Pat<(vt (OpNode mem_cpat:$src)),
(!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
mem_cpat:$src)>;
}
}

View File

@ -364,7 +364,8 @@ define <4 x float> @int_sqrt_ss() {
define <2 x double> @vector_sqrt_scalar_load(double* %a0) optsize {
; CHECK-LABEL: vector_sqrt_scalar_load:
; CHECK: ## BB#0:
; CHECK-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
; CHECK-NEXT: retq
%a1 = load double, double* %a0
%a2 = insertelement <2 x double> undef, double %a1, i32 0