mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-09 21:50:50 +00:00
a3afb70a5d
The _Int instructions are special, in that they operate on the full VR128 instead of FR32. The load folding then looks at MOVSS, at the user, and bails out when it sees a size mismatch. What we really know is that the rm_Int instructions don't load the higher lanes, so folding is fine. This happens for the straightforward intrinsic code, e.g.: _mm_add_ss(a, _mm_load_ss(p)); Fixes PR23349. Differential Revision: http://reviews.llvm.org/D10554 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240326 91177308-0d34-0410-b5e6-96231b3b80d8
143 lines
3.9 KiB
LLVM
143 lines
3.9 KiB
LLVM
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that we're folding the load into the math instruction.
; This pattern is generated out of the simplest intrinsics usage:
;   _mm_add_ss(a, _mm_load_ss(b));

; Adds a float loaded from %pb to lane 0 of %va and writes the sum back into
; lane 0; the remaining lanes of %va are returned unchanged. The checks expect
; the scalar load to appear as the memory operand of (v)addss.
define <4 x float> @addss(<4 x float> %va, float* %pb) {
; SSE-LABEL: addss:
; SSE: # BB#0:
; SSE-NEXT: addss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: addss:
; AVX: # BB#0:
; AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fadd float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
|
|
|
|
; Adds a double loaded from %pb to lane 0 of %va and writes the sum back into
; lane 0; the upper lane of %va is returned unchanged. The checks expect the
; scalar load to appear as the memory operand of (v)addsd.
define <2 x double> @addsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: addsd:
; SSE: # BB#0:
; SSE-NEXT: addsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: addsd:
; AVX: # BB#0:
; AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fadd double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
|
|
|
|
; Subtracts a float loaded from %pb from lane 0 of %va and writes the result
; back into lane 0; the remaining lanes of %va are returned unchanged. The
; checks expect the scalar load to appear as the memory operand of (v)subss.
define <4 x float> @subss(<4 x float> %va, float* %pb) {
; SSE-LABEL: subss:
; SSE: # BB#0:
; SSE-NEXT: subss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: subss:
; AVX: # BB#0:
; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fsub float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
|
|
|
|
; Subtracts a double loaded from %pb from lane 0 of %va and writes the result
; back into lane 0; the upper lane of %va is returned unchanged. The checks
; expect the scalar load to appear as the memory operand of (v)subsd.
define <2 x double> @subsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: subsd:
; SSE: # BB#0:
; SSE-NEXT: subsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: subsd:
; AVX: # BB#0:
; AVX-NEXT: vsubsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fsub double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
|
|
|
|
; Multiplies lane 0 of %va by a float loaded from %pb and writes the product
; back into lane 0; the remaining lanes of %va are returned unchanged. The
; checks expect the scalar load to appear as the memory operand of (v)mulss.
define <4 x float> @mulss(<4 x float> %va, float* %pb) {
; SSE-LABEL: mulss:
; SSE: # BB#0:
; SSE-NEXT: mulss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mulss:
; AVX: # BB#0:
; AVX-NEXT: vmulss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fmul float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
|
|
|
|
; Multiplies lane 0 of %va by a double loaded from %pb and writes the product
; back into lane 0; the upper lane of %va is returned unchanged. The checks
; expect the scalar load to appear as the memory operand of (v)mulsd.
define <2 x double> @mulsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: mulsd:
; SSE: # BB#0:
; SSE-NEXT: mulsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mulsd:
; AVX: # BB#0:
; AVX-NEXT: vmulsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fmul double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
|
|
|
|
; Divides lane 0 of %va by a float loaded from %pb and writes the quotient
; back into lane 0; the remaining lanes of %va are returned unchanged. The
; checks expect the scalar load to appear as the memory operand of (v)divss.
define <4 x float> @divss(<4 x float> %va, float* %pb) {
; SSE-LABEL: divss:
; SSE: # BB#0:
; SSE-NEXT: divss (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: divss:
; AVX: # BB#0:
; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <4 x float> %va, i32 0
  %b = load float, float* %pb
  %r = fdiv float %a, %b
  %vr = insertelement <4 x float> %va, float %r, i32 0
  ret <4 x float> %vr
}
|
|
|
|
; Divides lane 0 of %va by a double loaded from %pb and writes the quotient
; back into lane 0; the upper lane of %va is returned unchanged. The checks
; expect the scalar load to appear as the memory operand of (v)divsd.
define <2 x double> @divsd(<2 x double> %va, double* %pb) {
; SSE-LABEL: divsd:
; SSE: # BB#0:
; SSE-NEXT: divsd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: divsd:
; AVX: # BB#0:
; AVX-NEXT: vdivsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
  %a = extractelement <2 x double> %va, i32 0
  %b = load double, double* %pb
  %r = fdiv double %a, %b
  %vr = insertelement <2 x double> %va, double %r, i32 0
  ret <2 x double> %vr
}
|