llvm/test/CodeGen/X86/pr17631.ll
Michael Kuperstein f48b1beeec [X86] Fix fptoui conversions
This fixes two issues in x86 fptoui lowering.
1) Makes conversions from f80 go through the right path on AVX-512.
2) Implements an inline sequence for fptoui i64 instead of a library
call. This improves performance by 6X on SSE3+ and 3X otherwise.
Incidentally, it also removes the use of ftol2 for fptoui, which was
wrong to begin with, as ftol2 converts to a signed i64, producing
wrong results for values >= 2^63.

Patch by: mitch.l.bodart@intel.com
Differential Revision: http://reviews.llvm.org/D11316

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245924 91177308-0d34-0410-b5e6-96231b3b80d8
2015-08-25 07:42:09 +00:00

35 lines
812 B
LLVM

; Compiles this file with llc for the core-avx-i CPU on the i386-pc-win32
; triple and pipes the resulting assembly into FileCheck, which verifies it
; against the assertion comments placed after each function below.
; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s
; Aggregate made of an array of 64 <8 x float> vectors followed by one more
; <8 x float> vector. In the visible part of this file it is only used as the
; type of a stack allocation inside @equal.
%struct_type = type { [64 x <8 x float>], <8 x float> }
; Function Attrs: nounwind readnone
; NOTE(review): the attribute comment above is not backed by an attribute on
; the declaration itself; it looks like leftover output from the IR printer.
; Declaration of the 256-bit AVX movmsk.ps intrinsic: takes an <8 x float>
; and returns an i32.
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
; @equal: reinterprets the <8 x i32> argument as <8 x float>, feeds it to the
; 256-bit movmsk.ps intrinsic and returns the resulting i32.
;
; The two allocas are never read or written; they only enlarge the stack
; frame. The assertions after the function expect a _chkstk call in the
; generated code and require that no vzeroupper is emitted between the start
; of the function and that call.
define i32 @equal(<8 x i32> %A) {
allocas:
  ; Unused stack objects: 64 vectors of <8 x i32>, plus one %struct_type.
  %first_alloc = alloca [64 x <8 x i32>]
  %second_alloc = alloca %struct_type
  ; Same bits, viewed as floats so they can be passed to the intrinsic.
  %A1 = bitcast <8 x i32> %A to <8 x float>
  %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1)
  ret i32 %A2
}
; Anchor on the function's own assembly label (emitted as _equal: for this
; triple) so the assertions below are confined to this function's body.
; CHECK-LABEL: equal:
; CHECK-NOT: vzeroupper
; CHECK: _chkstk
; CHECK: ret
; @foo: converts the double %x to an unsigned 64-bit integer, stores it
; through %p, and returns %y + %y as an <8 x float>.
;
; The assertions after the function require that the conversion shows up as
; an instruction whose mnemonic contains "cvtt" or "fist", and that no
; vzeroupper is emitted between the start of the function and that
; instruction.
define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
  ; Unsigned conversion under test.
  %i = fptoui double %x to i64
  store i64 %i, i64* %p
  ; 256-bit vector arithmetic so the function works with wide vector values.
  %ret = fadd <8 x float> %y, %y
  ret <8 x float> %ret
}
; Anchor on the function's own assembly label (emitted as _foo: for this
; triple) so the assertions below are confined to this function's body.
; CHECK-LABEL: foo:
; CHECK-NOT: vzeroupper
; CHECK: {{cvtt|fist}}
; CHECK: ret