mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 14:12:11 +00:00
7fcb422bb2
lanes in vector splats. The core problem here is that undef lanes can't *unilaterally* be considered to contribute to splats. Their handling needs to be more cautious. There is also a reported failure of the nightly testers (thanks Tobias!) that may well stem from the same core issue. I'm going to fix this theoretical issue, factor the APIs a bit better, and then verify that I don't see anything bad with Tobias's reduction from the test suite before recommitting. Original commit message for r212324: [x86] Generalize BuildVectorSDNode::getConstantSplatValue to work for any constant, constant FP, or undef splat and to tolerate any undef lanes in a splat, then replace all uses of isSplatVector in X86's lowering with it. This fixes issues where undef lanes in an otherwise splat vector would prevent the splat logic from firing. It is a touch more awkward to use this interface, but it is much more accurate. Suggestions for better interface structuring welcome. With this fix, the code generated with the widening legalization strategy for widen_cast-4.ll is *dramatically* improved as the special lowering strategies for a v16i8 SRA kick in even though the high lanes are undef. We also get a slightly different choice for broadcasting an aligned memory location, and use vpshufd instead of vbroadcastss. This looks like a minor win for pipelining and domain crossing, but a minor loss for the number of micro-ops. I suspect it's a wash, but folks can easily tweak the lowering if they want. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212475 91177308-0d34-0410-b5e6-96231b3b80d8
95 lines
2.4 KiB
LLVM
95 lines
2.4 KiB
LLVM
; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7-avx | FileCheck %s
|
|
; RUN: opt -instsimplify -disable-output < %s
|
|
|
|
;CHECK-LABEL: AGEP0:
|
|
; AGEP0 - vector GEP on a splatted pointer with constant indices.
; Inserts %ptr into all four lanes of an undef <4 x i32*> vector, applies two
; vector getelementptrs with constant index vectors, and returns the result.
; The FileCheck directives in the body require a line containing "vbroadcast",
; then "vpaddd" on the very next output line, then "ret" on the line after.
define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP0
|
|
;CHECK: vbroadcast
|
|
;CHECK-NEXT: vpaddd
|
|
;CHECK-NEXT: ret
|
|
; Build the pointer splat one lane at a time (lanes 0 through 3).
%vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
|
|
%vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
|
|
%vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
|
|
%vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
|
|
; Two back-to-back GEPs with constant per-lane indices. The directives above
; leave room for only one vpaddd line between the broadcast and the return, so
; presumably both GEPs are expected to be combined into a single add.
%A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
|
|
%A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
|
|
ret <4 x i32*> %A3
|
|
}
|
|
|
|
;CHECK-LABEL: AGEP1:
|
|
; AGEP1 - constant-index vector GEP followed by a scalar load.
; Applies a vector getelementptr with constant indices to %param, extracts
; lane 3 of the resulting pointer vector, loads an i32 through it and returns
; the loaded value.
; The FileCheck directives in the body require "vpaddd", "vpextrd" and "movl"
; on three consecutive output lines, followed later by "ret".
define i32 @AGEP1(<4 x i32*> %param) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP1
|
|
;CHECK: vpaddd
|
|
;CHECK-NEXT: vpextrd
|
|
;CHECK-NEXT: movl
|
|
; Per-lane constant element indices 1, 2, 3, 4 applied to the pointer vector.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
|
|
; Only the last lane (index 3) is used for the load.
%k = extractelement <4 x i32*> %A2, i32 3
|
|
%v = load i32* %k
|
|
ret i32 %v
|
|
;CHECK: ret
|
|
}
|
|
|
|
;CHECK-LABEL: AGEP2:
|
|
; AGEP2 - variable-index vector GEP followed by a scalar load.
; Applies a vector getelementptr to %param using the runtime index vector
; %off, extracts lane 3 of the result, loads an i32 through it and returns the
; loaded value.
; The FileCheck directives in the body require "vpslld $2", then a line
; containing "vpadd" immediately after it, and later "ret".
define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP2
|
|
;CHECK: vpslld $2
|
|
;CHECK-NEXT: vpadd
|
|
; Element type is i32; the expected left shift by 2 presumably scales each
; index by the 4-byte element size before it is added to the base pointers.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
|
|
; Only the last lane (index 3) is used for the load.
%k = extractelement <4 x i32*> %A2, i32 3
|
|
%v = load i32* %k
|
|
ret i32 %v
|
|
;CHECK: ret
|
|
}
|
|
|
|
;CHECK-LABEL: AGEP3:
|
|
; AGEP3 - variable-index vector GEP whose result is modified and returned.
; Applies a vector getelementptr to %param using the runtime index vector
; %off, overwrites lane 3 of the result with the address of a fresh i32
; alloca, and returns the pointer vector.
; The FileCheck directives in the body require "vpslld $2", then a line
; containing "vpadd" immediately after it, and later "ret".
define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP3
|
|
;CHECK: vpslld $2
|
|
;CHECK-NEXT: vpadd
|
|
; Element type is i32; the expected left shift by 2 presumably scales each
; index by the 4-byte element size before it is added to the base pointers.
%A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
|
|
; Stack slot whose address replaces lane 3 of the GEP result.
%v = alloca i32
|
|
%k = insertelement <4 x i32*> %A2, i32* %v, i32 3
|
|
ret <4 x i32*> %k
|
|
;CHECK: ret
|
|
}
|
|
|
|
;CHECK-LABEL: AGEP4:
|
|
; AGEP4 - variable-index vector GEP over i16 elements.
; Applies a vector getelementptr to the <4 x i16*> vector %param using the
; runtime index vector %off and returns the resulting pointer vector.
; The FileCheck directives in the body require two consecutive output lines
; that each contain "vpadd", followed later by "ret".
define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP4
|
|
; Multiply offset by two (add it to itself).
|
|
;CHECK: vpadd
|
|
; Add the base to the offset.
|
|
;CHECK-NEXT: vpadd
|
|
%A = getelementptr <4 x i16*> %param, <4 x i32> %off
|
|
ret <4 x i16*> %A
|
|
;CHECK: ret
|
|
}
|
|
|
|
;CHECK-LABEL: AGEP5:
|
|
; AGEP5 - vector GEP over i8 elements with a narrow (<4 x i8>) index vector.
; Applies a vector getelementptr to the <4 x i8*> vector %param using the
; runtime index vector %off and returns the resulting pointer vector.
; The FileCheck directives in the body require a line containing "vpaddd",
; followed later by "ret".
define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP5
|
|
;CHECK: vpaddd
|
|
; Note that the index vector is <4 x i8> here, unlike the <4 x i32> index
; vectors used by the other variable-index functions in this file.
%A = getelementptr <4 x i8*> %param, <4 x i8> %off
|
|
ret <4 x i8*> %A
|
|
;CHECK: ret
|
|
}
|
|
|
|
|
|
; The size of each element is 1 byte. No need to multiply by element size.
|
|
;CHECK-LABEL: AGEP6:
|
|
; AGEP6 - vector GEP over i8 elements with a <4 x i32> index vector.
; Applies a vector getelementptr to the <4 x i8*> vector %param using the
; runtime index vector %off and returns the resulting pointer vector.
; The FileCheck directives in the body forbid any "pslld" in the output
; between the function label and the matched "ret".
define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
|
|
entry:
|
|
;CHECK-LABEL: AGEP6
|
|
; Negative check: i8 elements are one byte wide, so no shift of the index
; vector should be emitted.
;CHECK-NOT: pslld
|
|
%A = getelementptr <4 x i8*> %param, <4 x i32> %off
|
|
ret <4 x i8*> %A
|
|
;CHECK: ret
|
|
}
|
|
|