mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 12:19:53 +00:00
982f60be44
The initial code drop for VSX swap optimization permitted the optimization only when all operations in a web of related computation are lane-insensitive. For some lane-sensitive operations, we can still permit the optimization provided that we make adjustments to those operations. This patch adds special handling for vector splats so that their presence doesn't kill the optimization. Vector splats are lane-sensitive since they identify by number a vector element to be used as the source of a splat. When swap optimizations take place, the desired vector element will move to the opposite doubleword of the quadword vector. We thus replace the index I by (I + N/2) % N, where N is the number of elements in the vector. A new test case is added to test that swap optimization succeeds when vector splats are present, and that the proper input element is used as the source of the splat. An ancillary change removes SH_BUILDVEC as one of the kinds of special handling that may be required by VSX swap optimization. From experience with GCC, I had expected to need some modifications for vector build operations, but I did not find that to be the case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236606 91177308-0d34-0410-b5e6-96231b3b80d8
92 lines
2.8 KiB
LLVM
92 lines
2.8 KiB
LLVM
; RUN: llc -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; Test swap removal when a vector splat must be adjusted to make it legal.
|
|
;
|
|
; Test generated from following C code:
|
|
;
|
|
; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
|
; vector char vcr;
|
|
; vector short vs = {0, 1, 2, 3, 4, 5, 6, 7};
|
|
; vector short vsr;
|
|
; vector int vi = {0, 1, 2, 3};
|
|
; vector int vir;
|
|
;
|
|
; void cfoo ()
|
|
; {
|
|
; vcr = (vector char){vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5],
|
|
; vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5]};
|
|
; }
|
|
;
|
|
; void sfoo ()
|
|
; {
|
|
; vsr = (vector short){vs[6], vs[6], vs[6], vs[6],
|
|
; vs[6], vs[6], vs[6], vs[6]};
|
|
; }
|
|
;
|
|
; void ifoo ()
|
|
; {
|
|
; vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
|
|
; }
|
|
|
|
; Input vectors: each element is initialized to its own index, so the value
; that ends up splatted identifies which source lane was selected.
@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
|
|
@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
|
|
@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
|
|
; Result vectors: zero-initialized; @cfoo, @sfoo and @ifoo store their splat
; results into @vcr, @vsr and @vir respectively.
@vcr = common global <16 x i8> zeroinitializer, align 16
|
|
@vsr = common global <8 x i16> zeroinitializer, align 16
|
|
@vir = common global <4 x i32> zeroinitializer, align 16
|
|
|
|
; Byte case: load @vc, broadcast its element 5 into all 16 lanes, and store
; the resulting vector to @vcr.
define void @cfoo() {
entry:
  %src = load <16 x i8>, <16 x i8>* @vc, align 16
  %splat = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  store <16 x i8> %splat, <16 x i8>* @vcr, align 16
  ret void
}
|
|
|
|
; Halfword case: load @vs, broadcast its element 6 into all 8 lanes, and store
; the resulting vector to @vsr.
define void @sfoo() {
entry:
  %src = load <8 x i16>, <8 x i16>* @vs, align 16
  %splat = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
  store <8 x i16> %splat, <8 x i16>* @vsr, align 16
  ret void
}
|
|
|
|
; Word case: load @vi, broadcast its element 1 into all 4 lanes, and store
; the resulting vector to @vir.
define void @ifoo() {
entry:
  %src = load <4 x i32>, <4 x i32>* @vi, align 16
  %splat = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %splat, <4 x i32>* @vir, align 16
  ret void
}
|
|
|
|
; Justification:
|
|
; Byte splat of element 5 (BE) becomes element 15-5 = 10 (LE)
|
|
; which becomes (10+8)%16 = 2 (LE swapped).
|
|
;
|
|
; Halfword splat of element 6 (BE) becomes element 7-6 = 1 (LE)
|
|
; which becomes (1+4)%8 = 5 (LE swapped).
|
|
;
|
|
; Word splat of element 1 (BE) becomes element 3-1 = 2 (LE)
|
|
; which becomes (2+2)%4 = 0 (LE swapped).
|
|
|
|
; The "no swap" assertions are repeated inside every labelled function block.
; FileCheck matches each labelled block independently, so negative checks
; placed ahead of the first label would only cover the assembly preamble that
; precedes @cfoo and would never look at the function bodies themselves.
; Within each function they guard the regions before the load, between the
; load and the splat, and between the splat and the store.

; CHECK-LABEL: @cfoo
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: lxvd2x
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 2
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: stxvd2x

; CHECK-LABEL: @sfoo
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: lxvd2x
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 5
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: stxvd2x

; CHECK-LABEL: @ifoo
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: lxvd2x
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: vspltw {{[0-9]+}}, {{[0-9]+}}, 0
; CHECK-NOT: xxpermdi
; CHECK-NOT: xxswapd
; CHECK: stxvd2x