mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-16 00:16:50 +00:00
1ac52953b4
This generalizes the build_vector -> vector_shuffle combine to support any number of inputs. The idea is to create a binary tree of shuffles, where the first layer performs pairwise shuffles of the input vectors placing each input element into the correct lane, and the rest of the tree blends these shuffles together. This doesn't try to be smart and create any sort of "optimal" shuffles. The assumption is that even a "poor" shuffle sequence is better than extracting and inserting the elements one by one. Differential Revision: https://reviews.llvm.org/D24683 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@283480 91177308-0d34-0410-b5e6-96231b3b80d8
20 lines
637 B
LLVM
20 lines
637 B
LLVM
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx < %s | FileCheck %s

; Reinterprets %p and %q as pointers to <12 x float>, loads one vector through
; each, computes the lane-wise difference (the %q vector minus the %p vector),
; and returns lanes 0, 3, 6 and 9 of that difference as a <4 x float>.
define <4 x float> @bar(float* %p, float* %q) {
  %1 = bitcast float* %p to <12 x float>*
  %2 = bitcast float* %q to <12 x float>*
  %3 = load <12 x float>, <12 x float>* %1, align 16
  %4 = load <12 x float>, <12 x float>* %2, align 16
  %5 = fsub <12 x float> %4, %3
  ; Only the first shuffle operand is referenced: every mask index is below 12,
  ; so the undef second operand never contributes a lane.
  %6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  ret <4 x float> %6

; The directives below pin the generated code to one vsldoi, a vmrghw/vmrglw
; pair, three more vsldoi instructions and then the return, each on the output
; line immediately following the previous match.
; CHECK: vsldoi
; CHECK-NEXT: vmrghw
; CHECK-NEXT: vmrglw
; CHECK-NEXT: vsldoi
; CHECK-NEXT: vsldoi
; CHECK-NEXT: vsldoi
; CHECK-NEXT: blr
}