mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-15 18:49:51 +00:00
Recognize unpckh* masks and match the 256-bit versions. The new versions
differ from the previous 128-bit ones because they operate independently within each 128-bit lane. Update a few comments and add testcases.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@136157 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c680b92460
commit
4ea496846a
@ -167,23 +167,22 @@ void DecodeUNPCKLPMask(EVT VT,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
// Handle vector lengths > 128 bits. Define a "section" as a set of
|
||||
// 128 bits. AVX defines UNPCK* to operate independently on 128-bit
|
||||
// sections.
|
||||
unsigned NumSections = VT.getSizeInBits() / 128;
|
||||
if (NumSections == 0 ) NumSections = 1; // Handle MMX
|
||||
unsigned NumSectionElts = NumElts / NumSections;
|
||||
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
|
||||
// independently on 128-bit lanes.
|
||||
unsigned NumLanes = VT.getSizeInBits() / 128;
|
||||
if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
|
||||
unsigned NumLaneElts = NumElts / NumLanes;
|
||||
|
||||
unsigned Start = 0;
|
||||
unsigned End = NumSectionElts / 2;
|
||||
for (unsigned s = 0; s < NumSections; ++s) {
|
||||
unsigned End = NumLaneElts / 2;
|
||||
for (unsigned s = 0; s < NumLanes; ++s) {
|
||||
for (unsigned i = Start; i != End; ++i) {
|
||||
ShuffleMask.push_back(i); // Reads from dest/src1
|
||||
ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
|
||||
ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
|
||||
}
|
||||
// Process the next 128 bits.
|
||||
Start += NumSectionElts;
|
||||
End += NumSectionElts;
|
||||
Start += NumLaneElts;
|
||||
End += NumLaneElts;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2711,6 +2711,8 @@ static bool isTargetShuffle(unsigned Opcode) {
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKHWD:
|
||||
case X86ISD::PUNPCKHBW:
|
||||
case X86ISD::PUNPCKHDQ:
|
||||
@ -2782,6 +2784,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
||||
case X86ISD::PUNPCKLQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKHWD:
|
||||
case X86ISD::PUNPCKHBW:
|
||||
case X86ISD::PUNPCKHDQ:
|
||||
@ -3219,20 +3223,22 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
|
||||
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
|
||||
bool V2IsSplat = false) {
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
|
||||
|
||||
assert((VT.is128BitVector() || VT.is256BitVector()) &&
|
||||
"Unsupported vector type for unpckh");
|
||||
|
||||
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
|
||||
return false;
|
||||
|
||||
// Handle vector lengths > 128 bits. Define a "section" as a set of
|
||||
// 128 bits. AVX defines UNPCK* to operate independently on 128-bit
|
||||
// sections.
|
||||
unsigned NumSections = VT.getSizeInBits() / 128;
|
||||
if (NumSections == 0 ) NumSections = 1; // Handle MMX
|
||||
unsigned NumSectionElts = NumElts / NumSections;
|
||||
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
|
||||
// independently on 128-bit lanes.
|
||||
unsigned NumLanes = VT.getSizeInBits()/128;
|
||||
unsigned NumLaneElts = NumElts/NumLanes;
|
||||
|
||||
unsigned Start = 0;
|
||||
unsigned End = NumSectionElts;
|
||||
for (unsigned s = 0; s < NumSections; ++s) {
|
||||
for (unsigned i = Start, j = s * NumSectionElts;
|
||||
unsigned End = NumLaneElts;
|
||||
for (unsigned s = 0; s < NumLanes; ++s) {
|
||||
for (unsigned i = Start, j = s * NumLaneElts;
|
||||
i != End;
|
||||
i += 2, ++j) {
|
||||
int BitI = Mask[i];
|
||||
@ -3248,8 +3254,8 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
|
||||
}
|
||||
}
|
||||
// Process the next 128 bits.
|
||||
Start += NumSectionElts;
|
||||
End += NumSectionElts;
|
||||
Start += NumLaneElts;
|
||||
End += NumLaneElts;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -3266,21 +3272,38 @@ bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
|
||||
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
|
||||
bool V2IsSplat = false) {
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
|
||||
|
||||
assert((VT.is128BitVector() || VT.is256BitVector()) &&
|
||||
"Unsupported vector type for unpckh");
|
||||
|
||||
if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
|
||||
return false;
|
||||
|
||||
for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
|
||||
int BitI = Mask[i];
|
||||
int BitI1 = Mask[i+1];
|
||||
if (!isUndefOrEqual(BitI, j + NumElts/2))
|
||||
return false;
|
||||
if (V2IsSplat) {
|
||||
if (isUndefOrEqual(BitI1, NumElts))
|
||||
return false;
|
||||
} else {
|
||||
if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
|
||||
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
|
||||
// independently on 128-bit lanes.
|
||||
unsigned NumLanes = VT.getSizeInBits()/128;
|
||||
unsigned NumLaneElts = NumElts/NumLanes;
|
||||
|
||||
unsigned Start = 0;
|
||||
unsigned End = NumLaneElts;
|
||||
for (unsigned l = 0; l != NumLanes; ++l) {
|
||||
for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
|
||||
i != End; i += 2, ++j) {
|
||||
int BitI = Mask[i];
|
||||
int BitI1 = Mask[i+1];
|
||||
if (!isUndefOrEqual(BitI, j))
|
||||
return false;
|
||||
if (V2IsSplat) {
|
||||
if (isUndefOrEqual(BitI1, NumElts))
|
||||
return false;
|
||||
} else {
|
||||
if (!isUndefOrEqual(BitI1, j+NumElts))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Process the next 128 bits.
|
||||
Start += NumLaneElts;
|
||||
End += NumLaneElts;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -3299,16 +3322,14 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
|
||||
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
|
||||
return false;
|
||||
|
||||
// Handle vector lengths > 128 bits. Define a "section" as a set of
|
||||
// 128 bits. AVX defines UNPCK* to operate independently on 128-bit
|
||||
// sections.
|
||||
unsigned NumSections = VT.getSizeInBits() / 128;
|
||||
if (NumSections == 0 ) NumSections = 1; // Handle MMX
|
||||
unsigned NumSectionElts = NumElems / NumSections;
|
||||
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
|
||||
// independently on 128-bit lanes.
|
||||
unsigned NumLanes = VT.getSizeInBits() / 128;
|
||||
unsigned NumLaneElts = NumElems / NumLanes;
|
||||
|
||||
for (unsigned s = 0; s < NumSections; ++s) {
|
||||
for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
|
||||
i != NumSectionElts * (s + 1);
|
||||
for (unsigned s = 0; s < NumLanes; ++s) {
|
||||
for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
|
||||
i != NumLaneElts * (s + 1);
|
||||
i += 2, ++j) {
|
||||
int BitI = Mask[i];
|
||||
int BitI1 = Mask[i+1];
|
||||
@ -4095,6 +4116,8 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
||||
break;
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
DecodeUNPCKHPMask(NumElems, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::PUNPCKLBW:
|
||||
@ -5751,6 +5774,8 @@ static inline unsigned getUNPCKHOpcode(EVT VT) {
|
||||
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
|
||||
case MVT::v4f32: return X86ISD::UNPCKHPS;
|
||||
case MVT::v2f64: return X86ISD::UNPCKHPD;
|
||||
case MVT::v8f32: return X86ISD::VUNPCKHPSY;
|
||||
case MVT::v4f64: return X86ISD::VUNPCKHPDY;
|
||||
case MVT::v16i8: return X86ISD::PUNPCKHBW;
|
||||
case MVT::v8i16: return X86ISD::PUNPCKHWD;
|
||||
default:
|
||||
@ -12597,6 +12622,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::PUNPCKHQDQ:
|
||||
case X86ISD::UNPCKHPS:
|
||||
case X86ISD::UNPCKHPD:
|
||||
case X86ISD::VUNPCKHPSY:
|
||||
case X86ISD::VUNPCKHPDY:
|
||||
case X86ISD::PUNPCKLBW:
|
||||
case X86ISD::PUNPCKLWD:
|
||||
case X86ISD::PUNPCKLDQ:
|
||||
|
@ -261,6 +261,8 @@ namespace llvm {
|
||||
VUNPCKLPDY,
|
||||
UNPCKHPS,
|
||||
UNPCKHPD,
|
||||
VUNPCKHPSY,
|
||||
VUNPCKHPDY,
|
||||
PUNPCKLBW,
|
||||
PUNPCKLWD,
|
||||
PUNPCKLDQ,
|
||||
|
@ -133,12 +133,15 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
|
||||
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
|
||||
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
|
||||
|
||||
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
|
||||
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
|
||||
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
|
||||
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
|
||||
def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
|
||||
def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
|
||||
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
|
||||
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
|
||||
|
||||
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
|
||||
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
|
||||
def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
|
||||
def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
|
||||
|
||||
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
|
||||
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
|
||||
|
@ -5677,6 +5677,12 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
|
||||
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// Shuffle with VUNPCKHPSY
|
||||
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with UNPCKLPD
|
||||
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
@ -5703,6 +5709,12 @@ def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
|
||||
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// Shuffle with VUNPCKHPDY
|
||||
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
|
||||
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
|
||||
|
||||
// Shuffle with MOVLHPS
|
||||
def : Pat<(X86Movlhps VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||
|
@ -1,5 +0,0 @@
|
||||
load_lib llvm.exp
|
||||
|
||||
if { [llvm_supports_target X86] } {
|
||||
RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
; RUN: llc < %s -mattr=+avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
|
||||
entry:
|
||||
%incarray1 = alloca [2 x <4 x double>]*, align 8
|
||||
%incarrayb1 = alloca [2 x <4 x double>]*, align 8
|
||||
%carray = alloca [2 x <4 x double>], align 16
|
||||
%r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
|
||||
%rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
|
||||
%r3 = load <4 x double>* %r, align 8
|
||||
%r4 = load <4 x double>* %rb, align 8
|
||||
%r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x double>> [#uses=1]
|
||||
; CHECK-NOT: vunpcklpd %ymm
|
||||
%r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
|
||||
store <4 x double> %r11, <4 x double>* %r12, align 4
|
||||
ret void
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
; RUN: llc < %s -mattr=+avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
|
||||
enmtry:
|
||||
%incarray1 = alloca [2 x <8 x float>]*, align 8
|
||||
%incarrayb1 = alloca [2 x <8 x float>]*, align 8
|
||||
%carray = alloca [2 x <8 x float>], align 16
|
||||
%r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
|
||||
%rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
|
||||
%r3 = load <8 x float>* %r, align 8
|
||||
%r4 = load <8 x float>* %rb, align 8
|
||||
%r8 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x float>> [#uses=1]
|
||||
; CHECK-NOT: vunpcklps %ymm
|
||||
%r9 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 0
|
||||
store <8 x float> %r8, <8 x float>* %r9, align 4
|
||||
ret void
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
; RUN: llc < %s -mattr=+avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @try_([2 x <4 x double>]* noalias %incarray, [2 x <4 x double>]* noalias %incarrayb ) {
|
||||
entry:
|
||||
%incarray1 = alloca [2 x <4 x double>]*, align 8
|
||||
%incarrayb1 = alloca [2 x <4 x double>]*, align 8
|
||||
%carray = alloca [2 x <4 x double>], align 16
|
||||
%r = getelementptr [2 x <4 x double>]* %incarray, i32 0, i32 0
|
||||
%rb = getelementptr [2 x <4 x double>]* %incarrayb, i32 0, i32 0
|
||||
%r3 = load <4 x double>* %r, align 8
|
||||
%r4 = load <4 x double>* %rb, align 8
|
||||
%r11 = shufflevector <4 x double> %r3, <4 x double> %r4, <4 x i32> < i32 0, i32 4, i32 2, i32 6 > ; <<4 x double>> [#uses=1]
|
||||
; CHECK: vunpcklpd
|
||||
%r12 = getelementptr [2 x <4 x double>]* %carray, i32 0, i32 1
|
||||
store <4 x double> %r11, <4 x double>* %r12, align 4
|
||||
ret void
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
; RUN: llc < %s -mattr=+avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @try_([2 x <8 x float>]* noalias %incarray, [2 x <8 x float>]* noalias %incarrayb ) {
|
||||
entry:
|
||||
%incarray1 = alloca [2 x <8 x float>]*, align 8
|
||||
%incarrayb1 = alloca [2 x <8 x float>]*, align 8
|
||||
%carray = alloca [2 x <8 x float>], align 16
|
||||
%r = getelementptr [2 x <8 x float>]* %incarray, i32 0, i32 0
|
||||
%rb = getelementptr [2 x <8 x float>]* %incarrayb, i32 0, i32 0
|
||||
%r3 = load <8 x float>* %r, align 8
|
||||
%r4 = load <8 x float>* %rb, align 8
|
||||
%r11 = shufflevector <8 x float> %r3, <8 x float> %r4, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13 > ; <<8 x float>> [#uses=1]
|
||||
; CHECK: vunpcklps
|
||||
%r12 = getelementptr [2 x <8 x float>]* %carray, i32 0, i32 1
|
||||
store <8 x float> %r11, <8 x float>* %r12, align 4
|
||||
ret void
|
||||
}
|
58
test/CodeGen/X86/avx-256-unpack.ll
Normal file
58
test/CodeGen/X86/avx-256-unpack.ll
Normal file
@ -0,0 +1,58 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
||||
|
||||
; CHECK: vunpckhps
|
||||
define <8 x float> @unpackhips(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
|
||||
ret <8 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpckhpd
|
||||
define <4 x double> @unpackhipd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklps
|
||||
define <8 x float> @unpacklops(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
|
||||
ret <8 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK: vunpcklpd
|
||||
define <4 x double> @unpacklopd(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK-NOT: vunpcklps %ymm
|
||||
define <8 x float> @unpacklops-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
|
||||
ret <8 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK-NOT: vunpcklpd %ymm
|
||||
define <4 x double> @unpacklopd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK-NOT: vunpckhps %ymm
|
||||
define <8 x float> @unpackhips-not(<8 x float> %src1, <8 x float> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <8 x float> %src1, <8 x float> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13>
|
||||
ret <8 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; CHECK-NOT: vunpckhpd %ymm
|
||||
define <4 x double> @unpackhipd-not(<4 x double> %src1, <4 x double> %src2) nounwind uwtable readnone ssp {
|
||||
entry:
|
||||
%shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user