mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-23 11:04:49 +00:00
[PowerPC] Cleanup cost model for unaligned vector loads/stores
I'm adding a regression test to better cover code generation for unaligned vector loads and stores, but there's no functional change to the code generation here. There is an improvement to the cost model for unaligned vector loads and stores, mostly for QPX (for which we were not previously accounting for the permutation-based loads), and the cost model implementation is cleaner. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@246712 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3d7575d7e2
commit
2551be3865
@ -320,31 +320,46 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||
|
||||
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
|
||||
|
||||
// VSX loads/stores support unaligned access.
|
||||
if (ST->hasVSX()) {
|
||||
if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
|
||||
return Cost;
|
||||
}
|
||||
// Aligned loads and stores are easy.
|
||||
unsigned SrcBytes = LT.second.getStoreSize();
|
||||
if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
|
||||
return Cost;
|
||||
|
||||
bool UnalignedAltivec =
|
||||
Src->isVectorTy() &&
|
||||
Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
|
||||
LT.second.getSizeInBits() == 128 &&
|
||||
Opcode == Instruction::Load;
|
||||
bool IsAltivecType = ST->hasAltivec() &&
|
||||
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
|
||||
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
|
||||
bool IsVSXType = ST->hasVSX() &&
|
||||
(LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
|
||||
bool IsQPXType = ST->hasQPX() &&
|
||||
(LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
|
||||
|
||||
// For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
|
||||
// P7, unaligned vector loads are more expensive than the permutation-based
|
||||
// load sequence, so that might be used instead, but regardless, the net cost
|
||||
// is about the same (not counting loop-invariant instructions).
|
||||
if (IsVSXType || (ST->hasVSX() && IsAltivecType))
|
||||
return Cost;
|
||||
|
||||
// If we can use the permutation-based load sequence, then this is also
|
||||
// relatively cheap (not counting loop-invariant instructions).
|
||||
bool PermutationLoad = Opcode == Instruction::Load &&
|
||||
(IsAltivecType || IsQPXType) &&
|
||||
Alignment >= LT.second.getScalarType().getStoreSize();
|
||||
if (PermutationLoad)
|
||||
return Cost;
|
||||
|
||||
// PPC in general does not support unaligned loads and stores. They'll need
|
||||
// to be decomposed based on the alignment factor.
|
||||
unsigned SrcBytes = LT.second.getStoreSize();
|
||||
if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
|
||||
Cost += LT.first*(SrcBytes/Alignment-1);
|
||||
|
||||
// For a vector type, there is also scalarization overhead (only for
|
||||
// stores, loads are expanded using the vector-load + permutation sequence,
|
||||
// which is much less expensive).
|
||||
if (Src->isVectorTy() && Opcode == Instruction::Store)
|
||||
for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
|
||||
Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
|
||||
}
|
||||
// Add the cost of each scalar load or store.
|
||||
Cost += LT.first*(SrcBytes/Alignment-1);
|
||||
|
||||
// For a vector type, there is also scalarization overhead (only for
|
||||
// stores, loads are expanded using the vector-load + permutation sequence,
|
||||
// which is much less expensive).
|
||||
if (Src->isVectorTy() && Opcode == Instruction::Store)
|
||||
for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
|
||||
Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
|
||||
|
||||
return Cost;
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
|
||||
; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
|
404
test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
Normal file
404
test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll
Normal file
@ -0,0 +1,404 @@
|
||||
; RUN: opt < %s -cost-model -analyze | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
|
||||
entry:
|
||||
%r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
ret <16 x i8> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v16i8
|
||||
; CHECK: cost of 1 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
|
||||
entry:
|
||||
%r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
ret <32 x i8> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v32i8
|
||||
; CHECK: cost of 2 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
ret <8 x i16> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v8i16
|
||||
; CHECK: cost of 1 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
|
||||
entry:
|
||||
%r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
ret <16 x i16> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v16i16
|
||||
; CHECK: cost of 2 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
ret <4 x i32> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v4i32
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
ret <8 x i32> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v8i32
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
|
||||
entry:
|
||||
%r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
ret <2 x i64> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v2i64
|
||||
; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
ret <4 x i64> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v4i64
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v4float
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v8float
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
|
||||
entry:
|
||||
%r = load <2 x double>, <2 x double>* %p, align 8
|
||||
ret <2 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v2double
|
||||
; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_v4double
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
|
||||
entry:
|
||||
%r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
ret <16 x i8> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v16i8
|
||||
; CHECK: cost of 1 for instruction: %r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
|
||||
entry:
|
||||
%r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
ret <32 x i8> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v32i8
|
||||
; CHECK: cost of 2 for instruction: %r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
ret <8 x i16> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v8i16
|
||||
; CHECK: cost of 1 for instruction: %r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
|
||||
entry:
|
||||
%r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
ret <16 x i16> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v16i16
|
||||
; CHECK: cost of 2 for instruction: %r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
ret <4 x i32> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v4i32
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
ret <8 x i32> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v8i32
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
|
||||
entry:
|
||||
%r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
ret <2 x i64> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v2i64
|
||||
; CHECK: cost of 1 for instruction: %r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
ret <4 x i64> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v4i64
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v4float
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v8float
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
|
||||
entry:
|
||||
%r = load <2 x double>, <2 x double>* %p, align 8
|
||||
ret <2 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v2double
|
||||
; CHECK: cost of 1 for instruction: %r = load <2 x double>, <2 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_p8v4double
|
||||
; CHECK: cost of 2 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv4float
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x float>, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv8float
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x float>, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv4double
|
||||
; CHECK: cost of 1 for instruction: %r = load <4 x double>, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x double>, <8 x double>* %p, align 8
|
||||
ret <8 x double> %r
|
||||
|
||||
; CHECK-LABEL: test_l_qv8double
|
||||
; CHECK: cost of 2 for instruction: %r = load <8 x double>, <8 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
|
||||
entry:
|
||||
store <16 x i8> %v, <16 x i8>* %p, align 1
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v16i8
|
||||
; CHECK: cost of 1 for instruction: store <16 x i8> %v, <16 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
|
||||
entry:
|
||||
store <32 x i8> %v, <32 x i8>* %p, align 1
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v32i8
|
||||
; CHECK: cost of 2 for instruction: store <32 x i8> %v, <32 x i8>* %p, align 1
|
||||
}
|
||||
|
||||
define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
|
||||
entry:
|
||||
store <8 x i16> %v, <8 x i16>* %p, align 2
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v8i16
|
||||
; CHECK: cost of 1 for instruction: store <8 x i16> %v, <8 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
|
||||
entry:
|
||||
store <16 x i16> %v, <16 x i16>* %p, align 2
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v16i16
|
||||
; CHECK: cost of 2 for instruction: store <16 x i16> %v, <16 x i16>* %p, align 2
|
||||
}
|
||||
|
||||
define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
|
||||
entry:
|
||||
store <4 x i32> %v, <4 x i32>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v4i32
|
||||
; CHECK: cost of 1 for instruction: store <4 x i32> %v, <4 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
|
||||
entry:
|
||||
store <8 x i32> %v, <8 x i32>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v8i32
|
||||
; CHECK: cost of 2 for instruction: store <8 x i32> %v, <8 x i32>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
|
||||
entry:
|
||||
store <2 x i64> %v, <2 x i64>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v2i64
|
||||
; CHECK: cost of 1 for instruction: store <2 x i64> %v, <2 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
|
||||
entry:
|
||||
store <4 x i64> %v, <4 x i64>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v4i64
|
||||
; CHECK: cost of 2 for instruction: store <4 x i64> %v, <4 x i64>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v4float
|
||||
; CHECK: cost of 1 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
|
||||
entry:
|
||||
store <8 x float> %v, <8 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v8float
|
||||
; CHECK: cost of 2 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
|
||||
entry:
|
||||
store <2 x double> %v, <2 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v2double
|
||||
; CHECK: cost of 1 for instruction: store <2 x double> %v, <2 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_v4double
|
||||
; CHECK: cost of 2 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv4float
|
||||
; CHECK: cost of 7 for instruction: store <4 x float> %v, <4 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
|
||||
entry:
|
||||
store <8 x float> %v, <8 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv8float
|
||||
; CHECK: cost of 15 for instruction: store <8 x float> %v, <8 x float>* %p, align 4
|
||||
}
|
||||
|
||||
define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv4double
|
||||
; CHECK: cost of 7 for instruction: store <4 x double> %v, <4 x double>* %p, align 8
|
||||
}
|
||||
|
||||
define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
|
||||
entry:
|
||||
store <8 x double> %v, <8 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: test_s_qv8double
|
||||
; CHECK: cost of 15 for instruction: store <8 x double> %v, <8 x double>* %p, align 8
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="pwr7" }
|
||||
attributes #1 = { nounwind "target-cpu"="a2q" }
|
||||
attributes #2 = { nounwind "target-cpu"="pwr8" }
|
||||
|
580
test/CodeGen/PowerPC/unal-vec-ldst.ll
Normal file
580
test/CodeGen/PowerPC/unal-vec-ldst.ll
Normal file
@ -0,0 +1,580 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
define <16 x i8> @test_l_v16i8(<16 x i8>* %p) #0 {
|
||||
entry:
|
||||
%r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
ret <16 x i8> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v16i8
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <32 x i8> @test_l_v32i8(<32 x i8>* %p) #0 {
|
||||
entry:
|
||||
%r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
ret <32 x i8> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v32i8
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
|
||||
; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x i16> @test_l_v8i16(<8 x i16>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
ret <8 x i16> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v8i16
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <16 x i16> @test_l_v16i16(<16 x i16>* %p) #0 {
|
||||
entry:
|
||||
%r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
ret <16 x i16> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v16i16
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
|
||||
; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x i32> @test_l_v4i32(<4 x i32>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
ret <4 x i32> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v4i32
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x i32> @test_l_v8i32(<8 x i32>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
ret <8 x i32> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v8i32
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
|
||||
; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <2 x i64> @test_l_v2i64(<2 x i64>* %p) #0 {
|
||||
entry:
|
||||
%r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
ret <2 x i64> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v2i64
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x i64> @test_l_v4i64(<4 x i64>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
ret <4 x i64> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v4i64
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvd2x 34, 0, 3
|
||||
; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_v4float(<4 x float>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v4float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: lvsl [[REG2:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 0, 3
|
||||
; CHECK: vperm 2, [[REG4]], [[REG3]], [[REG2]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_v8float(<8 x float>* %p) #0 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v8float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK-DAG: lvsl [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: lvx [[REG4:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: lvx [[REG5:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: lvx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: vperm 3, [[REG5]], [[REG4]], [[REG3]]
|
||||
; CHECK-DAG: vperm 2, [[REG6]], [[REG5]], [[REG3]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <2 x double> @test_l_v2double(<2 x double>* %p) #0 {
|
||||
entry:
|
||||
%r = load <2 x double>, <2 x double>* %p, align 8
|
||||
ret <2 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v2double
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_v4double(<4 x double>* %p) #0 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_v4double
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvd2x 34, 0, 3
|
||||
; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <16 x i8> @test_l_p8v16i8(<16 x i8>* %p) #2 {
|
||||
entry:
|
||||
%r = load <16 x i8>, <16 x i8>* %p, align 1
|
||||
ret <16 x i8> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v16i8
|
||||
; CHECK: lxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <32 x i8> @test_l_p8v32i8(<32 x i8>* %p) #2 {
|
||||
entry:
|
||||
%r = load <32 x i8>, <32 x i8>* %p, align 1
|
||||
ret <32 x i8> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v32i8
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvw4x 34, 0, 3
|
||||
; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x i16> @test_l_p8v8i16(<8 x i16>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x i16>, <8 x i16>* %p, align 2
|
||||
ret <8 x i16> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v8i16
|
||||
; CHECK: lxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <16 x i16> @test_l_p8v16i16(<16 x i16>* %p) #2 {
|
||||
entry:
|
||||
%r = load <16 x i16>, <16 x i16>* %p, align 2
|
||||
ret <16 x i16> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v16i16
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvw4x 34, 0, 3
|
||||
; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x i32> @test_l_p8v4i32(<4 x i32>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x i32>, <4 x i32>* %p, align 4
|
||||
ret <4 x i32> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v4i32
|
||||
; CHECK: lxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x i32> @test_l_p8v8i32(<8 x i32>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x i32>, <8 x i32>* %p, align 4
|
||||
ret <8 x i32> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v8i32
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvw4x 34, 0, 3
|
||||
; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <2 x i64> @test_l_p8v2i64(<2 x i64>* %p) #2 {
|
||||
entry:
|
||||
%r = load <2 x i64>, <2 x i64>* %p, align 8
|
||||
ret <2 x i64> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v2i64
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x i64> @test_l_p8v4i64(<4 x i64>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x i64>, <4 x i64>* %p, align 8
|
||||
ret <4 x i64> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v4i64
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvd2x 34, 0, 3
|
||||
; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_p8v4float(<4 x float>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v4float
|
||||
; CHECK: lxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_p8v8float(<8 x float>* %p) #2 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v8float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvw4x 34, 0, 3
|
||||
; CHECK-DAG: lxvw4x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <2 x double> @test_l_p8v2double(<2 x double>* %p) #2 {
|
||||
entry:
|
||||
%r = load <2 x double>, <2 x double>* %p, align 8
|
||||
ret <2 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v2double
|
||||
; CHECK: lxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_p8v4double(<4 x double>* %p) #2 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_p8v4double
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: lxvd2x 34, 0, 3
|
||||
; CHECK-DAG: lxvd2x 35, 3, [[REG1]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x float> @test_l_qv4float(<4 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x float>, <4 x float>* %p, align 4
|
||||
ret <4 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_qv4float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 15
|
||||
; CHECK-DAG: qvlpclsx 0, 0, 3
|
||||
; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x float> @test_l_qv8float(<8 x float>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x float>, <8 x float>* %p, align 4
|
||||
ret <8 x float> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_qv8float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 16
|
||||
; CHECK-DAG: qvlfsx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: qvlpclsx [[REG5:[0-5]+]], 0, 3
|
||||
; CHECK-DAG: qvlfsx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvfperm 2, [[REG4]], [[REG3]], [[REG5]]
|
||||
; CHECK-DAG: qvfperm 1, [[REG6]], [[REG4]], [[REG5]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <4 x double> @test_l_qv4double(<4 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <4 x double>, <4 x double>* %p, align 8
|
||||
ret <4 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_qv4double
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 31
|
||||
; CHECK-DAG: qvlpcldx 0, 0, 3
|
||||
; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 0, 3
|
||||
; CHECK: qvfperm 1, [[REG3]], [[REG2]], 0
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define <8 x double> @test_l_qv8double(<8 x double>* %p) #1 {
|
||||
entry:
|
||||
%r = load <8 x double>, <8 x double>* %p, align 8
|
||||
ret <8 x double> %r
|
||||
|
||||
; CHECK-LABEL: @test_l_qv8double
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 63
|
||||
; CHECK-DAG: li [[REG2:[0-9]+]], 32
|
||||
; CHECK-DAG: qvlfdx [[REG3:[0-9]+]], 3, [[REG1]]
|
||||
; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 3, [[REG2]]
|
||||
; CHECK-DAG: qvlpcldx [[REG5:[0-5]+]], 0, 3
|
||||
; CHECK-DAG: qvlfdx [[REG6:[0-9]+]], 0, 3
|
||||
; CHECK-DAG: qvfperm 2, [[REG4]], [[REG3]], [[REG5]]
|
||||
; CHECK-DAG: qvfperm 1, [[REG6]], [[REG4]], [[REG5]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
|
||||
entry:
|
||||
store <16 x i8> %v, <16 x i8>* %p, align 1
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v16i8
|
||||
; CHECK: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v32i8(<32 x i8>* %p, <32 x i8> %v) #0 {
|
||||
entry:
|
||||
store <32 x i8> %v, <32 x i8>* %p, align 1
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v32i8
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
|
||||
entry:
|
||||
store <8 x i16> %v, <8 x i16>* %p, align 2
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v8i16
|
||||
; CHECK: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v16i16(<16 x i16>* %p, <16 x i16> %v) #0 {
|
||||
entry:
|
||||
store <16 x i16> %v, <16 x i16>* %p, align 2
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v16i16
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
|
||||
entry:
|
||||
store <4 x i32> %v, <4 x i32>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v4i32
|
||||
; CHECK: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v8i32(<8 x i32>* %p, <8 x i32> %v) #0 {
|
||||
entry:
|
||||
store <8 x i32> %v, <8 x i32>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v8i32
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v2i64(<2 x i64>* %p, <2 x i64> %v) #0 {
|
||||
entry:
|
||||
store <2 x i64> %v, <2 x i64>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v2i64
|
||||
; CHECK: stxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
|
||||
entry:
|
||||
store <4 x i64> %v, <4 x i64>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v4i64
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v4float(<4 x float>* %p, <4 x float> %v) #0 {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v4float
|
||||
; CHECK: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v8float(<8 x float>* %p, <8 x float> %v) #0 {
|
||||
entry:
|
||||
store <8 x float> %v, <8 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v8float
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvw4x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvw4x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v2double(<2 x double>* %p, <2 x double> %v) #0 {
|
||||
entry:
|
||||
store <2 x double> %v, <2 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v2double
|
||||
; CHECK: stxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_v4double(<4 x double>* %p, <4 x double> %v) #0 {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_v4double
|
||||
; CHECK-DAG: li [[REG1:[0-9]+]], 16
|
||||
; CHECK-DAG: stxvd2x 35, 3, [[REG1]]
|
||||
; CHECK-DAG: stxvd2x 34, 0, 3
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_qv4float(<4 x float>* %p, <4 x float> %v) #1 {
|
||||
entry:
|
||||
store <4 x float> %v, <4 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_qv4float
|
||||
; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
|
||||
; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
|
||||
; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
|
||||
; CHECK-DAG: stfs 1, 0(3)
|
||||
; CHECK-DAG: stfs [[REG1]], 12(3)
|
||||
; CHECK-DAG: stfs [[REG2]], 8(3)
|
||||
; CHECK-DAG: stfs [[REG3]], 4(3)
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_qv8float(<8 x float>* %p, <8 x float> %v) #1 {
|
||||
entry:
|
||||
store <8 x float> %v, <8 x float>* %p, align 4
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_qv8float
|
||||
; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
|
||||
; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
|
||||
; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
|
||||
; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
|
||||
; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
|
||||
; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
|
||||
; CHECK-DAG: stfs 2, 16(3)
|
||||
; CHECK-DAG: stfs 1, 0(3)
|
||||
; CHECK-DAG: stfs [[REG1]], 28(3)
|
||||
; CHECK-DAG: stfs [[REG2]], 24(3)
|
||||
; CHECK-DAG: stfs [[REG3]], 20(3)
|
||||
; CHECK-DAG: stfs [[REG4]], 12(3)
|
||||
; CHECK-DAG: stfs [[REG5]], 8(3)
|
||||
; CHECK-DAG: stfs [[REG6]], 4(3)
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_qv4double(<4 x double>* %p, <4 x double> %v) #1 {
|
||||
entry:
|
||||
store <4 x double> %v, <4 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_qv4double
|
||||
; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 1, 3
|
||||
; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 1, 2
|
||||
; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 1, 1
|
||||
; CHECK-DAG: stfd 1, 0(3)
|
||||
; CHECK-DAG: stfd [[REG1]], 24(3)
|
||||
; CHECK-DAG: stfd [[REG2]], 16(3)
|
||||
; CHECK-DAG: stfd [[REG3]], 8(3)
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @test_s_qv8double(<8 x double>* %p, <8 x double> %v) #1 {
|
||||
entry:
|
||||
store <8 x double> %v, <8 x double>* %p, align 8
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_s_qv8double
|
||||
; CHECK-DAG: qvesplati [[REG1:[0-9]+]], 2, 3
|
||||
; CHECK-DAG: qvesplati [[REG2:[0-9]+]], 2, 2
|
||||
; CHECK-DAG: qvesplati [[REG3:[0-9]+]], 2, 1
|
||||
; CHECK-DAG: qvesplati [[REG4:[0-9]+]], 1, 3
|
||||
; CHECK-DAG: qvesplati [[REG5:[0-9]+]], 1, 2
|
||||
; CHECK-DAG: qvesplati [[REG6:[0-9]+]], 1, 1
|
||||
; CHECK-DAG: stfd 2, 32(3)
|
||||
; CHECK-DAG: stfd 1, 0(3)
|
||||
; CHECK-DAG: stfd [[REG1]], 56(3)
|
||||
; CHECK-DAG: stfd [[REG2]], 48(3)
|
||||
; CHECK-DAG: stfd [[REG3]], 40(3)
|
||||
; CHECK-DAG: stfd [[REG4]], 24(3)
|
||||
; CHECK-DAG: stfd [[REG5]], 16(3)
|
||||
; CHECK-DAG: stfd [[REG6]], 8(3)
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="pwr7" }
|
||||
attributes #1 = { nounwind "target-cpu"="a2q" }
|
||||
attributes #2 = { nounwind "target-cpu"="pwr8" }
|
||||
|
Loading…
x
Reference in New Issue
Block a user