mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-06 03:47:44 +00:00
This patch adds ABI support for v1i128 data type.
It adds v1i128 to the appropriate register classes and checks parameter passing and return values. This is related to http://reviews.llvm.org/D9081, which will add instructions that exploit the v1i128 datatype. Phabricator review: http://reviews.llvm.org/D9475 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236503 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a5f2faff5c
commit
c3c0de39db
@ -62,7 +62,8 @@ def RetCC_PPC : CallingConv<[
|
|||||||
|
|
||||||
// Vector types returned as "direct" go into V2 .. V9; note that only the
|
// Vector types returned as "direct" go into V2 .. V9; note that only the
|
||||||
// ELFv2 ABI fully utilizes all these registers.
|
// ELFv2 ABI fully utilizes all these registers.
|
||||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||||
|
CCIfSubtarget<"hasAltivec()",
|
||||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
||||||
@ -114,7 +115,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
|
|||||||
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
|
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
|
||||||
CCIfType<[v4f64, v4f32, v4i1],
|
CCIfType<[v4f64, v4f32, v4i1],
|
||||||
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
|
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
|
||||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||||
|
CCIfSubtarget<"hasAltivec()",
|
||||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
||||||
@ -172,9 +174,9 @@ def CC_PPC32_SVR4 : CallingConv<[
|
|||||||
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
|
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
|
||||||
|
|
||||||
// The first 12 Vector arguments are passed in AltiVec registers.
|
// The first 12 Vector arguments are passed in AltiVec registers.
|
||||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
|
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
|
||||||
V10, V11, V12, V13]>>>,
|
V8, V9, V10, V11, V12, V13]>>>,
|
||||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
|
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
|
||||||
VSH10, VSH11, VSH12, VSH13]>>>,
|
VSH10, VSH11, VSH12, VSH13]>>>,
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
#include "llvm/Support/raw_ostream.h"
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include "llvm/Target/TargetOptions.h"
|
#include "llvm/Target/TargetOptions.h"
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
// FIXME: Remove this once soft-float is supported.
|
// FIXME: Remove this once soft-float is supported.
|
||||||
@ -402,11 +403,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||||||
// will selectively turn on ones that can be effectively codegen'd.
|
// will selectively turn on ones that can be effectively codegen'd.
|
||||||
for (MVT VT : MVT::vector_valuetypes()) {
|
for (MVT VT : MVT::vector_valuetypes()) {
|
||||||
// add/sub are legal for all supported vector VT's.
|
// add/sub are legal for all supported vector VT's.
|
||||||
setOperationAction(ISD::ADD , VT, Legal);
|
// This check is temporary until support for quadword add/sub is added
|
||||||
setOperationAction(ISD::SUB , VT, Legal);
|
if (VT.SimpleTy != MVT::v1i128) {
|
||||||
|
setOperationAction(ISD::ADD , VT, Legal);
|
||||||
|
setOperationAction(ISD::SUB , VT, Legal);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
setOperationAction(ISD::ADD , VT, Expand);
|
||||||
|
setOperationAction(ISD::SUB , VT, Expand);
|
||||||
|
}
|
||||||
|
|
||||||
// Vector instructions introduced in P8
|
// Vector instructions introduced in P8
|
||||||
if (Subtarget.hasP8Altivec()) {
|
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
|
||||||
setOperationAction(ISD::CTPOP, VT, Legal);
|
setOperationAction(ISD::CTPOP, VT, Legal);
|
||||||
setOperationAction(ISD::CTLZ, VT, Legal);
|
setOperationAction(ISD::CTLZ, VT, Legal);
|
||||||
}
|
}
|
||||||
@ -620,8 +628,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
|||||||
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
|
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget.hasP8Altivec())
|
if (Subtarget.hasP8Altivec()) {
|
||||||
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
|
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
|
||||||
|
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget.hasQPX()) {
|
if (Subtarget.hasQPX()) {
|
||||||
@ -2473,7 +2483,8 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
|
|||||||
// Altivec parameters are padded to a 16 byte boundary.
|
// Altivec parameters are padded to a 16 byte boundary.
|
||||||
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
||||||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
||||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
|
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
|
||||||
|
ArgVT == MVT::v1i128)
|
||||||
Align = 16;
|
Align = 16;
|
||||||
// QPX vector types stored in double-precision are padded to a 32 byte
|
// QPX vector types stored in double-precision are padded to a 32 byte
|
||||||
// boundary.
|
// boundary.
|
||||||
@ -2552,7 +2563,8 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
|
|||||||
}
|
}
|
||||||
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
||||||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
||||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
|
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
|
||||||
|
ArgVT == MVT::v1i128)
|
||||||
if (AvailableVRs > 0) {
|
if (AvailableVRs > 0) {
|
||||||
--AvailableVRs;
|
--AvailableVRs;
|
||||||
return false;
|
return false;
|
||||||
@ -3131,6 +3143,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
|||||||
case MVT::v16i8:
|
case MVT::v16i8:
|
||||||
case MVT::v2f64:
|
case MVT::v2f64:
|
||||||
case MVT::v2i64:
|
case MVT::v2i64:
|
||||||
|
case MVT::v1i128:
|
||||||
if (!Subtarget.hasQPX()) {
|
if (!Subtarget.hasQPX()) {
|
||||||
// These can be scalar arguments or elements of a vector array type
|
// These can be scalar arguments or elements of a vector array type
|
||||||
// passed directly. The latter are used to implement ELFv2 homogenous
|
// passed directly. The latter are used to implement ELFv2 homogenous
|
||||||
@ -4605,6 +4618,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||||||
case MVT::v16i8:
|
case MVT::v16i8:
|
||||||
case MVT::v2f64:
|
case MVT::v2f64:
|
||||||
case MVT::v2i64:
|
case MVT::v2i64:
|
||||||
|
case MVT::v1i128:
|
||||||
if (++NumVRsUsed <= NumVRs)
|
if (++NumVRsUsed <= NumVRs)
|
||||||
continue;
|
continue;
|
||||||
break;
|
break;
|
||||||
@ -4967,6 +4981,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
|||||||
case MVT::v16i8:
|
case MVT::v16i8:
|
||||||
case MVT::v2f64:
|
case MVT::v2f64:
|
||||||
case MVT::v2i64:
|
case MVT::v2i64:
|
||||||
|
case MVT::v1i128:
|
||||||
if (!Subtarget.hasQPX()) {
|
if (!Subtarget.hasQPX()) {
|
||||||
// These can be scalar arguments or elements of a vector array type
|
// These can be scalar arguments or elements of a vector array type
|
||||||
// passed directly. The latter are used to implement ELFv2 homogenous
|
// passed directly. The latter are used to implement ELFv2 homogenous
|
||||||
|
@ -817,26 +817,37 @@ def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
|
|||||||
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||||
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||||
def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
|
def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||||
|
def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||||
|
|
||||||
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
|
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||||
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||||
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||||
def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
|
def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||||
|
def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||||
|
|
||||||
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
|
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||||
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
|
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||||
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
|
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||||
def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
|
def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||||
|
def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||||
|
|
||||||
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
|
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||||
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
|
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||||
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
|
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||||
def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
|
def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||||
|
def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||||
|
|
||||||
def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
|
def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||||
def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
|
def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||||
def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||||
def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||||
|
def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||||
|
|
||||||
|
def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||||
|
def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||||
|
def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||||
|
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||||
|
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||||
|
|
||||||
// Shuffles.
|
// Shuffles.
|
||||||
|
|
||||||
|
@ -890,6 +890,11 @@ def : Pat<(v2f64 (bitconvert v2i64:$A)),
|
|||||||
def : Pat<(v2i64 (bitconvert v2f64:$A)),
|
def : Pat<(v2i64 (bitconvert v2f64:$A)),
|
||||||
(COPY_TO_REGCLASS $A, VRRC)>;
|
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||||
|
|
||||||
|
def : Pat<(v2f64 (bitconvert v1i128:$A)),
|
||||||
|
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||||
|
def : Pat<(v1i128 (bitconvert v2f64:$A)),
|
||||||
|
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||||
|
|
||||||
// sign extension patterns
|
// sign extension patterns
|
||||||
// To extend "in place" from v2i32 to v2i64, we have input data like:
|
// To extend "in place" from v2i32 to v2i64, we have input data like:
|
||||||
// | undef | i32 | undef | i32 |
|
// | undef | i32 | undef | i32 |
|
||||||
|
@ -288,7 +288,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
|
|||||||
(sequence "F%u", 31, 14))>;
|
(sequence "F%u", 31, 14))>;
|
||||||
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
|
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
|
||||||
|
|
||||||
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128,
|
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
|
||||||
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
|
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
|
||||||
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
|
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
|
||||||
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
|
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
|
||||||
|
284
test/CodeGen/PowerPC/ppc64-i128-abi.ll
Normal file
284
test/CodeGen/PowerPC/ppc64-i128-abi.ll
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
|
||||||
|
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
|
||||||
|
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||||
|
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||||
|
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
|
||||||
|
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
||||||
|
|
||||||
|
@x = common global <1 x i128> zeroinitializer, align 16
|
||||||
|
@y = common global <1 x i128> zeroinitializer, align 16
|
||||||
|
@a = common global i128 zeroinitializer, align 16
|
||||||
|
@b = common global i128 zeroinitializer, align 16
|
||||||
|
|
||||||
|
; VSX:
|
||||||
|
; %a is passed in register 34
|
||||||
|
; On LE, ensure %a is swapped before being used (using xxswapd)
|
||||||
|
; Similarly, on LE ensure the results are swapped before being returned in
|
||||||
|
; register 34
|
||||||
|
; VMX (no VSX):
|
||||||
|
; %a is passed in register 2
|
||||||
|
; No swaps are necessary on LE
|
||||||
|
define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
|
||||||
|
%tmp = add <1 x i128> %a, <i128 1>
|
||||||
|
ret <1 x i128> %tmp
|
||||||
|
|
||||||
|
; CHECK-LE-LABEL: @v1i128_increment_by_one
|
||||||
|
; CHECK-LE: xxswapd [[PARAM1:[0-9]+]], 34
|
||||||
|
; CHECK-LE: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: xxswapd 34, [[RESULT]]
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @v1i128_increment_by_one
|
||||||
|
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
|
||||||
|
; CHECK-BE: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE: lxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
; VSX:
|
||||||
|
; %a is passed in register 34
|
||||||
|
; %b is passed in register 35
|
||||||
|
; On LE, ensure the contents of 34 and 35 are swapped before being used
|
||||||
|
; Similarly, on LE ensure the results are swapped before being returned in
|
||||||
|
; register 34
|
||||||
|
; VMX (no VSX):
|
||||||
|
; %a is passed in register 2
|
||||||
|
; %b is passed in register 3
|
||||||
|
; On LE, do not need to swap contents of 2 and 3 because the lvx/stvx
|
||||||
|
; instructions do not swap elements
|
||||||
|
define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
|
||||||
|
%tmp = add <1 x i128> %a, %b
|
||||||
|
ret <1 x i128> %tmp
|
||||||
|
|
||||||
|
; CHECK-LE-LABEL: @v1i128_increment_by_val
|
||||||
|
; CHECK-LE-DAG: xxswapd [[PARAM1:[0-9]+]], 34
|
||||||
|
; CHECK-LE-DAG: xxswapd [[PARAM2:[0-9]+]], 35
|
||||||
|
; CHECK-LE-DAG: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE-DAG: stxvd2x [[PARAM2]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: xxswapd 34, [[RESULT]]
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @v1i128_increment_by_val
|
||||||
|
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
|
||||||
|
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
|
||||||
|
; CHECK-BE-DAG: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE-DAG: stxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE-NOT: xxswapd 34, [[RESULT]]
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-DAG: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-DAG: stvx 3, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: lvx [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd 34, [[RESULT]]
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
; Little Endian (VSX and VMX):
|
||||||
|
; Lower 64-bits of %a are passed in register 3
|
||||||
|
; Upper 64-bits of %a are passed in register 4
|
||||||
|
; Increment lower 64-bits using addic (immediate value of 1)
|
||||||
|
; Increment upper 64-bits using add zero extended
|
||||||
|
; Results are placed in registers 3 and 4
|
||||||
|
; Big Endian (VSX and VMX):
|
||||||
|
; Lower 64-bits of %a are passed in register 4
|
||||||
|
; Upper 64-bits of %a are passed in register 3
|
||||||
|
; Increment lower 64-bits using addic (immediate value of 1)
|
||||||
|
; Increment upper 64-bits using add zero extended
|
||||||
|
; Results are placed in registers 3 and 4
|
||||||
|
define i128 @i128_increment_by_one(i128 %a) nounwind {
|
||||||
|
%tmp = add i128 %a, 1
|
||||||
|
ret i128 %tmp
|
||||||
|
; CHECK-LE-LABEL: @i128_increment_by_one
|
||||||
|
; CHECK-LE: addic 3, 3, 1
|
||||||
|
; CHECK-LE-NEXT: addze 4, 4
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @i128_increment_by_one
|
||||||
|
; CHECK-BE: addic 4, 4, 1
|
||||||
|
; CHECK-BE-NEXT: addze 3, 3
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
|
||||||
|
; CHECK-LE-NOVSX: addic 3, 3, 1
|
||||||
|
; CHECK-LE-NOVSX-NEXT: addze 4, 4
|
||||||
|
; CHECK-LE-NOVSX: blr
|
||||||
|
|
||||||
|
; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
|
||||||
|
; CHECK-BE-NOVSX: addic 4, 4, 1
|
||||||
|
; CHECK-BE-NOVSX-NEXT: addze 3, 3
|
||||||
|
; CHECK-BE-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
; Little Endian (VSX and VMX):
|
||||||
|
; Lower 64-bits of %a are passed in register 3
|
||||||
|
; Upper 64-bits of %a are passed in register 4
|
||||||
|
; Lower 64-bits of %b are passed in register 5
|
||||||
|
; Upper 64-bits of %b are passed in register 6
|
||||||
|
; Add the lower 64-bits using addc on registers 3 and 5
|
||||||
|
; Add the upper 64-bits using adde on registers 4 and 6
|
||||||
|
; Registers 3 and 4 should hold the result
|
||||||
|
; Big Endian (VSX and VMX):
|
||||||
|
; Upper 64-bits of %a are passed in register 3
|
||||||
|
; Lower 64-bits of %a are passed in register 4
|
||||||
|
; Upper 64-bits of %b are passed in register 5
|
||||||
|
; Lower 64-bits of %b are passed in register 6
|
||||||
|
; Add the lower 64-bits using addc on registers 4 and 6
|
||||||
|
; Add the upper 64-bits using adde on registers 3 and 5
|
||||||
|
; Registers 3 and 4 should hold the result
|
||||||
|
define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
|
||||||
|
%tmp = add i128 %a, %b
|
||||||
|
ret i128 %tmp
|
||||||
|
; CHECK-LE-LABEL: @i128_increment_by_val
|
||||||
|
; CHECK-LE: addc 3, 3, 5
|
||||||
|
; CHECK-LE-NEXT: adde 4, 4, 6
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @i128_increment_by_val
|
||||||
|
; CHECK-BE: addc 4, 4, 6
|
||||||
|
; CHECK-BE-NEXT: adde 3, 3, 5
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
|
||||||
|
; CHECK-LE-NOVSX: addc 3, 3, 5
|
||||||
|
; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
|
||||||
|
; CHECK-LE-NOVSX: blr
|
||||||
|
|
||||||
|
; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
|
||||||
|
; CHECK-BE-NOVSX: addc 4, 4, 6
|
||||||
|
; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
|
||||||
|
; CHECK-BE-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Callsites for the routines defined above.
|
||||||
|
; Ensure the parameters are loaded in the same order that is expected by the
|
||||||
|
; callee. See comments for individual functions above for details on registers
|
||||||
|
; used for parameters.
|
||||||
|
define <1 x i128> @call_v1i128_increment_by_one() nounwind {
|
||||||
|
%tmp = load <1 x i128>, <1 x i128>* @x, align 16
|
||||||
|
%ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
|
||||||
|
ret <1 x i128> %ret
|
||||||
|
|
||||||
|
; CHECK-LE-LABEL: @call_v1i128_increment_by_one
|
||||||
|
; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: xxswapd 34, [[PARAM]]
|
||||||
|
; CHECK-LE: bl v1i128_increment_by_one
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @call_v1i128_increment_by_one
|
||||||
|
; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||||
|
; CHECK-BE: bl v1i128_increment_by_one
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
|
||||||
|
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: bl v1i128_increment_by_one
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
define <1 x i128> @call_v1i128_increment_by_val() nounwind {
|
||||||
|
%tmp = load <1 x i128>, <1 x i128>* @x, align 16
|
||||||
|
%tmp2 = load <1 x i128>, <1 x i128>* @y, align 16
|
||||||
|
%ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
|
||||||
|
ret <1 x i128> %ret
|
||||||
|
|
||||||
|
; CHECK-LE-LABEL: @call_v1i128_increment_by_val
|
||||||
|
; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
|
||||||
|
; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
|
||||||
|
; CHECK-LE: bl v1i128_increment_by_val
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @call_v1i128_increment_by_val
|
||||||
|
|
||||||
|
|
||||||
|
; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||||
|
; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
|
||||||
|
; CHECK-BE: bl v1i128_increment_by_val
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
|
||||||
|
; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
|
||||||
|
; CHECK-NOVSX: bl v1i128_increment_by_val
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
define i128 @call_i128_increment_by_one() nounwind {
|
||||||
|
%tmp = load i128, i128* @a, align 16
|
||||||
|
%ret = call i128 @i128_increment_by_one(i128 %tmp)
|
||||||
|
ret i128 %ret
|
||||||
|
; %ret4 = call i128 @i128_increment_by_val(i128 %tmp2, i128 %tmp2)
|
||||||
|
; CHECK-LE-LABEL: @call_i128_increment_by_one
|
||||||
|
; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||||
|
; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
|
||||||
|
; CHECK-LE: bl i128_increment_by_one
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @call_i128_increment_by_one
|
||||||
|
; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||||
|
; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
|
||||||
|
; CHECK-BE: bl i128_increment_by_one
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
|
||||||
|
; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||||
|
; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
|
||||||
|
; CHECK-NOVSX: bl i128_increment_by_one
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
define i128 @call_i128_increment_by_val() nounwind {
|
||||||
|
%tmp = load i128, i128* @a, align 16
|
||||||
|
%tmp2 = load i128, i128* @b, align 16
|
||||||
|
%ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
|
||||||
|
ret i128 %ret
|
||||||
|
; CHECK-LE-LABEL: @call_i128_increment_by_val
|
||||||
|
; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||||
|
; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
|
||||||
|
; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||||
|
; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
|
||||||
|
; CHECK-LE: bl i128_increment_by_val
|
||||||
|
; CHECK-LE: blr
|
||||||
|
|
||||||
|
; CHECK-BE-LABEL: @call_i128_increment_by_val
|
||||||
|
; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||||
|
; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
|
||||||
|
; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||||
|
; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
|
||||||
|
; CHECK-BE: bl i128_increment_by_val
|
||||||
|
; CHECK-BE: blr
|
||||||
|
|
||||||
|
; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
|
||||||
|
; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||||
|
; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
|
||||||
|
; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||||
|
; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
|
||||||
|
; CHECK-NOVSX: bl i128_increment_by_val
|
||||||
|
; CHECK-NOVSX: blr
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user