mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-22 04:05:05 +00:00
[FastISel][AArch64] Add vector support to argument lowering.
Lower the first 8 vector arguments too.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217850 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bad2c13aae
commit
c9bc145e31
@ -2096,8 +2096,7 @@ bool AArch64FastISel::fastLowerArguments() {
|
||||
if (CC != CallingConv::C)
|
||||
return false;
|
||||
|
||||
// Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and
|
||||
// FPR each.
|
||||
// Only handle simple cases of up to 8 GPR and FPR each.
|
||||
unsigned GPRCnt = 0;
|
||||
unsigned FPRCnt = 0;
|
||||
unsigned Idx = 0;
|
||||
@ -2111,32 +2110,34 @@ bool AArch64FastISel::fastLowerArguments() {
|
||||
return false;
|
||||
|
||||
Type *ArgTy = Arg.getType();
|
||||
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
|
||||
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
|
||||
return false;
|
||||
|
||||
EVT ArgVT = TLI.getValueType(ArgTy);
|
||||
if (!ArgVT.isSimple()) return false;
|
||||
switch (ArgVT.getSimpleVT().SimpleTy) {
|
||||
default: return false;
|
||||
case MVT::i1:
|
||||
case MVT::i8:
|
||||
case MVT::i16:
|
||||
case MVT::i32:
|
||||
case MVT::i64:
|
||||
if (!ArgVT.isSimple())
|
||||
return false;
|
||||
|
||||
MVT VT = ArgVT.getSimpleVT().SimpleTy;
|
||||
if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
|
||||
return false;
|
||||
|
||||
if (VT.isVector() &&
|
||||
(!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
|
||||
return false;
|
||||
|
||||
if (VT >= MVT::i1 && VT <= MVT::i64)
|
||||
++GPRCnt;
|
||||
break;
|
||||
case MVT::f16:
|
||||
case MVT::f32:
|
||||
case MVT::f64:
|
||||
else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
|
||||
VT.is128BitVector())
|
||||
++FPRCnt;
|
||||
break;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
if (GPRCnt > 8 || FPRCnt > 8)
|
||||
return false;
|
||||
}
|
||||
|
||||
static const MCPhysReg Registers[5][8] = {
|
||||
static const MCPhysReg Registers[6][8] = {
|
||||
{ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
|
||||
AArch64::W5, AArch64::W6, AArch64::W7 },
|
||||
{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
|
||||
@ -2146,7 +2147,9 @@ bool AArch64FastISel::fastLowerArguments() {
|
||||
{ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
|
||||
AArch64::S5, AArch64::S6, AArch64::S7 },
|
||||
{ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
|
||||
AArch64::D5, AArch64::D6, AArch64::D7 }
|
||||
AArch64::D5, AArch64::D6, AArch64::D7 },
|
||||
{ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
|
||||
AArch64::Q5, AArch64::Q6, AArch64::Q7 }
|
||||
};
|
||||
|
||||
unsigned GPRIdx = 0;
|
||||
@ -2154,29 +2157,28 @@ bool AArch64FastISel::fastLowerArguments() {
|
||||
for (auto const &Arg : F->args()) {
|
||||
MVT VT = TLI.getSimpleValueType(Arg.getType());
|
||||
unsigned SrcReg;
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
switch (VT.SimpleTy) {
|
||||
default: llvm_unreachable("Unexpected value type.");
|
||||
case MVT::i1:
|
||||
case MVT::i8:
|
||||
case MVT::i16: VT = MVT::i32; // fall-through
|
||||
case MVT::i32:
|
||||
SrcReg = Registers[0][GPRIdx++]; RC = &AArch64::GPR32RegClass; break;
|
||||
case MVT::i64:
|
||||
SrcReg = Registers[1][GPRIdx++]; RC = &AArch64::GPR64RegClass; break;
|
||||
case MVT::f16:
|
||||
SrcReg = Registers[2][FPRIdx++]; RC = &AArch64::FPR16RegClass; break;
|
||||
case MVT::f32:
|
||||
SrcReg = Registers[3][FPRIdx++]; RC = &AArch64::FPR32RegClass; break;
|
||||
case MVT::f64:
|
||||
SrcReg = Registers[4][FPRIdx++]; RC = &AArch64::FPR64RegClass; break;
|
||||
}
|
||||
|
||||
// Skip unused arguments.
|
||||
if (Arg.use_empty()) {
|
||||
updateValueMap(&Arg, 0);
|
||||
continue;
|
||||
}
|
||||
const TargetRegisterClass *RC;
|
||||
if (VT >= MVT::i1 && VT <= MVT::i32) {
|
||||
SrcReg = Registers[0][GPRIdx++];
|
||||
RC = &AArch64::GPR32RegClass;
|
||||
VT = MVT::i32;
|
||||
} else if (VT == MVT::i64) {
|
||||
SrcReg = Registers[1][GPRIdx++];
|
||||
RC = &AArch64::GPR64RegClass;
|
||||
} else if (VT == MVT::f16) {
|
||||
SrcReg = Registers[2][FPRIdx++];
|
||||
RC = &AArch64::FPR16RegClass;
|
||||
} else if (VT == MVT::f32) {
|
||||
SrcReg = Registers[3][FPRIdx++];
|
||||
RC = &AArch64::FPR32RegClass;
|
||||
} else if ((VT == MVT::f64) || VT.is64BitVector()) {
|
||||
SrcReg = Registers[4][FPRIdx++];
|
||||
RC = &AArch64::FPR64RegClass;
|
||||
} else if (VT.is128BitVector()) {
|
||||
SrcReg = Registers[5][FPRIdx++];
|
||||
RC = &AArch64::FPR128RegClass;
|
||||
} else
|
||||
llvm_unreachable("Unexpected value type.");
|
||||
|
||||
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
|
||||
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
|
||||
|
74
test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
Normal file
74
test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
Normal file
@ -0,0 +1,74 @@
|
||||
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Vector Integer Add
|
||||
define <8 x i8> @add_v8i8_rr(<8 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: add_v8i8_rr
|
||||
; CHECK: add.8b v0, v0, v1
|
||||
%1 = add <8 x i8> %a, %b
|
||||
ret <8 x i8> %1
|
||||
}
|
||||
|
||||
define <16 x i8> @add_v16i8_rr(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK: add_v16i8_rr
|
||||
; CHECK: add.16b v0, v0, v1
|
||||
%1 = add <16 x i8> %a, %b
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
||||
define <4 x i16> @add_v4i16_rr(<4 x i16> %a, <4 x i16> %b) {
|
||||
; CHECK: add_v4i16_rr
|
||||
; CHECK: add.4h v0, v0, v1
|
||||
%1 = add <4 x i16> %a, %b
|
||||
ret <4 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i16> @add_v8i16_rr(<8 x i16> %a, <8 x i16> %b) {
|
||||
; CHECK: add_v8i16_rr
|
||||
; CHECK: add.8h v0, v0, v1
|
||||
%1 = add <8 x i16> %a, %b
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
||||
define <2 x i32> @add_v2i32_rr(<2 x i32> %a, <2 x i32> %b) {
|
||||
; CHECK: add_v2i32_rr
|
||||
; CHECK: add.2s v0, v0, v1
|
||||
%1 = add <2 x i32> %a, %b
|
||||
ret <2 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @add_v4i32_rr(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK: add_v4i32_rr
|
||||
; CHECK: add.4s v0, v0, v1
|
||||
%1 = add <4 x i32> %a, %b
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <2 x i64> @add_v2i64_rr(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK: add_v2i64_rr
|
||||
; CHECK: add.2d v0, v0, v1
|
||||
%1 = add <2 x i64> %a, %b
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
||||
; Vector Floating-point Add
|
||||
define <2 x float> @add_v2f32_rr(<2 x float> %a, <2 x float> %b) {
|
||||
; CHECK: add_v2f32_rr
|
||||
; CHECK: fadd.2s v0, v0, v1
|
||||
%1 = fadd <2 x float> %a, %b
|
||||
ret <2 x float> %1
|
||||
}
|
||||
|
||||
define <4 x float> @add_v4f32_rr(<4 x float> %a, <4 x float> %b) {
|
||||
; CHECK: add_v4f32_rr
|
||||
; CHECK: fadd.4s v0, v0, v1
|
||||
%1 = fadd <4 x float> %a, %b
|
||||
ret <4 x float> %1
|
||||
}
|
||||
|
||||
define <2 x double> @add_v2f64_rr(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK: add_v2f64_rr
|
||||
; CHECK: fadd.2d v0, v0, v1
|
||||
%1 = fadd <2 x double> %a, %b
|
||||
ret <2 x double> %1
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user