mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-02 15:51:54 +00:00
[AArch64][GlobalISel] When copying from a gpr32 to an fpr16 reg, convert to fpr32 first.
This is a follow-on commit to r[x] where we fix the other direction of the copy. For this case, after converting the source from gpr32 -> fpr32, we use a subregister copy, which is essentially what EXTRACT_SUBREG does in SDAG land. https://reviews.llvm.org/D43444 llvm-svn: 325550
This commit is contained in:
parent
fc84bda806
commit
a99b25a021
@ -317,12 +317,39 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
|
||||
return GenericOpc;
|
||||
}
|
||||
|
||||
/// Rewrite the source of copy \p I so that a value held in a gpr32 register
/// can be copied into an fpr16 register.
///
/// Two COPY instructions are inserted immediately before \p I:
///   1. a copy of \p SrcReg into a fresh FPR32 virtual register, and
///   2. a copy of the `hsub` sub-register of that FPR32 register into a fresh
///      FPR16 virtual register.
/// Operand 1 of \p I is then redirected to the FPR16 register.
///
/// \param I      Instruction whose operand 1 is replaced; the new copies are
///               inserted before it, in its parent block, with its debug
///               location.
/// \param TII    Provides the instruction description for COPY.
/// \param MRI    Used to create the two intermediate virtual registers.
/// \param SrcReg Register used as the source of the first inserted copy.
/// \returns true unconditionally.
static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI, unsigned SrcReg) {
  auto &MBB = *I.getParent();
  const auto &DL = I.getDebugLoc();

  // Copies from gpr32 to fpr16 need to use a sub-register copy, so first move
  // the value into an fpr32 register.
  unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY))
      .addDef(CopyReg)
      .addUse(SrcReg);

  // Then take the hsub sub-register of the fpr32 value as the fpr16 result.
  unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
  BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY))
      .addDef(SubRegCopy)
      .addUse(CopyReg, 0, AArch64::hsub);

  // Make the original instruction read from the fpr16 register.
  I.getOperand(1).setReg(SubRegCopy);
  return true;
}
|
||||
|
||||
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
|
||||
const RegisterBankInfo &RBI) {
|
||||
|
||||
unsigned DstReg = I.getOperand(0).getReg();
|
||||
unsigned SrcReg = I.getOperand(1).getReg();
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
|
||||
if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
|
||||
!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
|
||||
const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
|
||||
const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
|
||||
MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
|
||||
if (SrcRC == &AArch64::GPR32allRegClass)
|
||||
return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
|
||||
}
|
||||
assert(I.isCopy() && "Generic operators do not allow physical registers");
|
||||
return true;
|
||||
}
|
||||
@ -330,7 +357,6 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
|
||||
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
|
||||
(void)DstSize;
|
||||
unsigned SrcReg = I.getOperand(1).getReg();
|
||||
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
|
||||
(void)SrcSize;
|
||||
assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
|
||||
@ -357,9 +383,7 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
}
|
||||
|
||||
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
|
||||
const RegClassOrRegBank &RegClassOrBank =
|
||||
MRI.getRegClassOrRegBank(SrcReg);
|
||||
|
||||
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
|
||||
const TargetRegisterClass *SrcRC =
|
||||
RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
|
||||
const RegisterBank *RB = nullptr;
|
||||
@ -378,6 +402,9 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
|
||||
.addImm(AArch64::hsub);
|
||||
MachineOperand &RegOp = I.getOperand(1);
|
||||
RegOp.setReg(PromoteReg);
|
||||
} else if (RC == &AArch64::FPR16RegClass &&
|
||||
SrcRC == &AArch64::GPR32allRegClass) {
|
||||
selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,18 +6,21 @@
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-arm-none-eabi"
|
||||
|
||||
%struct.struct2 = type { [2 x half] }
|
||||
|
||||
@global_arg0 = common dso_local global %struct.struct2 zeroinitializer, align 2
|
||||
|
||||
; Function Attrs: noinline nounwind optnone
|
||||
define dso_local void @c_test([2 x half], [2 x half]* %addr) {
|
||||
store [2 x half] %0, [2 x half]* %addr, align 2
|
||||
define void @fp16_to_gpr([2 x half], [2 x half]* %addr) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @gpr_to_fp16() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @gpr_to_fp16_physreg() {
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: c_test
|
||||
name: fp16_to_gpr
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
@ -40,7 +43,7 @@ body: |
|
||||
bb.1 (%ir-block.1):
|
||||
liveins: $h0, $h1, $x0
|
||||
|
||||
; CHECK-LABEL: name: c_test
|
||||
; CHECK-LABEL: name: fp16_to_gpr
|
||||
; CHECK: liveins: $h0, $h1, $x0
|
||||
; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY $h0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY $h1
|
||||
@ -67,3 +70,58 @@ body: |
|
||||
RET_ReallyLR
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: gpr_to_fp16
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: gpr }
|
||||
- { id: 2, class: fpr }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: $w0
|
||||
|
||||
; CHECK-LABEL: name: gpr_to_fp16
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
|
||||
; CHECK: $h0 = COPY [[COPY2]]
|
||||
; CHECK: RET_ReallyLR implicit $h0
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(s16) = G_TRUNC %0(s32)
|
||||
%2:fpr(s16) = COPY %1(s16)
|
||||
$h0 = COPY %2(s16)
|
||||
RET_ReallyLR implicit $h0
|
||||
|
||||
...
|
||||
---
|
||||
name: gpr_to_fp16_physreg
|
||||
alignment: 2
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr }
|
||||
- { id: 1, class: gpr }
|
||||
body: |
|
||||
bb.1 (%ir-block.0):
|
||||
liveins: $w0
|
||||
|
||||
; CHECK-LABEL: name: gpr_to_fp16_physreg
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
|
||||
; CHECK: $h0 = COPY [[COPY2]]
|
||||
; CHECK: RET_ReallyLR implicit $h0
|
||||
%0:gpr(s32) = COPY $w0
|
||||
%1:gpr(s16) = G_TRUNC %0(s32)
|
||||
$h0 = COPY %1(s16)
|
||||
RET_ReallyLR implicit $h0
|
||||
|
||||
...
|
||||
|
@ -97,8 +97,12 @@ body: |
|
||||
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
|
||||
; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 0, 15
|
||||
; CHECK: [[UBFMWri1:%[0-9]+]]:gpr32 = UBFMWri [[COPY]], 15, 30
|
||||
; CHECK: $h0 = COPY [[UBFMWri]]
|
||||
; CHECK: $h1 = COPY [[UBFMWri1]]
|
||||
; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[UBFMWri]]
|
||||
; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
|
||||
; CHECK: $h0 = COPY [[COPY2]]
|
||||
; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[UBFMWri1]]
|
||||
; CHECK: [[COPY4:%[0-9]+]]:fpr16 = COPY [[COPY3]].hsub
|
||||
; CHECK: $h1 = COPY [[COPY4]]
|
||||
%0:gpr(s32) = COPY $w0
|
||||
|
||||
%1:gpr(s16) = G_EXTRACT %0, 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user