mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-04 01:26:41 +00:00
Finish converting the rest of the NEON VLD instructions to use pseudo-instructions prior to regalloc. Since it's getting a little close to the 2.8 branch deadline, I'll have to leave the rest of the instructions handled by the NEONPreAllocPass for now, but I didn't want to leave half of the VLD instructions converted and the other half not.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112983 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2465dcf4c8
commit
f572191fe4
@ -105,16 +105,17 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||
D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
|
||||
D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
|
||||
}
|
||||
MIB.addReg(D0).addReg(D1);
|
||||
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
||||
if (NumRegs > 2)
|
||||
MIB.addReg(D2);
|
||||
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3);
|
||||
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
|
||||
|
||||
if (hasWriteBack) {
|
||||
bool WBIsDead = MI.getOperand(OpIdx).isDead();
|
||||
unsigned WBReg = MI.getOperand(OpIdx++).getReg();
|
||||
MIB.addReg(WBReg, getDefRegState(true) | getDeadRegState(WBIsDead));
|
||||
MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
|
||||
}
|
||||
// Copy the addrmode6 operands.
|
||||
bool AddrIsKill = MI.getOperand(OpIdx).isKill();
|
||||
@ -128,9 +129,12 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||
|
||||
MIB = AddDefaultPred(MIB);
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
// Add an implicit def for the super-reg.
|
||||
MIB.addReg(DstReg, (getDefRegState(true) | getDeadRegState(DstIsDead) |
|
||||
getImplRegState(true)));
|
||||
// For an instruction writing the odd subregs, add an implicit use of the
|
||||
// super-register because the even subregs were loaded separately.
|
||||
if (RegSpc == OddDblSpc)
|
||||
MIB.addReg(DstReg, RegState::Implicit);
|
||||
// Add an implicit def for the super-register.
|
||||
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
@ -147,7 +151,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
||||
if (hasWriteBack) {
|
||||
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
|
||||
MIB.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead));
|
||||
MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
|
||||
}
|
||||
// Copy the addrmode6 operands.
|
||||
bool AddrIsKill = MI.getOperand(OpIdx).isKill();
|
||||
@ -336,15 +340,63 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
case ARM::VLD2q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
|
||||
|
||||
case ARM::VLD3d8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
|
||||
case ARM::VLD1d64TPseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3d16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3d32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD1d64TPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64T, true, SingleSpc, 3); break;
|
||||
ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q8oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VLD3q16oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VLD3q32oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
|
||||
|
||||
case ARM::VLD4d8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
|
||||
case ARM::VLD1d64QPseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4d16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4d32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD1d64QPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64Q, true, SingleSpc, 4); break;
|
||||
ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q8oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VLD4q16oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VLD4q32oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
|
||||
|
||||
case ARM::VST1q8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
|
||||
|
@ -1111,120 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
break;
|
||||
}
|
||||
|
||||
EVT ResTy;
|
||||
if (NumVecs == 1)
|
||||
ResTy = VT;
|
||||
else {
|
||||
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
|
||||
if (!is64BitVector)
|
||||
ResTyElts *= 2;
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
|
||||
}
|
||||
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
SDValue SuperReg;
|
||||
if (is64BitVector) {
|
||||
unsigned Opc = DOpcodes[OpcodeIndex];
|
||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||
SDNode *VLd;
|
||||
if (NumVecs <= 2) {
|
||||
EVT ResTy;
|
||||
if (NumVecs == 1)
|
||||
ResTy = VT;
|
||||
else
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs);
|
||||
VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||
} else {
|
||||
std::vector<EVT> ResTys(NumVecs, VT);
|
||||
ResTys.push_back(MVT::Other);
|
||||
VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
|
||||
}
|
||||
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||
if (NumVecs == 1)
|
||||
return VLd;
|
||||
|
||||
SDValue SuperReg;
|
||||
if (NumVecs <= 2)
|
||||
SuperReg = SDValue(VLd, 0);
|
||||
else {
|
||||
SDValue V0 = SDValue(VLd, 0);
|
||||
SDValue V1 = SDValue(VLd, 1);
|
||||
// Form a REG_SEQUENCE to force register allocation.
|
||||
SDValue V2 = SDValue(VLd, 2);
|
||||
// If it's a vld3, form a quad D-register but discard the last part.
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: SDValue(VLd, 3);
|
||||
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
|
||||
SuperReg = SDValue(VLd, 0);
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
|
||||
dl, VT, SuperReg);
|
||||
ReplaceUses(SDValue(N, Vec), D);
|
||||
}
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs <= 2 ? 1 : NumVecs));
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
EVT RegVT = GetNEONSubregVT(VT);
|
||||
if (NumVecs <= 2) {
|
||||
// Quad registers are directly supported for VLD1 and VLD2,
|
||||
// loading pairs of D regs.
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
|
||||
|
||||
EVT ResTy;
|
||||
if (NumVecs == 1)
|
||||
ResTy = VT;
|
||||
else
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, 2 * NumVecs);
|
||||
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
|
||||
|
||||
// Combine the even and odd subregs to produce the result.
|
||||
if (NumVecs == 1)
|
||||
return VLd;
|
||||
|
||||
SDValue QQ = SDValue(VLd, 0);
|
||||
SuperReg = SDValue(VLd, 0);
|
||||
Chain = SDValue(VLd, 1);
|
||||
|
||||
SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
|
||||
SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
|
||||
ReplaceUses(SDValue(N, 0), Q0);
|
||||
ReplaceUses(SDValue(N, 1), Q1);
|
||||
} else {
|
||||
// Otherwise, quad registers are loaded with two separate instructions,
|
||||
// where one loads the even registers and the other loads the odd registers.
|
||||
|
||||
std::vector<EVT> ResTys(NumVecs, RegVT);
|
||||
ResTys.push_back(MemAddr.getValueType());
|
||||
ResTys.push_back(MVT::Other);
|
||||
EVT AddrTy = MemAddr.getValueType();
|
||||
|
||||
// Load the even subregs.
|
||||
unsigned Opc = QOpcodes0[OpcodeIndex];
|
||||
const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
|
||||
SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
|
||||
Chain = SDValue(VLdA, NumVecs+1);
|
||||
SDValue ImplDef =
|
||||
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
|
||||
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
|
||||
SDNode *VLdA =
|
||||
CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
|
||||
Chain = SDValue(VLdA, 2);
|
||||
|
||||
// Load the odd subregs.
|
||||
Opc = QOpcodes1[OpcodeIndex];
|
||||
const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
|
||||
Align, Reg0, Pred, Reg0, Chain };
|
||||
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
|
||||
Chain = SDValue(VLdB, NumVecs+1);
|
||||
const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
|
||||
Pred, Reg0, Chain };
|
||||
SDNode *VLdB =
|
||||
CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
|
||||
SuperReg = SDValue(VLdB, 0);
|
||||
Chain = SDValue(VLdB, 2);
|
||||
}
|
||||
|
||||
SDValue V0 = SDValue(VLdA, 0);
|
||||
SDValue V1 = SDValue(VLdB, 0);
|
||||
SDValue V2 = SDValue(VLdA, 1);
|
||||
SDValue V3 = SDValue(VLdB, 1);
|
||||
SDValue V4 = SDValue(VLdA, 2);
|
||||
SDValue V5 = SDValue(VLdB, 2);
|
||||
SDValue V6 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
|
||||
: SDValue(VLdA, 3);
|
||||
SDValue V7 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
|
||||
: SDValue(VLdB, 3);
|
||||
SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
|
||||
V4, V5, V6, V7), 0);
|
||||
|
||||
// Extract out the 3 / 4 Q registers.
|
||||
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
|
||||
dl, VT, RegSeq);
|
||||
ReplaceUses(SDValue(N, Vec), Q);
|
||||
}
|
||||
// Extract out the Q registers.
|
||||
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
|
||||
SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
|
||||
dl, VT, SuperReg);
|
||||
ReplaceUses(SDValue(N, Vec), Q);
|
||||
}
|
||||
ReplaceUses(SDValue(N, NumVecs), Chain);
|
||||
return NULL;
|
||||
@ -2166,26 +2125,26 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld3: {
|
||||
unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
|
||||
ARM::VLD3d32, ARM::VLD1d64T };
|
||||
unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
|
||||
ARM::VLD3q16_UPD,
|
||||
ARM::VLD3q32_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
|
||||
ARM::VLD3q16odd_UPD,
|
||||
ARM::VLD3q32odd_UPD };
|
||||
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
|
||||
ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
|
||||
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
|
||||
ARM::VLD3q16Pseudo_UPD,
|
||||
ARM::VLD3q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
|
||||
ARM::VLD3q16oddPseudo_UPD,
|
||||
ARM::VLD3q32oddPseudo_UPD };
|
||||
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld4: {
|
||||
unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
|
||||
ARM::VLD4d32, ARM::VLD1d64Q };
|
||||
unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
|
||||
ARM::VLD4q16_UPD,
|
||||
ARM::VLD4q32_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
|
||||
ARM::VLD4q16odd_UPD,
|
||||
ARM::VLD4q32odd_UPD };
|
||||
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
|
||||
ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
|
||||
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
|
||||
ARM::VLD4q16Pseudo_UPD,
|
||||
ARM::VLD4q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
|
||||
ARM::VLD4q16oddPseudo_UPD,
|
||||
ARM::VLD4q32oddPseudo_UPD };
|
||||
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
|
@ -181,6 +181,10 @@ class VLDQQWBPseudo
|
||||
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset), IIC_VST,
|
||||
"$addr.addr = $wb">;
|
||||
class VLDQQQQWBPseudo
|
||||
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
|
||||
"$addr.addr = $wb, $src = $dst">;
|
||||
|
||||
// VLD1 : Vector Load (multiple single elements)
|
||||
class VLD1D<bits<4> op7_4, string Dt>
|
||||
@ -356,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
|
||||
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
|
||||
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
|
||||
|
||||
def VLD3d8Pseudo : VLDQQPseudo;
|
||||
def VLD3d16Pseudo : VLDQQPseudo;
|
||||
def VLD3d32Pseudo : VLDQQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b10, op11_8, op7_4,
|
||||
@ -368,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
|
||||
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
|
||||
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
|
||||
|
||||
def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
|
||||
def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
|
||||
def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
|
||||
|
||||
// ...with double-spaced registers (non-updating versions for disassembly only):
|
||||
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
|
||||
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
|
||||
@ -376,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
|
||||
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
|
||||
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
|
||||
|
||||
def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
|
||||
def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
|
||||
def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
|
||||
def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
|
||||
// VLD4 : Vector Load (multiple 4-element structures)
|
||||
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -392,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
|
||||
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
|
||||
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
|
||||
|
||||
def VLD4d8Pseudo : VLDQQPseudo;
|
||||
def VLD4d16Pseudo : VLDQQPseudo;
|
||||
def VLD4d32Pseudo : VLDQQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b10, op11_8, op7_4,
|
||||
@ -404,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
|
||||
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
|
||||
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
|
||||
|
||||
def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
|
||||
def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
|
||||
def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
|
||||
|
||||
// ...with double-spaced registers (non-updating versions for disassembly only):
|
||||
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
|
||||
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
|
||||
@ -412,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
|
||||
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
|
||||
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
|
||||
|
||||
def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
|
||||
def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
|
||||
def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
|
||||
def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
|
||||
// VLD1LN : Vector Load (single element to one lane)
|
||||
// FIXME: Not yet implemented.
|
||||
|
@ -74,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3d8:
|
||||
case ARM::VLD3d16:
|
||||
case ARM::VLD3d32:
|
||||
case ARM::VLD1d64T:
|
||||
case ARM::VLD3LNd8:
|
||||
case ARM::VLD3LNd16:
|
||||
case ARM::VLD3LNd32:
|
||||
@ -85,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
NumRegs = 3;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3q8_UPD:
|
||||
case ARM::VLD3q16_UPD:
|
||||
case ARM::VLD3q32_UPD:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 3;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3q8odd_UPD:
|
||||
case ARM::VLD3q16odd_UPD:
|
||||
case ARM::VLD3q32odd_UPD:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 3;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3LNq16:
|
||||
case ARM::VLD3LNq32:
|
||||
FirstOpnd = 0;
|
||||
@ -119,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4d8:
|
||||
case ARM::VLD4d16:
|
||||
case ARM::VLD4d32:
|
||||
case ARM::VLD1d64Q:
|
||||
case ARM::VLD4LNd8:
|
||||
case ARM::VLD4LNd16:
|
||||
case ARM::VLD4LNd32:
|
||||
@ -130,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4q8_UPD:
|
||||
case ARM::VLD4q16_UPD:
|
||||
case ARM::VLD4q32_UPD:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4q8odd_UPD:
|
||||
case ARM::VLD4q16odd_UPD:
|
||||
case ARM::VLD4q32odd_UPD:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4LNq16:
|
||||
case ARM::VLD4LNq32:
|
||||
FirstOpnd = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user