Switch all the NEON vld-lane and vst-lane instructions over to the new

pseudo-instruction approach.  Change ARMExpandPseudoInsts to use a table
to record all the NEON load/store information.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113812 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2010-09-13 23:01:35 +00:00
parent b3e9681cc0
commit 8466fa1842
4 changed files with 624 additions and 487 deletions

View File

@ -24,13 +24,6 @@ using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
// Constants for register spacing in NEON load/store instructions.
enum NEONRegSpacing {
SingleSpc,
EvenDblSpc,
OddDblSpc
};
public:
static char ID;
ARMExpandPseudo() : MachineFunctionPass(ID) {}
@ -48,10 +41,9 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
};
char ARMExpandPseudo::ID = 0;
}
@ -73,37 +65,289 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
}
}
namespace {
// Constants for register spacing in NEON load/store instructions.
// For quad-register load-lane and store-lane pseudo instructions, the
// spacing is initially assumed to be EvenDblSpc, and that is changed to
// OddDblSpc depending on the lane number operand.
enum NEONRegSpacing {
SingleSpc,   // consecutive D subregs: dsub_0, dsub_1, dsub_2, dsub_3
EvenDblSpc,  // every other D subreg, even ones: dsub_0, dsub_2, dsub_4, dsub_6
OddDblSpc    // every other D subreg, odd ones: dsub_1, dsub_3, dsub_5, dsub_7
};
// One row of the NEON load/store information table.  Rows are kept ordered
// by PseudoOpc so that a lookup can be done with a binary search.
struct NEONLdStTableEntry {
  unsigned PseudoOpc;        // pseudo opcode; this is the table's sort key
  unsigned RealOpc;          // opcode of the instruction built in its place
  bool IsLoad;
  bool HasWriteBack;
  NEONRegSpacing RegSpacing;
  unsigned char NumRegs; // D registers loaded or stored
  unsigned char RegElts; // elements per D register; used for lane ops

  // Ordering predicates for the binary search.  All three compare only the
  // PseudoOpc key: entry vs. entry, entry vs. opcode, and opcode vs. entry.
  bool operator<(const NEONLdStTableEntry &RHS) const {
    return RHS.PseudoOpc > PseudoOpc;
  }
  friend bool operator<(const NEONLdStTableEntry &LHS, unsigned Opc) {
    return Opc > LHS.PseudoOpc;
  }
  friend bool ATTRIBUTE_UNUSED operator<(unsigned Opc,
                                         const NEONLdStTableEntry &RHS) {
    return RHS.PseudoOpc > Opc;
  }
};
}
// Table describing how each NEON load/store pseudo instruction is expanded.
// Columns, in NEONLdStTableEntry field order:
//   PseudoOpc, RealOpc, IsLoad, HasWriteBack, RegSpacing, NumRegs, RegElts
// The rows must stay sorted by PseudoOpc: LookupNEONLdSt searches the table
// with std::lower_bound and, in builds with assertions enabled, checks the
// ordering on its first call.
static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
{ ARM::VST4q8oddPseudo_UPD , ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
};
/// LookupNEONLdSt - Find the NEONLdStTable row describing the NEON load or
/// store pseudo instruction with the given opcode.  Returns NULL when the
/// opcode has no row in the table.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
  const NEONLdStTableEntry *const TableBegin = NEONLdStTable;
  const NEONLdStTableEntry *const TableEnd =
    NEONLdStTable + array_lengthof(NEONLdStTable);

#ifndef NDEBUG
  // The binary search below is only correct if the rows are in strictly
  // increasing PseudoOpc order; verify that once, on the first lookup.
  static bool SortVerified = false;
  if (!SortVerified) {
    for (const NEONLdStTableEntry *E = TableBegin; E + 1 != TableEnd; ++E)
      assert(*E < *(E + 1) &&
             "NEONLdStTable is not sorted!");
    SortVerified = true;
  }
#endif

  // lower_bound yields the first row whose key is not less than Opcode; it
  // is a hit only if that row exists and its key matches exactly.
  const NEONLdStTableEntry *Found =
    std::lower_bound(TableBegin, TableEnd, Opcode);
  if (Found == TableEnd || Found->PseudoOpc != Opcode)
    return NULL;
  return Found;
}
/// GetDSubRegs - Fetch four D subregisters of the Q, QQ, or QQQQ register
/// Reg, selected according to the register spacing RegSpc, and store them in
/// D0..D3.  The caller must only rely on as many results as Reg really has;
/// e.g., a Q register has just 2 D subregisters, so the remaining results are
/// not necessarily valid.
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
                        const TargetRegisterInfo *TRI, unsigned &D0,
                        unsigned &D1, unsigned &D2, unsigned &D3) {
  // First pick the four subregister indices for this spacing...
  unsigned Idx0, Idx1, Idx2, Idx3;
  switch (RegSpc) {
  case SingleSpc:
    Idx0 = ARM::dsub_0; Idx1 = ARM::dsub_1;
    Idx2 = ARM::dsub_2; Idx3 = ARM::dsub_3;
    break;
  case EvenDblSpc:
    Idx0 = ARM::dsub_0; Idx1 = ARM::dsub_2;
    Idx2 = ARM::dsub_4; Idx3 = ARM::dsub_6;
    break;
  default:
    assert(RegSpc == OddDblSpc && "unknown register spacing");
    Idx0 = ARM::dsub_1; Idx1 = ARM::dsub_3;
    Idx2 = ARM::dsub_5; Idx3 = ARM::dsub_7;
    break;
  }

  // ...then resolve them against Reg.
  D0 = TRI->getSubReg(Reg, Idx0);
  D1 = TRI->getSubReg(Reg, Idx1);
  D2 = TRI->getSubReg(Reg, Idx2);
  D3 = TRI->getSubReg(Reg, Idx3);
}
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool hasWriteBack,
NEONRegSpacing RegSpc, unsigned NumRegs) {
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
if (RegSpc == SingleSpc) {
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
} else if (RegSpc == EvenDblSpc) {
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
} else {
assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
}
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2)
@ -111,14 +355,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
if (NumRegs > 3)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
if (hasWriteBack)
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (hasWriteBack)
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
MIB = AddDefaultPred(MIB);
@ -138,45 +382,32 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool hasWriteBack,
NEONRegSpacing RegSpc, unsigned NumRegs) {
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
if (hasWriteBack)
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (hasWriteBack)
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx).getReg();
unsigned D0, D1, D2, D3;
if (RegSpc == SingleSpc) {
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
} else if (RegSpc == EvenDblSpc) {
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
} else {
assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
}
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0).addReg(D1);
if (NumRegs > 2)
MIB.addReg(D2);
@ -190,6 +421,85 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
MI.eraseFromParent();
}
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
/// register operands to real instructions with D register operands.
///
/// The pseudo at MBBI is looked up in NEONLdStTable to find the real opcode,
/// register count, register spacing and elements-per-D-register.  A new
/// instruction is built in front of the pseudo with the super-register
/// replaced by its D subregisters, and the pseudo is then erased.
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
assert(TableEntry && "NEONLdStTable lookup failed");
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
unsigned NumRegs = TableEntry->NumRegs;
unsigned RegElts = TableEntry->RegElts;
// The real instruction is inserted before the pseudo; operands are appended
// to it below in the order the real instruction expects them.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
// The lane operand is always the 3rd from last operand, before the 2
// predicate operands.
unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
// Adjust the lane and spacing as needed for Q registers.
// A lane number of RegElts or more is rebased by RegElts and the spacing is
// switched from the even to the odd D subregisters.
assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
if (RegSpc == EvenDblSpc && Lane >= RegElts) {
RegSpc = OddDblSpc;
Lane -= RegElts;
}
assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
// DstReg and DstIsDead are only assigned, and only read, when the table
// entry is a load.
unsigned DstReg, D0, D1, D2, D3;
bool DstIsDead;
if (TableEntry->IsLoad) {
// Loads: operand 0 of the pseudo is the destination super-register; add
// the first NumRegs of its D subregisters as defs of the new instruction.
DstIsDead = MI.getOperand(OpIdx).isDead();
DstReg = MI.getOperand(OpIdx++).getReg();
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 2)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
if (NumRegs > 3)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
}
// With writeback there is one extra operand ahead of the address operands.
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->HasWriteBack)
MIB.addOperand(MI.getOperand(OpIdx++));
// Grab the super-register source.
// MO is a copy of the operand, so marking it implicit further down does not
// modify the pseudo instruction itself.
MachineOperand MO = MI.getOperand(OpIdx++);
// For stores the subregisters come from the source operand.  For loads the
// D0..D3 computed from DstReg above are reused as the sources.
// NOTE(review): that presumes the load's source super-register is the same
// register as its destination (tied operands) -- confirm against the
// pseudo-instruction definitions.
if (!TableEntry->IsLoad)
GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
// Add the subregs as sources of the new instruction.
unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
getKillRegState(MO.isKill()));
MIB.addReg(D0, SrcFlags).addReg(D1, SrcFlags);
if (NumRegs > 2)
MIB.addReg(D2, SrcFlags);
if (NumRegs > 3)
MIB.addReg(D3, SrcFlags);
// Add the lane number operand.
MIB.addImm(Lane);
MIB = AddDefaultPred(MIB);
// Copy the super-register source to be an implicit source.
MO.setImplicit(true);
MIB.addOperand(MO);
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
// Carry over the pseudo's implicit operands, then remove the pseudo.
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@ -292,204 +602,169 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
}
case ARM::VLD1q8Pseudo:
ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo:
ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo:
ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo:
ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
case ARM::VLD1q8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
case ARM::VLD1q16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
case ARM::VLD1q32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
case ARM::VLD1q64Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
case ARM::VLD2d8Pseudo:
ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo:
ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo:
ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo:
ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo:
ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo:
ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
case ARM::VLD2d8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
case ARM::VLD2d16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
case ARM::VLD2d32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
case ARM::VLD2q8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
case ARM::VLD2q16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
case ARM::VLD2q32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
case ARM::VLD3d8Pseudo:
ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo:
ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo:
ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo:
ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
case ARM::VLD3d8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
case ARM::VLD3d32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
case ARM::VLD1d64TPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VLD3q8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
case ARM::VLD3q8oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q16oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VLD3q32oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
case ARM::VLD4d8Pseudo:
ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo:
ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo:
ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo:
ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
case ARM::VLD4d8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
case ARM::VLD4d32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
case ARM::VLD1d64QPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VLD4q8Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q16Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q32Pseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
case ARM::VLD4q8oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q16oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VLD4q32oddPseudo_UPD:
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
ExpandVLD(MBBI);
break;
case ARM::VST1q8Pseudo:
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
case ARM::VST1q16Pseudo:
ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
case ARM::VST1q32Pseudo:
ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
case ARM::VST1q64Pseudo:
ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
case ARM::VST1q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
case ARM::VST1q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
case ARM::VST1q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
case ARM::VST1q64Pseudo_UPD:
ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
case ARM::VST2d8Pseudo:
ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
case ARM::VST2d16Pseudo:
ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
case ARM::VST2d32Pseudo:
ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
case ARM::VST2q8Pseudo:
ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
case ARM::VST2q16Pseudo:
ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
case ARM::VST2q32Pseudo:
ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
case ARM::VST2d8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
case ARM::VST2d16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
case ARM::VST2d32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
case ARM::VST2q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
case ARM::VST2q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
case ARM::VST2q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
case ARM::VST3d8Pseudo:
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
case ARM::VST3d16Pseudo:
ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
case ARM::VST3d32Pseudo:
ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
case ARM::VST1d64TPseudo:
ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
case ARM::VST3d8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
case ARM::VST3d16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
case ARM::VST3d32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
case ARM::VST1d64TPseudo_UPD:
ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
case ARM::VST3q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
case ARM::VST3q8oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q16oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
case ARM::VST3q32oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
case ARM::VST4d8Pseudo:
ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
case ARM::VST4d16Pseudo:
ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
case ARM::VST4d32Pseudo:
ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
case ARM::VST1d64QPseudo:
ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
case ARM::VST4d8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
case ARM::VST4d16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
case ARM::VST4d32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
case ARM::VST1d64QPseudo_UPD:
ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
case ARM::VST4q8Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q16Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q32Pseudo_UPD:
ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
case ARM::VST4q8oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q16oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
case ARM::VST4q32oddPseudo_UPD:
ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
ExpandVST(MBBI);
break;
case ARM::VLD2LNd8Pseudo:
case ARM::VLD2LNd16Pseudo:
case ARM::VLD2LNd32Pseudo:
case ARM::VLD2LNq16Pseudo:
case ARM::VLD2LNq32Pseudo:
case ARM::VLD2LNd8Pseudo_UPD:
case ARM::VLD2LNd16Pseudo_UPD:
case ARM::VLD2LNd32Pseudo_UPD:
case ARM::VLD2LNq16Pseudo_UPD:
case ARM::VLD2LNq32Pseudo_UPD:
case ARM::VLD3LNd8Pseudo:
case ARM::VLD3LNd16Pseudo:
case ARM::VLD3LNd32Pseudo:
case ARM::VLD3LNq16Pseudo:
case ARM::VLD3LNq32Pseudo:
case ARM::VLD3LNd8Pseudo_UPD:
case ARM::VLD3LNd16Pseudo_UPD:
case ARM::VLD3LNd32Pseudo_UPD:
case ARM::VLD3LNq16Pseudo_UPD:
case ARM::VLD3LNq32Pseudo_UPD:
case ARM::VLD4LNd8Pseudo:
case ARM::VLD4LNd16Pseudo:
case ARM::VLD4LNd32Pseudo:
case ARM::VLD4LNq16Pseudo:
case ARM::VLD4LNq32Pseudo:
case ARM::VLD4LNd8Pseudo_UPD:
case ARM::VLD4LNd16Pseudo_UPD:
case ARM::VLD4LNd32Pseudo_UPD:
case ARM::VLD4LNq16Pseudo_UPD:
case ARM::VLD4LNq32Pseudo_UPD:
case ARM::VST2LNd8Pseudo:
case ARM::VST2LNd16Pseudo:
case ARM::VST2LNd32Pseudo:
case ARM::VST2LNq16Pseudo:
case ARM::VST2LNq32Pseudo:
case ARM::VST2LNd8Pseudo_UPD:
case ARM::VST2LNd16Pseudo_UPD:
case ARM::VST2LNd32Pseudo_UPD:
case ARM::VST2LNq16Pseudo_UPD:
case ARM::VST2LNq32Pseudo_UPD:
case ARM::VST3LNd8Pseudo:
case ARM::VST3LNd16Pseudo:
case ARM::VST3LNd32Pseudo:
case ARM::VST3LNq16Pseudo:
case ARM::VST3LNq32Pseudo:
case ARM::VST3LNd8Pseudo_UPD:
case ARM::VST3LNd16Pseudo_UPD:
case ARM::VST3LNd32Pseudo_UPD:
case ARM::VST3LNq16Pseudo_UPD:
case ARM::VST3LNq32Pseudo_UPD:
case ARM::VST4LNd8Pseudo:
case ARM::VST4LNd16Pseudo:
case ARM::VST4LNd32Pseudo:
case ARM::VST4LNq16Pseudo:
case ARM::VST4LNq32Pseudo:
case ARM::VST4LNd8Pseudo_UPD:
case ARM::VST4LNd16Pseudo_UPD:
case ARM::VST4LNd32Pseudo_UPD:
case ARM::VST4LNq16Pseudo_UPD:
case ARM::VST4LNq32Pseudo_UPD:
ExpandLaneOp(MBBI);
break;
}
if (ModifiedOp)

View File

@ -151,10 +151,9 @@ private:
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
/// load/store of D registers and even subregs and odd subregs of Q registers.
/// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1);
unsigned *DOpcodes, unsigned *QOpcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@ -196,10 +195,6 @@ private:
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
// Form sequences of 8 consecutive D registers.
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
}
@ -1015,39 +1010,6 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
/// OctoDRegs - Build a REG_SEQUENCE node of type VT whose operands place
/// V0..V7 in the subregister slots dsub_0..dsub_7, in that order.
///
SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
                                   SDValue V2, SDValue V3,
                                   SDValue V4, SDValue V5,
                                   SDValue V6, SDValue V7) {
  // The new node takes its debug location from the first input value.
  DebugLoc Loc = V0.getNode()->getDebugLoc();
  // REG_SEQUENCE operands are laid out as (value, subreg-index) pairs.
  const SDValue Ops[] = {
    V0, CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32),
    V1, CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32),
    V2, CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32),
    V3, CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32),
    V4, CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32),
    V5, CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32),
    V6, CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32),
    V7, CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32)
  };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, Loc, VT, Ops, 16);
}
/// GetNEONSubregVT - Map a 128-bit NEON vector type to the 64-bit vector type
/// of one of its subregisters: same element type, half as many elements.
/// Any type not listed below is rejected via llvm_unreachable.
static EVT GetNEONSubregVT(EVT VT) {
  const MVT::SimpleValueType QuadTy = VT.getSimpleVT().SimpleTy;
  if (QuadTy == MVT::v16i8) return MVT::v8i8;
  if (QuadTy == MVT::v8i16) return MVT::v4i16;
  if (QuadTy == MVT::v4f32) return MVT::v2f32;
  if (QuadTy == MVT::v4i32) return MVT::v2i32;
  if (QuadTy == MVT::v2i64) return MVT::v1i64;
  llvm_unreachable("unhandled NEON type");
}
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
@ -1281,8 +1243,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0,
unsigned *QOpcodes1) {
unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
@ -1296,16 +1257,6 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
bool is64BitVector = VT.is64BitVector();
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
Even = Lane < NumElts;
}
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld/vst lane type");
@ -1323,121 +1274,59 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SmallVector<SDValue, 10> Ops;
SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
if (NumVecs == 2) {
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(2+3);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
// Pick the pseudo-instruction opcode for this element size: the D-register
// table for 64-bit vectors, the Q-register table otherwise.  Both arms are
// plain values; the variable must not be assigned inside its own initializer.
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                QOpcodes[OpcodeIndex]);
// Now extract the D registers back out.
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
if (NumVecs > 2)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
if (NumVecs > 3)
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
SDValue SuperReg;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
if (NumVecs == 2) {
if (is64BitVector)
SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else
SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
Opc = QOpcodes0[OpcodeIndex];
} else {
Lane -= NumElts;
Opc = QOpcodes1[OpcodeIndex];
}
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
if (NumVecs == 2) {
RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
SDValue V2 = N->getOperand(2+3);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(3+3);
RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
// Extract the subregs of the input vector.
unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
RegSeq));
SDValue V2 = N->getOperand(2+3);
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(3+3);
if (is64BitVector)
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
else
SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
EVT ResTy;
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
if (!is64BitVector)
ResTyElts *= 2;
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
// Form a REG_SEQUENCE to force register allocation.
SDValue RegSeq;
if (is64BitVector) {
SDValue V0 = SDValue(VLdLn, 0);
SDValue V1 = SDValue(VLdLn, 1);
if (NumVecs == 2) {
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
SDValue V2 = SDValue(VLdLn, 2);
// If it's a vld3, form a quad D-register but discard the last part.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: SDValue(VLdLn, 3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
} else {
// For 128-bit vectors, take the 64-bit results of the load and insert
// them as subregs into the result.
SDValue V[8];
for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
if (Even) {
V[i] = SDValue(VLdLn, Vec);
V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, RegVT), 0);
} else {
V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, RegVT), 0);
V[i+1] = SDValue(VLdLn, Vec);
}
}
if (NumVecs == 3)
V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, RegVT), 0);
if (NumVecs == 2)
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
else
RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
V[4], V[5], V[6], V[7]), 0);
}
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
Ops.data(), 7);
SuperReg = SDValue(VLdLn, 0);
Chain = SDValue(VLdLn, 1);
// Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
}
@ -2119,24 +2008,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_neon_vld2lane: {
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
ARM::VLD2LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
ARM::VLD3LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
ARM::VLD4LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
@ -2180,24 +2069,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case Intrinsic::arm_neon_vst2lane: {
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
ARM::VST2LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
ARM::VST3LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
ARM::VST4LNd32Pseudo };
unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
}
}
break;

View File

@ -445,6 +445,33 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
//
// Each class takes the address, a source super-register and a lane number,
// and produces a result super-register of the same class.  The "$src = $dst"
// constraint ties the result to the source operand (presumably so that the
// lanes not written by the load keep their incoming values -- confirm
// against the expansion code).  The *WB variants additionally take an
// am6offset operand and define a GPR result $wb that is tied to the address
// register via "$addr.addr = $wb".

// Single Q register, no writeback.
class VLDQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst),
(ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
// Single Q register, with address writeback.
class VLDQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// QQ register operand, no writeback.
class VLDQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst),
(ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
// QQ register operand, with address writeback.
class VLDQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// QQQQ register operand, no writeback.
class VLDQQQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst),
(ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
itin, "$src = $dst">;
// QQQQ register operand, with address writeback.
class VLDQQQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@ -459,13 +486,16 @@ def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2>;
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -479,9 +509,16 @@ def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@ -494,13 +531,16 @@ def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3>;
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -517,9 +557,16 @@ def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4,
@ -533,13 +580,16 @@ def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4>;
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4>;
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4>;
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -556,9 +606,16 @@ def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
@ -846,6 +903,30 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
// Classes for VST*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
//
// Each class takes the address, a source super-register and a lane number.
// The plain variants define no results and have an empty constraint string;
// the *WB variants additionally take an am6offset operand and define a GPR
// result $wb that is tied to the address register via "$addr.addr = $wb".

// Single Q register, no writeback.
class VSTQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
itin, "">;
// Single Q register, with address writeback.
class VSTQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// QQ register operand, no writeback.
class VSTQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
itin, "">;
// QQ register operand, with address writeback.
class VSTQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// QQQQ register operand, no writeback.
class VSTQQQQLNPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
itin, "">;
// QQQQ register operand, with address writeback.
class VSTQQQQLNWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@ -860,13 +941,16 @@ def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST>;
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST>;
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST>;
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST>;
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -880,9 +964,16 @@ def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
@ -894,13 +985,16 @@ def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -915,9 +1009,16 @@ def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
@ -930,13 +1031,16 @@ def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@ -951,9 +1055,16 @@ def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1

View File

@ -51,144 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
FirstOpnd = 0;
NumRegs = 2;
return true;
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
FirstOpnd = 0;
NumRegs = 2;
Offset = 0;
Stride = 2;
return true;
case ARM::VLD2LNq16odd:
case ARM::VLD2LNq32odd:
FirstOpnd = 0;
NumRegs = 2;
Offset = 1;
Stride = 2;
return true;
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
FirstOpnd = 0;
NumRegs = 3;
return true;
case ARM::VLD3LNq16:
case ARM::VLD3LNq32:
FirstOpnd = 0;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
case ARM::VLD3LNq16odd:
case ARM::VLD3LNq32odd:
FirstOpnd = 0;
NumRegs = 3;
Offset = 1;
Stride = 2;
return true;
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
FirstOpnd = 0;
NumRegs = 4;
return true;
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
FirstOpnd = 0;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
case ARM::VLD4LNq16odd:
case ARM::VLD4LNq32odd:
FirstOpnd = 0;
NumRegs = 4;
Offset = 1;
Stride = 2;
return true;
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
FirstOpnd = 2;
NumRegs = 2;
return true;
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;
NumRegs = 2;
Offset = 0;
Stride = 2;
return true;
case ARM::VST2LNq16odd:
case ARM::VST2LNq32odd:
FirstOpnd = 2;
NumRegs = 2;
Offset = 1;
Stride = 2;
return true;
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
FirstOpnd = 2;
NumRegs = 3;
return true;
case ARM::VST3LNq16:
case ARM::VST3LNq32:
FirstOpnd = 2;
NumRegs = 3;
Offset = 0;
Stride = 2;
return true;
case ARM::VST3LNq16odd:
case ARM::VST3LNq32odd:
FirstOpnd = 2;
NumRegs = 3;
Offset = 1;
Stride = 2;
return true;
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
FirstOpnd = 2;
NumRegs = 4;
return true;
case ARM::VST4LNq16:
case ARM::VST4LNq32:
FirstOpnd = 2;
NumRegs = 4;
Offset = 0;
Stride = 2;
return true;
case ARM::VST4LNq16odd:
case ARM::VST4LNq32odd:
FirstOpnd = 2;
NumRegs = 4;
Offset = 1;
Stride = 2;
return true;
case ARM::VTBL2:
FirstOpnd = 1;
NumRegs = 2;