mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-03 01:12:59 +00:00
Switch all the NEON vld-lane and vst-lane instructions over to the new
pseudo-instruction approach. Change ARMExpandPseudoInsts to use a table to record all the NEON load/store information. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113812 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b3e9681cc0
commit
8466fa1842
@ -24,13 +24,6 @@ using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class ARMExpandPseudo : public MachineFunctionPass {
|
||||
// Constants for register spacing in NEON load/store instructions.
|
||||
enum NEONRegSpacing {
|
||||
SingleSpc,
|
||||
EvenDblSpc,
|
||||
OddDblSpc
|
||||
};
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
ARMExpandPseudo() : MachineFunctionPass(ID) {}
|
||||
@ -48,10 +41,9 @@ namespace {
|
||||
void TransferImpOps(MachineInstr &OldMI,
|
||||
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
||||
bool ExpandMBB(MachineBasicBlock &MBB);
|
||||
void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
||||
void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
||||
void ExpandVLD(MachineBasicBlock::iterator &MBBI);
|
||||
void ExpandVST(MachineBasicBlock::iterator &MBBI);
|
||||
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
|
||||
};
|
||||
char ARMExpandPseudo::ID = 0;
|
||||
}
|
||||
@ -73,37 +65,289 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Constants for register spacing in NEON load/store instructions.
|
||||
// For quad-register load-lane and store-lane pseudo instructors, the
|
||||
// spacing is initially assumed to be EvenDblSpc, and that is changed to
|
||||
// OddDblSpc depending on the lane number operand.
|
||||
enum NEONRegSpacing {
|
||||
SingleSpc,
|
||||
EvenDblSpc,
|
||||
OddDblSpc
|
||||
};
|
||||
|
||||
// Entries for NEON load/store information table. The table is sorted by
|
||||
// PseudoOpc for fast binary-search lookups.
|
||||
struct NEONLdStTableEntry {
|
||||
unsigned PseudoOpc;
|
||||
unsigned RealOpc;
|
||||
bool IsLoad;
|
||||
bool HasWriteBack;
|
||||
NEONRegSpacing RegSpacing;
|
||||
unsigned char NumRegs; // D registers loaded or stored
|
||||
unsigned char RegElts; // elements per D register; used for lane ops
|
||||
|
||||
// Comparison methods for binary search of the table.
|
||||
bool operator<(const NEONLdStTableEntry &TE) const {
|
||||
return PseudoOpc < TE.PseudoOpc;
|
||||
}
|
||||
friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
|
||||
return TE.PseudoOpc < PseudoOpc;
|
||||
}
|
||||
friend bool ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
|
||||
const NEONLdStTableEntry &TE) {
|
||||
return PseudoOpc < TE.PseudoOpc;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// NEON load/store pseudo-instruction table.  Columns are:
//   { PseudoOpc, RealOpc, IsLoad, HasWriteBack, RegSpacing, NumRegs, RegElts }
// Keep the rows sorted by PseudoOpc; LookupNEONLdSt binary-searches this
// table and asserts on the ordering in debug builds.
static const NEONLdStTableEntry NEONLdStTable[] = {
  // VLD1: four- and three-D-register forms.
  { ARM::VLD1d64QPseudo,       ARM::VLD1d64Q,      true,  false, SingleSpc,  4, 1 },
  { ARM::VLD1d64QPseudo_UPD,   ARM::VLD1d64Q_UPD,  true,  true,  SingleSpc,  4, 1 },
  { ARM::VLD1d64TPseudo,       ARM::VLD1d64T,      true,  false, SingleSpc,  3, 1 },
  { ARM::VLD1d64TPseudo_UPD,   ARM::VLD1d64T_UPD,  true,  true,  SingleSpc,  3, 1 },

  // VLD1: Q-register forms.
  { ARM::VLD1q16Pseudo,        ARM::VLD1q16,       true,  false, SingleSpc,  2, 4 },
  { ARM::VLD1q16Pseudo_UPD,    ARM::VLD1q16_UPD,   true,  true,  SingleSpc,  2, 4 },
  { ARM::VLD1q32Pseudo,        ARM::VLD1q32,       true,  false, SingleSpc,  2, 2 },
  { ARM::VLD1q32Pseudo_UPD,    ARM::VLD1q32_UPD,   true,  true,  SingleSpc,  2, 2 },
  { ARM::VLD1q64Pseudo,        ARM::VLD1q64,       true,  false, SingleSpc,  2, 1 },
  { ARM::VLD1q64Pseudo_UPD,    ARM::VLD1q64_UPD,   true,  true,  SingleSpc,  2, 1 },
  { ARM::VLD1q8Pseudo,         ARM::VLD1q8,        true,  false, SingleSpc,  2, 8 },
  { ARM::VLD1q8Pseudo_UPD,     ARM::VLD1q8_UPD,    true,  true,  SingleSpc,  2, 8 },

  // VLD2 lane.
  { ARM::VLD2LNd16Pseudo,      ARM::VLD2LNd16,     true,  false, SingleSpc,  2, 4 },
  { ARM::VLD2LNd16Pseudo_UPD,  ARM::VLD2LNd16_UPD, true,  true,  SingleSpc,  2, 4 },
  { ARM::VLD2LNd32Pseudo,      ARM::VLD2LNd32,     true,  false, SingleSpc,  2, 2 },
  { ARM::VLD2LNd32Pseudo_UPD,  ARM::VLD2LNd32_UPD, true,  true,  SingleSpc,  2, 2 },
  { ARM::VLD2LNd8Pseudo,       ARM::VLD2LNd8,      true,  false, SingleSpc,  2, 8 },
  { ARM::VLD2LNd8Pseudo_UPD,   ARM::VLD2LNd8_UPD,  true,  true,  SingleSpc,  2, 8 },
  { ARM::VLD2LNq16Pseudo,      ARM::VLD2LNq16,     true,  false, EvenDblSpc, 2, 4 },
  { ARM::VLD2LNq16Pseudo_UPD,  ARM::VLD2LNq16_UPD, true,  true,  EvenDblSpc, 2, 4 },
  { ARM::VLD2LNq32Pseudo,      ARM::VLD2LNq32,     true,  false, EvenDblSpc, 2, 2 },
  { ARM::VLD2LNq32Pseudo_UPD,  ARM::VLD2LNq32_UPD, true,  true,  EvenDblSpc, 2, 2 },

  // VLD2: D-register forms.
  { ARM::VLD2d16Pseudo,        ARM::VLD2d16,       true,  false, SingleSpc,  2, 4 },
  { ARM::VLD2d16Pseudo_UPD,    ARM::VLD2d16_UPD,   true,  true,  SingleSpc,  2, 4 },
  { ARM::VLD2d32Pseudo,        ARM::VLD2d32,       true,  false, SingleSpc,  2, 2 },
  { ARM::VLD2d32Pseudo_UPD,    ARM::VLD2d32_UPD,   true,  true,  SingleSpc,  2, 2 },
  { ARM::VLD2d8Pseudo,         ARM::VLD2d8,        true,  false, SingleSpc,  2, 8 },
  { ARM::VLD2d8Pseudo_UPD,     ARM::VLD2d8_UPD,    true,  true,  SingleSpc,  2, 8 },

  // VLD2: Q-register forms.
  { ARM::VLD2q16Pseudo,        ARM::VLD2q16,       true,  false, SingleSpc,  4, 4 },
  { ARM::VLD2q16Pseudo_UPD,    ARM::VLD2q16_UPD,   true,  true,  SingleSpc,  4, 4 },
  { ARM::VLD2q32Pseudo,        ARM::VLD2q32,       true,  false, SingleSpc,  4, 2 },
  { ARM::VLD2q32Pseudo_UPD,    ARM::VLD2q32_UPD,   true,  true,  SingleSpc,  4, 2 },
  { ARM::VLD2q8Pseudo,         ARM::VLD2q8,        true,  false, SingleSpc,  4, 8 },
  { ARM::VLD2q8Pseudo_UPD,     ARM::VLD2q8_UPD,    true,  true,  SingleSpc,  4, 8 },

  // VLD3 lane.
  { ARM::VLD3LNd16Pseudo,      ARM::VLD3LNd16,     true,  false, SingleSpc,  3, 4 },
  { ARM::VLD3LNd16Pseudo_UPD,  ARM::VLD3LNd16_UPD, true,  true,  SingleSpc,  3, 4 },
  { ARM::VLD3LNd32Pseudo,      ARM::VLD3LNd32,     true,  false, SingleSpc,  3, 2 },
  { ARM::VLD3LNd32Pseudo_UPD,  ARM::VLD3LNd32_UPD, true,  true,  SingleSpc,  3, 2 },
  { ARM::VLD3LNd8Pseudo,       ARM::VLD3LNd8,      true,  false, SingleSpc,  3, 8 },
  { ARM::VLD3LNd8Pseudo_UPD,   ARM::VLD3LNd8_UPD,  true,  true,  SingleSpc,  3, 8 },
  { ARM::VLD3LNq16Pseudo,      ARM::VLD3LNq16,     true,  false, EvenDblSpc, 3, 4 },
  { ARM::VLD3LNq16Pseudo_UPD,  ARM::VLD3LNq16_UPD, true,  true,  EvenDblSpc, 3, 4 },
  { ARM::VLD3LNq32Pseudo,      ARM::VLD3LNq32,     true,  false, EvenDblSpc, 3, 2 },
  { ARM::VLD3LNq32Pseudo_UPD,  ARM::VLD3LNq32_UPD, true,  true,  EvenDblSpc, 3, 2 },

  // VLD3: D-register forms.
  { ARM::VLD3d16Pseudo,        ARM::VLD3d16,       true,  false, SingleSpc,  3, 4 },
  { ARM::VLD3d16Pseudo_UPD,    ARM::VLD3d16_UPD,   true,  true,  SingleSpc,  3, 4 },
  { ARM::VLD3d32Pseudo,        ARM::VLD3d32,       true,  false, SingleSpc,  3, 2 },
  { ARM::VLD3d32Pseudo_UPD,    ARM::VLD3d32_UPD,   true,  true,  SingleSpc,  3, 2 },
  { ARM::VLD3d8Pseudo,         ARM::VLD3d8,        true,  false, SingleSpc,  3, 8 },
  { ARM::VLD3d8Pseudo_UPD,     ARM::VLD3d8_UPD,    true,  true,  SingleSpc,  3, 8 },

  // VLD3: Q-register forms (even/odd halves share the real opcode).
  { ARM::VLD3q16Pseudo_UPD,    ARM::VLD3q16_UPD,   true,  true,  EvenDblSpc, 3, 4 },
  { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD,   true,  true,  OddDblSpc,  3, 4 },
  { ARM::VLD3q32Pseudo_UPD,    ARM::VLD3q32_UPD,   true,  true,  EvenDblSpc, 3, 2 },
  { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD,   true,  true,  OddDblSpc,  3, 2 },
  { ARM::VLD3q8Pseudo_UPD,     ARM::VLD3q8_UPD,    true,  true,  EvenDblSpc, 3, 8 },
  { ARM::VLD3q8oddPseudo_UPD,  ARM::VLD3q8_UPD,    true,  true,  OddDblSpc,  3, 8 },

  // VLD4 lane.
  { ARM::VLD4LNd16Pseudo,      ARM::VLD4LNd16,     true,  false, SingleSpc,  4, 4 },
  { ARM::VLD4LNd16Pseudo_UPD,  ARM::VLD4LNd16_UPD, true,  true,  SingleSpc,  4, 4 },
  { ARM::VLD4LNd32Pseudo,      ARM::VLD4LNd32,     true,  false, SingleSpc,  4, 2 },
  { ARM::VLD4LNd32Pseudo_UPD,  ARM::VLD4LNd32_UPD, true,  true,  SingleSpc,  4, 2 },
  { ARM::VLD4LNd8Pseudo,       ARM::VLD4LNd8,      true,  false, SingleSpc,  4, 8 },
  { ARM::VLD4LNd8Pseudo_UPD,   ARM::VLD4LNd8_UPD,  true,  true,  SingleSpc,  4, 8 },
  { ARM::VLD4LNq16Pseudo,      ARM::VLD4LNq16,     true,  false, EvenDblSpc, 4, 4 },
  { ARM::VLD4LNq16Pseudo_UPD,  ARM::VLD4LNq16_UPD, true,  true,  EvenDblSpc, 4, 4 },
  { ARM::VLD4LNq32Pseudo,      ARM::VLD4LNq32,     true,  false, EvenDblSpc, 4, 2 },
  { ARM::VLD4LNq32Pseudo_UPD,  ARM::VLD4LNq32_UPD, true,  true,  EvenDblSpc, 4, 2 },

  // VLD4: D-register forms.
  { ARM::VLD4d16Pseudo,        ARM::VLD4d16,       true,  false, SingleSpc,  4, 4 },
  { ARM::VLD4d16Pseudo_UPD,    ARM::VLD4d16_UPD,   true,  true,  SingleSpc,  4, 4 },
  { ARM::VLD4d32Pseudo,        ARM::VLD4d32,       true,  false, SingleSpc,  4, 2 },
  { ARM::VLD4d32Pseudo_UPD,    ARM::VLD4d32_UPD,   true,  true,  SingleSpc,  4, 2 },
  { ARM::VLD4d8Pseudo,         ARM::VLD4d8,        true,  false, SingleSpc,  4, 8 },
  { ARM::VLD4d8Pseudo_UPD,     ARM::VLD4d8_UPD,    true,  true,  SingleSpc,  4, 8 },

  // VLD4: Q-register forms (even/odd halves share the real opcode).
  { ARM::VLD4q16Pseudo_UPD,    ARM::VLD4q16_UPD,   true,  true,  EvenDblSpc, 4, 4 },
  { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD,   true,  true,  OddDblSpc,  4, 4 },
  { ARM::VLD4q32Pseudo_UPD,    ARM::VLD4q32_UPD,   true,  true,  EvenDblSpc, 4, 2 },
  { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD,   true,  true,  OddDblSpc,  4, 2 },
  { ARM::VLD4q8Pseudo_UPD,     ARM::VLD4q8_UPD,    true,  true,  EvenDblSpc, 4, 8 },
  { ARM::VLD4q8oddPseudo_UPD,  ARM::VLD4q8_UPD,    true,  true,  OddDblSpc,  4, 8 },

  // VST1: four- and three-D-register forms.
  { ARM::VST1d64QPseudo,       ARM::VST1d64Q,      false, false, SingleSpc,  4, 1 },
  { ARM::VST1d64QPseudo_UPD,   ARM::VST1d64Q_UPD,  false, true,  SingleSpc,  4, 1 },
  { ARM::VST1d64TPseudo,       ARM::VST1d64T,      false, false, SingleSpc,  3, 1 },
  { ARM::VST1d64TPseudo_UPD,   ARM::VST1d64T_UPD,  false, true,  SingleSpc,  3, 1 },

  // VST1: Q-register forms.
  { ARM::VST1q16Pseudo,        ARM::VST1q16,       false, false, SingleSpc,  2, 4 },
  { ARM::VST1q16Pseudo_UPD,    ARM::VST1q16_UPD,   false, true,  SingleSpc,  2, 4 },
  { ARM::VST1q32Pseudo,        ARM::VST1q32,       false, false, SingleSpc,  2, 2 },
  { ARM::VST1q32Pseudo_UPD,    ARM::VST1q32_UPD,   false, true,  SingleSpc,  2, 2 },
  { ARM::VST1q64Pseudo,        ARM::VST1q64,       false, false, SingleSpc,  2, 1 },
  { ARM::VST1q64Pseudo_UPD,    ARM::VST1q64_UPD,   false, true,  SingleSpc,  2, 1 },
  { ARM::VST1q8Pseudo,         ARM::VST1q8,        false, false, SingleSpc,  2, 8 },
  { ARM::VST1q8Pseudo_UPD,     ARM::VST1q8_UPD,    false, true,  SingleSpc,  2, 8 },

  // VST2 lane.
  { ARM::VST2LNd16Pseudo,      ARM::VST2LNd16,     false, false, SingleSpc,  2, 4 },
  { ARM::VST2LNd16Pseudo_UPD,  ARM::VST2LNd16_UPD, false, true,  SingleSpc,  2, 4 },
  { ARM::VST2LNd32Pseudo,      ARM::VST2LNd32,     false, false, SingleSpc,  2, 2 },
  { ARM::VST2LNd32Pseudo_UPD,  ARM::VST2LNd32_UPD, false, true,  SingleSpc,  2, 2 },
  { ARM::VST2LNd8Pseudo,       ARM::VST2LNd8,      false, false, SingleSpc,  2, 8 },
  { ARM::VST2LNd8Pseudo_UPD,   ARM::VST2LNd8_UPD,  false, true,  SingleSpc,  2, 8 },
  { ARM::VST2LNq16Pseudo,      ARM::VST2LNq16,     false, false, EvenDblSpc, 2, 4 },
  { ARM::VST2LNq16Pseudo_UPD,  ARM::VST2LNq16_UPD, false, true,  EvenDblSpc, 2, 4 },
  { ARM::VST2LNq32Pseudo,      ARM::VST2LNq32,     false, false, EvenDblSpc, 2, 2 },
  { ARM::VST2LNq32Pseudo_UPD,  ARM::VST2LNq32_UPD, false, true,  EvenDblSpc, 2, 2 },

  // VST2: D-register forms.
  { ARM::VST2d16Pseudo,        ARM::VST2d16,       false, false, SingleSpc,  2, 4 },
  { ARM::VST2d16Pseudo_UPD,    ARM::VST2d16_UPD,   false, true,  SingleSpc,  2, 4 },
  { ARM::VST2d32Pseudo,        ARM::VST2d32,       false, false, SingleSpc,  2, 2 },
  { ARM::VST2d32Pseudo_UPD,    ARM::VST2d32_UPD,   false, true,  SingleSpc,  2, 2 },
  { ARM::VST2d8Pseudo,         ARM::VST2d8,        false, false, SingleSpc,  2, 8 },
  { ARM::VST2d8Pseudo_UPD,     ARM::VST2d8_UPD,    false, true,  SingleSpc,  2, 8 },

  // VST2: Q-register forms.
  { ARM::VST2q16Pseudo,        ARM::VST2q16,       false, false, SingleSpc,  4, 4 },
  { ARM::VST2q16Pseudo_UPD,    ARM::VST2q16_UPD,   false, true,  SingleSpc,  4, 4 },
  { ARM::VST2q32Pseudo,        ARM::VST2q32,       false, false, SingleSpc,  4, 2 },
  { ARM::VST2q32Pseudo_UPD,    ARM::VST2q32_UPD,   false, true,  SingleSpc,  4, 2 },
  { ARM::VST2q8Pseudo,         ARM::VST2q8,        false, false, SingleSpc,  4, 8 },
  { ARM::VST2q8Pseudo_UPD,     ARM::VST2q8_UPD,    false, true,  SingleSpc,  4, 8 },

  // VST3 lane.
  { ARM::VST3LNd16Pseudo,      ARM::VST3LNd16,     false, false, SingleSpc,  3, 4 },
  { ARM::VST3LNd16Pseudo_UPD,  ARM::VST3LNd16_UPD, false, true,  SingleSpc,  3, 4 },
  { ARM::VST3LNd32Pseudo,      ARM::VST3LNd32,     false, false, SingleSpc,  3, 2 },
  { ARM::VST3LNd32Pseudo_UPD,  ARM::VST3LNd32_UPD, false, true,  SingleSpc,  3, 2 },
  { ARM::VST3LNd8Pseudo,       ARM::VST3LNd8,      false, false, SingleSpc,  3, 8 },
  { ARM::VST3LNd8Pseudo_UPD,   ARM::VST3LNd8_UPD,  false, true,  SingleSpc,  3, 8 },
  { ARM::VST3LNq16Pseudo,      ARM::VST3LNq16,     false, false, EvenDblSpc, 3, 4 },
  { ARM::VST3LNq16Pseudo_UPD,  ARM::VST3LNq16_UPD, false, true,  EvenDblSpc, 3, 4 },
  { ARM::VST3LNq32Pseudo,      ARM::VST3LNq32,     false, false, EvenDblSpc, 3, 2 },
  { ARM::VST3LNq32Pseudo_UPD,  ARM::VST3LNq32_UPD, false, true,  EvenDblSpc, 3, 2 },

  // VST3: D-register forms.
  { ARM::VST3d16Pseudo,        ARM::VST3d16,       false, false, SingleSpc,  3, 4 },
  { ARM::VST3d16Pseudo_UPD,    ARM::VST3d16_UPD,   false, true,  SingleSpc,  3, 4 },
  { ARM::VST3d32Pseudo,        ARM::VST3d32,       false, false, SingleSpc,  3, 2 },
  { ARM::VST3d32Pseudo_UPD,    ARM::VST3d32_UPD,   false, true,  SingleSpc,  3, 2 },
  { ARM::VST3d8Pseudo,         ARM::VST3d8,        false, false, SingleSpc,  3, 8 },
  { ARM::VST3d8Pseudo_UPD,     ARM::VST3d8_UPD,    false, true,  SingleSpc,  3, 8 },

  // VST3: Q-register forms (even/odd halves share the real opcode).
  { ARM::VST3q16Pseudo_UPD,    ARM::VST3q16_UPD,   false, true,  EvenDblSpc, 3, 4 },
  { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD,   false, true,  OddDblSpc,  3, 4 },
  { ARM::VST3q32Pseudo_UPD,    ARM::VST3q32_UPD,   false, true,  EvenDblSpc, 3, 2 },
  { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD,   false, true,  OddDblSpc,  3, 2 },
  { ARM::VST3q8Pseudo_UPD,     ARM::VST3q8_UPD,    false, true,  EvenDblSpc, 3, 8 },
  { ARM::VST3q8oddPseudo_UPD,  ARM::VST3q8_UPD,    false, true,  OddDblSpc,  3, 8 },

  // VST4 lane.
  { ARM::VST4LNd16Pseudo,      ARM::VST4LNd16,     false, false, SingleSpc,  4, 4 },
  { ARM::VST4LNd16Pseudo_UPD,  ARM::VST4LNd16_UPD, false, true,  SingleSpc,  4, 4 },
  { ARM::VST4LNd32Pseudo,      ARM::VST4LNd32,     false, false, SingleSpc,  4, 2 },
  { ARM::VST4LNd32Pseudo_UPD,  ARM::VST4LNd32_UPD, false, true,  SingleSpc,  4, 2 },
  { ARM::VST4LNd8Pseudo,       ARM::VST4LNd8,      false, false, SingleSpc,  4, 8 },
  { ARM::VST4LNd8Pseudo_UPD,   ARM::VST4LNd8_UPD,  false, true,  SingleSpc,  4, 8 },
  { ARM::VST4LNq16Pseudo,      ARM::VST4LNq16,     false, false, EvenDblSpc, 4, 4 },
  { ARM::VST4LNq16Pseudo_UPD,  ARM::VST4LNq16_UPD, false, true,  EvenDblSpc, 4, 4 },
  { ARM::VST4LNq32Pseudo,      ARM::VST4LNq32,     false, false, EvenDblSpc, 4, 2 },
  { ARM::VST4LNq32Pseudo_UPD,  ARM::VST4LNq32_UPD, false, true,  EvenDblSpc, 4, 2 },

  // VST4: D-register forms.
  { ARM::VST4d16Pseudo,        ARM::VST4d16,       false, false, SingleSpc,  4, 4 },
  { ARM::VST4d16Pseudo_UPD,    ARM::VST4d16_UPD,   false, true,  SingleSpc,  4, 4 },
  { ARM::VST4d32Pseudo,        ARM::VST4d32,       false, false, SingleSpc,  4, 2 },
  { ARM::VST4d32Pseudo_UPD,    ARM::VST4d32_UPD,   false, true,  SingleSpc,  4, 2 },
  { ARM::VST4d8Pseudo,         ARM::VST4d8,        false, false, SingleSpc,  4, 8 },
  { ARM::VST4d8Pseudo_UPD,     ARM::VST4d8_UPD,    false, true,  SingleSpc,  4, 8 },

  // VST4: Q-register forms (even/odd halves share the real opcode).
  { ARM::VST4q16Pseudo_UPD,    ARM::VST4q16_UPD,   false, true,  EvenDblSpc, 4, 4 },
  { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD,   false, true,  OddDblSpc,  4, 4 },
  { ARM::VST4q32Pseudo_UPD,    ARM::VST4q32_UPD,   false, true,  EvenDblSpc, 4, 2 },
  { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD,   false, true,  OddDblSpc,  4, 2 },
  { ARM::VST4q8Pseudo_UPD,     ARM::VST4q8_UPD,    false, true,  EvenDblSpc, 4, 8 },
  { ARM::VST4q8oddPseudo_UPD,  ARM::VST4q8_UPD,    false, true,  OddDblSpc,  4, 8 }
};
|
||||
|
||||
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
|
||||
/// load or store pseudo instruction.
|
||||
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
|
||||
unsigned NumEntries = array_lengthof(NEONLdStTable);
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Make sure the table is sorted.
|
||||
static bool TableChecked = false;
|
||||
if (!TableChecked) {
|
||||
for (unsigned i = 0; i != NumEntries-1; ++i)
|
||||
assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
|
||||
"NEONLdStTable is not sorted!");
|
||||
TableChecked = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
const NEONLdStTableEntry *I =
|
||||
std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
|
||||
if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
|
||||
return I;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
|
||||
/// corresponding to the specified register spacing. Not all of the results
|
||||
/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
|
||||
static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
|
||||
const TargetRegisterInfo *TRI, unsigned &D0,
|
||||
unsigned &D1, unsigned &D2, unsigned &D3) {
|
||||
if (RegSpc == SingleSpc) {
|
||||
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(Reg, ARM::dsub_1);
|
||||
D2 = TRI->getSubReg(Reg, ARM::dsub_2);
|
||||
D3 = TRI->getSubReg(Reg, ARM::dsub_3);
|
||||
} else if (RegSpc == EvenDblSpc) {
|
||||
D0 = TRI->getSubReg(Reg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(Reg, ARM::dsub_2);
|
||||
D2 = TRI->getSubReg(Reg, ARM::dsub_4);
|
||||
D3 = TRI->getSubReg(Reg, ARM::dsub_6);
|
||||
} else {
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing");
|
||||
D0 = TRI->getSubReg(Reg, ARM::dsub_1);
|
||||
D1 = TRI->getSubReg(Reg, ARM::dsub_3);
|
||||
D2 = TRI->getSubReg(Reg, ARM::dsub_5);
|
||||
D3 = TRI->getSubReg(Reg, ARM::dsub_7);
|
||||
}
|
||||
}
|
||||
|
||||
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
|
||||
/// operands to real VLD instructions with D register operands.
|
||||
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||
unsigned Opc, bool hasWriteBack,
|
||||
NEONRegSpacing RegSpc, unsigned NumRegs) {
|
||||
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
||||
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
||||
assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
|
||||
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
|
||||
unsigned NumRegs = TableEntry->NumRegs;
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
||||
TII->get(TableEntry->RealOpc));
|
||||
unsigned OpIdx = 0;
|
||||
|
||||
bool DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
|
||||
unsigned D0, D1, D2, D3;
|
||||
if (RegSpc == SingleSpc) {
|
||||
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
|
||||
D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
|
||||
D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
|
||||
} else if (RegSpc == EvenDblSpc) {
|
||||
D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
|
||||
D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
|
||||
D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
|
||||
} else {
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
|
||||
D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
|
||||
D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
|
||||
D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
|
||||
D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
|
||||
}
|
||||
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
|
||||
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
||||
if (NumRegs > 2)
|
||||
@ -111,14 +355,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
|
||||
|
||||
if (hasWriteBack)
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
// Copy the addrmode6 operands.
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
// Copy the am6offset operand.
|
||||
if (hasWriteBack)
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
MIB = AddDefaultPred(MIB);
|
||||
@ -138,45 +382,32 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
|
||||
|
||||
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
|
||||
/// operands to real VST instructions with D register operands.
|
||||
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
||||
unsigned Opc, bool hasWriteBack,
|
||||
NEONRegSpacing RegSpc, unsigned NumRegs) {
|
||||
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
|
||||
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
||||
assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
|
||||
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
|
||||
unsigned NumRegs = TableEntry->NumRegs;
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
||||
TII->get(TableEntry->RealOpc));
|
||||
unsigned OpIdx = 0;
|
||||
if (hasWriteBack)
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
// Copy the addrmode6 operands.
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
// Copy the am6offset operand.
|
||||
if (hasWriteBack)
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
|
||||
unsigned SrcReg = MI.getOperand(OpIdx).getReg();
|
||||
unsigned D0, D1, D2, D3;
|
||||
if (RegSpc == SingleSpc) {
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||
} else if (RegSpc == EvenDblSpc) {
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
|
||||
} else {
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
|
||||
}
|
||||
|
||||
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
|
||||
MIB.addReg(D0).addReg(D1);
|
||||
if (NumRegs > 2)
|
||||
MIB.addReg(D2);
|
||||
@ -190,6 +421,85 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
|
||||
/// register operands to real instructions with D register operands.
|
||||
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
||||
const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
|
||||
assert(TableEntry && "NEONLdStTable lookup failed");
|
||||
NEONRegSpacing RegSpc = TableEntry->RegSpacing;
|
||||
unsigned NumRegs = TableEntry->NumRegs;
|
||||
unsigned RegElts = TableEntry->RegElts;
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
|
||||
TII->get(TableEntry->RealOpc));
|
||||
unsigned OpIdx = 0;
|
||||
// The lane operand is always the 3rd from last operand, before the 2
|
||||
// predicate operands.
|
||||
unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
|
||||
|
||||
// Adjust the lane and spacing as needed for Q registers.
|
||||
assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
|
||||
if (RegSpc == EvenDblSpc && Lane >= RegElts) {
|
||||
RegSpc = OddDblSpc;
|
||||
Lane -= RegElts;
|
||||
}
|
||||
assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
|
||||
|
||||
unsigned DstReg, D0, D1, D2, D3;
|
||||
bool DstIsDead;
|
||||
if (TableEntry->IsLoad) {
|
||||
DstIsDead = MI.getOperand(OpIdx).isDead();
|
||||
DstReg = MI.getOperand(OpIdx++).getReg();
|
||||
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
|
||||
MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
|
||||
.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
|
||||
if (NumRegs > 2)
|
||||
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
|
||||
}
|
||||
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
// Copy the addrmode6 operands.
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
// Copy the am6offset operand.
|
||||
if (TableEntry->HasWriteBack)
|
||||
MIB.addOperand(MI.getOperand(OpIdx++));
|
||||
|
||||
// Grab the super-register source.
|
||||
MachineOperand MO = MI.getOperand(OpIdx++);
|
||||
if (!TableEntry->IsLoad)
|
||||
GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
|
||||
|
||||
// Add the subregs as sources of the new instruction.
|
||||
unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
|
||||
getKillRegState(MO.isKill()));
|
||||
MIB.addReg(D0, SrcFlags).addReg(D1, SrcFlags);
|
||||
if (NumRegs > 2)
|
||||
MIB.addReg(D2, SrcFlags);
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3, SrcFlags);
|
||||
|
||||
// Add the lane number operand.
|
||||
MIB.addImm(Lane);
|
||||
|
||||
MIB = AddDefaultPred(MIB);
|
||||
// Copy the super-register source to be an implicit source.
|
||||
MO.setImplicit(true);
|
||||
MIB.addOperand(MO);
|
||||
if (TableEntry->IsLoad)
|
||||
// Add an implicit def for the super-register.
|
||||
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
bool Modified = false;
|
||||
|
||||
@ -292,204 +602,169 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
}
|
||||
|
||||
case ARM::VLD1q8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
|
||||
case ARM::VLD1q16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
|
||||
case ARM::VLD1q32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
|
||||
case ARM::VLD1q64Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
|
||||
case ARM::VLD1q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
|
||||
case ARM::VLD1q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
|
||||
case ARM::VLD1q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
|
||||
case ARM::VLD1q64Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
|
||||
|
||||
case ARM::VLD2d8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
|
||||
case ARM::VLD2d16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
|
||||
case ARM::VLD2d32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
|
||||
case ARM::VLD2q8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
|
||||
case ARM::VLD2q16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
|
||||
case ARM::VLD2q32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
|
||||
case ARM::VLD2d8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
|
||||
case ARM::VLD2d16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
|
||||
case ARM::VLD2d32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
|
||||
case ARM::VLD2q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
|
||||
case ARM::VLD2q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
|
||||
case ARM::VLD2q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
|
||||
|
||||
case ARM::VLD3d8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
|
||||
case ARM::VLD1d64TPseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
|
||||
case ARM::VLD3d8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3d16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3d32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD1d64TPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VLD3q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VLD3q8oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VLD3q16oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VLD3q32oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
|
||||
|
||||
case ARM::VLD4d8Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d16Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d32Pseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
|
||||
case ARM::VLD1d64QPseudo:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
|
||||
case ARM::VLD4d8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4d16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4d32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD1d64QPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VLD4q8Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q16Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q32Pseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VLD4q8oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VLD4q16oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VLD4q32oddPseudo_UPD:
|
||||
ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
|
||||
ExpandVLD(MBBI);
|
||||
break;
|
||||
|
||||
case ARM::VST1q8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q64Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
|
||||
case ARM::VST1q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST1q64Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
|
||||
|
||||
case ARM::VST2d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
|
||||
case ARM::VST2d16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
|
||||
case ARM::VST2d32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
|
||||
case ARM::VST2q8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
|
||||
case ARM::VST2q16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
|
||||
case ARM::VST2q32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
|
||||
case ARM::VST2d8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2d16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2d32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
|
||||
case ARM::VST2q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST2q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST2q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
|
||||
|
||||
case ARM::VST3d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
|
||||
case ARM::VST1d64TPseudo:
|
||||
ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3d16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3d32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST1d64TPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q8oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VST3q16oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VST3q32oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
|
||||
|
||||
case ARM::VST4d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
|
||||
case ARM::VST1d64QPseudo:
|
||||
ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4d16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4d32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST1d64QPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q8oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VST4q16oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VST4q32oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
|
||||
ExpandVST(MBBI);
|
||||
break;
|
||||
|
||||
case ARM::VLD2LNd8Pseudo:
|
||||
case ARM::VLD2LNd16Pseudo:
|
||||
case ARM::VLD2LNd32Pseudo:
|
||||
case ARM::VLD2LNq16Pseudo:
|
||||
case ARM::VLD2LNq32Pseudo:
|
||||
case ARM::VLD2LNd8Pseudo_UPD:
|
||||
case ARM::VLD2LNd16Pseudo_UPD:
|
||||
case ARM::VLD2LNd32Pseudo_UPD:
|
||||
case ARM::VLD2LNq16Pseudo_UPD:
|
||||
case ARM::VLD2LNq32Pseudo_UPD:
|
||||
case ARM::VLD3LNd8Pseudo:
|
||||
case ARM::VLD3LNd16Pseudo:
|
||||
case ARM::VLD3LNd32Pseudo:
|
||||
case ARM::VLD3LNq16Pseudo:
|
||||
case ARM::VLD3LNq32Pseudo:
|
||||
case ARM::VLD3LNd8Pseudo_UPD:
|
||||
case ARM::VLD3LNd16Pseudo_UPD:
|
||||
case ARM::VLD3LNd32Pseudo_UPD:
|
||||
case ARM::VLD3LNq16Pseudo_UPD:
|
||||
case ARM::VLD3LNq32Pseudo_UPD:
|
||||
case ARM::VLD4LNd8Pseudo:
|
||||
case ARM::VLD4LNd16Pseudo:
|
||||
case ARM::VLD4LNd32Pseudo:
|
||||
case ARM::VLD4LNq16Pseudo:
|
||||
case ARM::VLD4LNq32Pseudo:
|
||||
case ARM::VLD4LNd8Pseudo_UPD:
|
||||
case ARM::VLD4LNd16Pseudo_UPD:
|
||||
case ARM::VLD4LNd32Pseudo_UPD:
|
||||
case ARM::VLD4LNq16Pseudo_UPD:
|
||||
case ARM::VLD4LNq32Pseudo_UPD:
|
||||
case ARM::VST2LNd8Pseudo:
|
||||
case ARM::VST2LNd16Pseudo:
|
||||
case ARM::VST2LNd32Pseudo:
|
||||
case ARM::VST2LNq16Pseudo:
|
||||
case ARM::VST2LNq32Pseudo:
|
||||
case ARM::VST2LNd8Pseudo_UPD:
|
||||
case ARM::VST2LNd16Pseudo_UPD:
|
||||
case ARM::VST2LNd32Pseudo_UPD:
|
||||
case ARM::VST2LNq16Pseudo_UPD:
|
||||
case ARM::VST2LNq32Pseudo_UPD:
|
||||
case ARM::VST3LNd8Pseudo:
|
||||
case ARM::VST3LNd16Pseudo:
|
||||
case ARM::VST3LNd32Pseudo:
|
||||
case ARM::VST3LNq16Pseudo:
|
||||
case ARM::VST3LNq32Pseudo:
|
||||
case ARM::VST3LNd8Pseudo_UPD:
|
||||
case ARM::VST3LNd16Pseudo_UPD:
|
||||
case ARM::VST3LNd32Pseudo_UPD:
|
||||
case ARM::VST3LNq16Pseudo_UPD:
|
||||
case ARM::VST3LNq32Pseudo_UPD:
|
||||
case ARM::VST4LNd8Pseudo:
|
||||
case ARM::VST4LNd16Pseudo:
|
||||
case ARM::VST4LNd32Pseudo:
|
||||
case ARM::VST4LNq16Pseudo:
|
||||
case ARM::VST4LNq32Pseudo:
|
||||
case ARM::VST4LNd8Pseudo_UPD:
|
||||
case ARM::VST4LNd16Pseudo_UPD:
|
||||
case ARM::VST4LNd32Pseudo_UPD:
|
||||
case ARM::VST4LNq16Pseudo_UPD:
|
||||
case ARM::VST4LNq32Pseudo_UPD:
|
||||
ExpandLaneOp(MBBI);
|
||||
break;
|
||||
}
|
||||
|
||||
if (ModifiedOp)
|
||||
|
@ -151,10 +151,9 @@ private:
|
||||
|
||||
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
|
||||
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
|
||||
/// load/store of D registers and even subregs and odd subregs of Q registers.
|
||||
/// load/store of D registers and Q registers.
|
||||
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1);
|
||||
unsigned *DOpcodes, unsigned *QOpcodes);
|
||||
|
||||
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
|
||||
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
|
||||
@ -196,10 +195,6 @@ private:
|
||||
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
|
||||
// Form sequences of 8 consecutive D registers.
|
||||
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
|
||||
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
|
||||
};
|
||||
}
|
||||
|
||||
@ -1015,39 +1010,6 @@ SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
|
||||
}
|
||||
|
||||
/// OctoDRegs - Form 8 consecutive D registers.
|
||||
///
|
||||
SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
|
||||
SDValue V2, SDValue V3,
|
||||
SDValue V4, SDValue V5,
|
||||
SDValue V6, SDValue V7) {
|
||||
DebugLoc dl = V0.getNode()->getDebugLoc();
|
||||
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
|
||||
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
|
||||
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
|
||||
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
|
||||
SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
|
||||
SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
|
||||
SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
|
||||
SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
|
||||
const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
|
||||
V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
|
||||
}
|
||||
|
||||
/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
|
||||
/// for a 64-bit subregister of the vector.
|
||||
static EVT GetNEONSubregVT(EVT VT) {
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("unhandled NEON type");
|
||||
case MVT::v16i8: return MVT::v8i8;
|
||||
case MVT::v8i16: return MVT::v4i16;
|
||||
case MVT::v4f32: return MVT::v2f32;
|
||||
case MVT::v4i32: return MVT::v2i32;
|
||||
case MVT::v2i64: return MVT::v1i64;
|
||||
}
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
|
||||
unsigned *DOpcodes, unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1) {
|
||||
@ -1281,8 +1243,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
unsigned NumVecs, unsigned *DOpcodes,
|
||||
unsigned *QOpcodes0,
|
||||
unsigned *QOpcodes1) {
|
||||
unsigned *QOpcodes) {
|
||||
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
@ -1296,16 +1257,6 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
|
||||
bool is64BitVector = VT.is64BitVector();
|
||||
|
||||
// Quad registers are handled by load/store of subregs. Find the subreg info.
|
||||
unsigned NumElts = 0;
|
||||
bool Even = false;
|
||||
EVT RegVT = VT;
|
||||
if (!is64BitVector) {
|
||||
RegVT = GetNEONSubregVT(VT);
|
||||
NumElts = RegVT.getVectorNumElements();
|
||||
Even = Lane < NumElts;
|
||||
}
|
||||
|
||||
unsigned OpcodeIndex;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("unhandled vld/vst lane type");
|
||||
@ -1323,121 +1274,59 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
|
||||
SDValue Pred = getAL(CurDAG);
|
||||
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
|
||||
|
||||
SmallVector<SDValue, 10> Ops;
|
||||
SmallVector<SDValue, 7> Ops;
|
||||
Ops.push_back(MemAddr);
|
||||
Ops.push_back(Align);
|
||||
|
||||
unsigned Opc = 0;
|
||||
if (is64BitVector) {
|
||||
Opc = DOpcodes[OpcodeIndex];
|
||||
SDValue RegSeq;
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
if (NumVecs == 2) {
|
||||
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
||||
} else {
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
|
||||
Opc = QOpcodes[OpcodeIndex]);
|
||||
|
||||
// Now extract the D registers back out.
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
|
||||
if (NumVecs > 2)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
|
||||
if (NumVecs > 3)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
|
||||
SDValue SuperReg;
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
if (NumVecs == 2) {
|
||||
if (is64BitVector)
|
||||
SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
||||
else
|
||||
SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
|
||||
} else {
|
||||
// Check if this is loading the even or odd subreg of a Q register.
|
||||
if (Lane < NumElts) {
|
||||
Opc = QOpcodes0[OpcodeIndex];
|
||||
} else {
|
||||
Lane -= NumElts;
|
||||
Opc = QOpcodes1[OpcodeIndex];
|
||||
}
|
||||
|
||||
SDValue RegSeq;
|
||||
SDValue V0 = N->getOperand(0+3);
|
||||
SDValue V1 = N->getOperand(1+3);
|
||||
if (NumVecs == 2) {
|
||||
RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
|
||||
} else {
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
|
||||
// Extract the subregs of the input vector.
|
||||
unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
|
||||
RegSeq));
|
||||
SDValue V2 = N->getOperand(2+3);
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: N->getOperand(3+3);
|
||||
if (is64BitVector)
|
||||
SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
else
|
||||
SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
Ops.push_back(SuperReg);
|
||||
Ops.push_back(getI32Imm(Lane));
|
||||
Ops.push_back(Pred);
|
||||
Ops.push_back(Reg0);
|
||||
Ops.push_back(Chain);
|
||||
|
||||
if (!IsLoad)
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
|
||||
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
|
||||
|
||||
std::vector<EVT> ResTys(NumVecs, RegVT);
|
||||
ResTys.push_back(MVT::Other);
|
||||
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
|
||||
EVT ResTy;
|
||||
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
|
||||
if (!is64BitVector)
|
||||
ResTyElts *= 2;
|
||||
ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
|
||||
|
||||
// Form a REG_SEQUENCE to force register allocation.
|
||||
SDValue RegSeq;
|
||||
if (is64BitVector) {
|
||||
SDValue V0 = SDValue(VLdLn, 0);
|
||||
SDValue V1 = SDValue(VLdLn, 1);
|
||||
if (NumVecs == 2) {
|
||||
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
|
||||
} else {
|
||||
SDValue V2 = SDValue(VLdLn, 2);
|
||||
// If it's a vld3, form a quad D-register but discard the last part.
|
||||
SDValue V3 = (NumVecs == 3)
|
||||
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
|
||||
: SDValue(VLdLn, 3);
|
||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
|
||||
}
|
||||
} else {
|
||||
// For 128-bit vectors, take the 64-bit results of the load and insert
|
||||
// them as subregs into the result.
|
||||
SDValue V[8];
|
||||
for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
|
||||
if (Even) {
|
||||
V[i] = SDValue(VLdLn, Vec);
|
||||
V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
|
||||
dl, RegVT), 0);
|
||||
} else {
|
||||
V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
|
||||
dl, RegVT), 0);
|
||||
V[i+1] = SDValue(VLdLn, Vec);
|
||||
}
|
||||
}
|
||||
if (NumVecs == 3)
|
||||
V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
|
||||
dl, RegVT), 0);
|
||||
|
||||
if (NumVecs == 2)
|
||||
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
|
||||
else
|
||||
RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
|
||||
V[4], V[5], V[6], V[7]), 0);
|
||||
}
|
||||
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
|
||||
Ops.data(), 7);
|
||||
SuperReg = SDValue(VLdLn, 0);
|
||||
Chain = SDValue(VLdLn, 1);
|
||||
|
||||
// Extract the subregisters.
|
||||
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
|
||||
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
|
||||
unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
|
||||
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
|
||||
ReplaceUses(SDValue(N, Vec),
|
||||
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
|
||||
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
|
||||
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
|
||||
ReplaceUses(SDValue(N, NumVecs), Chain);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -2119,24 +2008,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld2lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
|
||||
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
|
||||
ARM::VLD2LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld3lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
|
||||
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
|
||||
ARM::VLD3LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vld4lane: {
|
||||
unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
|
||||
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
|
||||
ARM::VLD4LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst1: {
|
||||
@ -2180,24 +2069,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst2lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
|
||||
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
|
||||
ARM::VST2LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst3lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
|
||||
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
|
||||
ARM::VST3LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst4lane: {
|
||||
unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
|
||||
unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
|
||||
unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
|
||||
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
|
||||
ARM::VST4LNd32Pseudo };
|
||||
unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
|
||||
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -445,6 +445,33 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
|
||||
|
||||
// Classes for VLD*LN pseudo-instructions with multi-register operands.
|
||||
// These are expanded to real instructions after register allocation.
|
||||
// VLDQLNPseudo: load-lane pseudo without writeback. Produces one QPR result
// ($dst) and takes an addrmode6 address, a QPR source ($src) and a nohash_imm
// lane number. The trailing "$src = $dst" string is presumably the constraint
// tying the source to the result -- confirm against PseudoNLdSt.
class VLDQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QPR:$dst),
|
||||
(ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
|
||||
itin, "$src = $dst">;
|
||||
// VLDQLNWBPseudo: load-lane pseudo with address writeback. In addition to the
// QPR result ($dst) it produces a GPR result ($wb), and it takes an extra
// am6offset operand ($offset). The trailing string additionally relates
// $addr.addr to $wb (presumably the writeback tie -- confirm against
// PseudoNLdSt).
class VLDQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
|
||||
// VLDQQLNPseudo: load-lane pseudo without writeback whose result ($dst) and
// source ($src) operands are in the QQPR register class. Also takes an
// addrmode6 address and a nohash_imm lane number. The trailing
// "$src = $dst" string is presumably the source/result tie -- confirm against
// PseudoNLdSt.
class VLDQQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QQPR:$dst),
|
||||
(ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
|
||||
itin, "$src = $dst">;
|
||||
// VLDQQLNWBPseudo: load-lane pseudo with address writeback on QQPR operands.
// Produces a QQPR result ($dst) plus a GPR result ($wb), and takes an
// addrmode6 address, an am6offset operand, a QQPR source and a nohash_imm lane
// number. The trailing string relates $addr.addr to $wb and $src to $dst
// (presumably operand ties -- confirm against PseudoNLdSt).
class VLDQQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
|
||||
// VLDQQQQLNPseudo: load-lane pseudo without writeback whose result ($dst) and
// source ($src) operands are in the QQQQPR register class. Also takes an
// addrmode6 address and a nohash_imm lane number. The trailing
// "$src = $dst" string is presumably the source/result tie -- confirm against
// PseudoNLdSt.
class VLDQQQQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QQQQPR:$dst),
|
||||
(ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
|
||||
itin, "$src = $dst">;
|
||||
// VLDQQQQLNWBPseudo: load-lane pseudo with address writeback on QQQQPR
// operands. Produces a QQQQPR result ($dst) plus a GPR result ($wb), and takes
// an addrmode6 address, an am6offset operand, a QQQQPR source and a nohash_imm
// lane number. The trailing string relates $addr.addr to $wb and $src to $dst
// (presumably operand ties -- confirm against PseudoNLdSt).
class VLDQQQQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
|
||||
|
||||
// VLD1LN : Vector Load (single element to one lane)
|
||||
// FIXME: Not yet implemented.
|
||||
|
||||
@ -459,13 +486,16 @@ def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
|
||||
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
|
||||
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
|
||||
|
||||
def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2>;
|
||||
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2>;
|
||||
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
|
||||
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
|
||||
def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
|
||||
def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2>;
|
||||
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -479,9 +509,16 @@ def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
|
||||
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
|
||||
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
|
||||
|
||||
def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
|
||||
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
|
||||
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2>;
|
||||
|
||||
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
|
||||
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
|
||||
|
||||
def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
|
||||
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2>;
|
||||
|
||||
// VLD3LN : Vector Load (single 3-element structure to one lane)
|
||||
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
|
||||
@ -494,13 +531,16 @@ def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
|
||||
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
|
||||
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
|
||||
|
||||
def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3>;
|
||||
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3>;
|
||||
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
|
||||
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
|
||||
def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
|
||||
def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
|
||||
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -517,9 +557,16 @@ def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
|
||||
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
|
||||
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
|
||||
|
||||
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
|
||||
def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
|
||||
def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3>;
|
||||
|
||||
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
|
||||
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
|
||||
|
||||
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
|
||||
def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3>;
|
||||
|
||||
// VLD4LN : Vector Load (single 4-element structure to one lane)
|
||||
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b10, op11_8, op7_4,
|
||||
@ -533,13 +580,16 @@ def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
|
||||
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
|
||||
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
|
||||
|
||||
def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4>;
|
||||
def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4>;
|
||||
def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
|
||||
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
|
||||
def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
|
||||
def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
|
||||
def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -556,9 +606,16 @@ def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
|
||||
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
|
||||
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
|
||||
|
||||
def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
|
||||
def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
|
||||
def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4>;
|
||||
|
||||
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
|
||||
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
|
||||
|
||||
def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
|
||||
def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4>;
|
||||
|
||||
// VLD1DUP : Vector Load (single element to all lanes)
|
||||
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
|
||||
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
|
||||
@ -846,6 +903,30 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
|
||||
// Classes for VST*LN pseudo-instructions with multi-register operands.
|
||||
// These are expanded to real instructions after register allocation.
|
||||
// Store-lane pseudo-instruction class with no result operands. Its inputs are
// an addrmode6 address ($addr), one QPR-class source operand ($src) and a
// nohash_imm lane operand ($lane). The itinerary class is supplied by the
// instantiating def, and the last PseudoNLdSt argument is the empty string
// (presumably "no operand constraints" -- confirm against PseudoNLdSt).
// Instantiated in this file by VST2LNd8Pseudo, VST2LNd16Pseudo and
// VST2LNd32Pseudo.
class VSTQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
|
||||
itin, "">;
|
||||
// Store-lane pseudo-instruction class with one result, GPR:$wb. Its inputs are
// an addrmode6 address ($addr), an am6offset operand ($offset), one QPR-class
// source operand ($src) and a nohash_imm lane operand ($lane). The itinerary
// class is supplied by the instantiating def.
// The last PseudoNLdSt argument is "$addr.addr = $wb"; presumably this is the
// operand constraint tying the written-back address register to $wb --
// confirm against PseudoNLdSt.
// Instantiated in this file by VST2LNd8Pseudo_UPD, VST2LNd16Pseudo_UPD and
// VST2LNd32Pseudo_UPD.
class VSTQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb">;
|
||||
// Store-lane pseudo-instruction class with no result operands. Its inputs are
// an addrmode6 address ($addr), one QQPR-class source operand ($src) and a
// nohash_imm lane operand ($lane). The itinerary class is supplied by the
// instantiating def, and the last PseudoNLdSt argument is the empty string
// (presumably "no operand constraints" -- confirm against PseudoNLdSt).
// Instantiated in this file by the VST2LNq16/q32, VST3LNd8/d16/d32 and
// VST4LNd8/d16/d32 "Pseudo" defs.
class VSTQQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
|
||||
itin, "">;
|
||||
// Store-lane pseudo-instruction class with one result, GPR:$wb. Its inputs are
// an addrmode6 address ($addr), an am6offset operand ($offset), one QQPR-class
// source operand ($src) and a nohash_imm lane operand ($lane). The itinerary
// class is supplied by the instantiating def.
// The last PseudoNLdSt argument is "$addr.addr = $wb"; presumably this is the
// operand constraint tying the written-back address register to $wb --
// confirm against PseudoNLdSt.
// Instantiated in this file by the VST2LNq16/q32, VST3LNd8/d16/d32 and
// VST4LNd8/d16/d32 "Pseudo_UPD" defs.
class VSTQQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb">;
|
||||
// Store-lane pseudo-instruction class with no result operands. Its inputs are
// an addrmode6 address ($addr), one QQQQPR-class source operand ($src) and a
// nohash_imm lane operand ($lane). The itinerary class is supplied by the
// instantiating def, and the last PseudoNLdSt argument is the empty string
// (presumably "no operand constraints" -- confirm against PseudoNLdSt).
// Instantiated in this file by the VST3LNq16/q32 and VST4LNq16/q32 "Pseudo"
// defs.
class VSTQQQQLNPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
|
||||
itin, "">;
|
||||
// Store-lane pseudo-instruction class with one result, GPR:$wb. Its inputs are
// an addrmode6 address ($addr), an am6offset operand ($offset), one
// QQQQPR-class source operand ($src) and a nohash_imm lane operand ($lane).
// The itinerary class is supplied by the instantiating def.
// The last PseudoNLdSt argument is "$addr.addr = $wb"; presumably this is the
// operand constraint tying the written-back address register to $wb --
// confirm against PseudoNLdSt.
// Instantiated in this file by the VST3LNq16/q32 and VST4LNq16/q32
// "Pseudo_UPD" defs.
class VSTQQQQLNWBPseudo<InstrItinClass itin>
|
||||
: PseudoNLdSt<(outs GPR:$wb),
|
||||
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
|
||||
nohash_imm:$lane), itin, "$addr.addr = $wb">;
|
||||
|
||||
// VST1LN : Vector Store (single element from one lane)
|
||||
// FIXME: Not yet implemented.
|
||||
|
||||
@ -860,13 +941,16 @@ def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
|
||||
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
|
||||
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
|
||||
|
||||
def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST>;
|
||||
def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST>;
|
||||
def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
|
||||
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
|
||||
def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
|
||||
def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -880,9 +964,16 @@ def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
|
||||
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
|
||||
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
|
||||
|
||||
def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
|
||||
def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
|
||||
def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST>;
|
||||
|
||||
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
|
||||
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
|
||||
|
||||
def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
|
||||
// VST3LN : Vector Store (single 3-element structure from one lane)
|
||||
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
|
||||
@ -894,13 +985,16 @@ def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
|
||||
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
|
||||
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
|
||||
|
||||
def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
|
||||
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
|
||||
def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
|
||||
def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
|
||||
def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -915,9 +1009,16 @@ def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
|
||||
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
|
||||
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
|
||||
|
||||
def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
|
||||
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
|
||||
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
|
||||
|
||||
def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
|
||||
def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
|
||||
|
||||
// VST4LN : Vector Store (single 4-element structure from one lane)
|
||||
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
|
||||
@ -930,13 +1031,16 @@ def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
|
||||
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
|
||||
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
|
||||
|
||||
def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with double-spaced registers:
|
||||
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
|
||||
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
|
||||
def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
|
||||
def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST>;
|
||||
def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST>;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
@ -951,9 +1055,16 @@ def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
|
||||
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
|
||||
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
|
||||
|
||||
def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST>;
|
||||
|
||||
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
|
||||
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
|
||||
|
||||
def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
|
||||
def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST>;
|
||||
|
||||
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
|
||||
|
||||
|
||||
|
@ -51,144 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
default:
|
||||
break;
|
||||
|
||||
case ARM::VLD2LNd8:
|
||||
case ARM::VLD2LNd16:
|
||||
case ARM::VLD2LNd32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD2LNq16:
|
||||
case ARM::VLD2LNq32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 2;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD2LNq16odd:
|
||||
case ARM::VLD2LNq32odd:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 2;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3LNd8:
|
||||
case ARM::VLD3LNd16:
|
||||
case ARM::VLD3LNd32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 3;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3LNq16:
|
||||
case ARM::VLD3LNq32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 3;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD3LNq16odd:
|
||||
case ARM::VLD3LNq32odd:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 3;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4LNd8:
|
||||
case ARM::VLD4LNd16:
|
||||
case ARM::VLD4LNd32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4LNq16:
|
||||
case ARM::VLD4LNq32:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VLD4LNq16odd:
|
||||
case ARM::VLD4LNq32odd:
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST2LNd8:
|
||||
case ARM::VST2LNd16:
|
||||
case ARM::VST2LNd32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST2LNq16:
|
||||
case ARM::VST2LNq32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 2;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST2LNq16odd:
|
||||
case ARM::VST2LNq32odd:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 2;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3LNd8:
|
||||
case ARM::VST3LNd16:
|
||||
case ARM::VST3LNd32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 3;
|
||||
return true;
|
||||
|
||||
case ARM::VST3LNq16:
|
||||
case ARM::VST3LNq32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 3;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3LNq16odd:
|
||||
case ARM::VST3LNq32odd:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 3;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST4LNd8:
|
||||
case ARM::VST4LNd16:
|
||||
case ARM::VST4LNd32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VST4LNq16:
|
||||
case ARM::VST4LNq32:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 4;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST4LNq16odd:
|
||||
case ARM::VST4LNq32odd:
|
||||
FirstOpnd = 2;
|
||||
NumRegs = 4;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VTBL2:
|
||||
FirstOpnd = 1;
|
||||
NumRegs = 2;
|
||||
|
Loading…
Reference in New Issue
Block a user