mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-19 18:24:05 +00:00
Use pseudo instructions for VST3.
llvm-svn: 112208
This commit is contained in:
parent
9565184a05
commit
efc503afd2
@ -48,8 +48,8 @@ namespace {
|
||||
void TransferImpOps(MachineInstr &OldMI,
|
||||
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
|
||||
bool ExpandMBB(MachineBasicBlock &MBB);
|
||||
void ExpandVST4(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||
bool hasWriteBack, NEONRegSpacing RegSpc);
|
||||
void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
|
||||
bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
|
||||
};
|
||||
char ARMExpandPseudo::ID = 0;
|
||||
}
|
||||
@ -72,11 +72,11 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
|
||||
}
|
||||
}
|
||||
|
||||
/// ExpandVST4 - Translate VST4 pseudo instructions with QQ or QQQQ register
|
||||
/// operands to real VST4 instructions with 4 D register operands.
|
||||
void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
|
||||
unsigned Opc, bool hasWriteBack,
|
||||
NEONRegSpacing RegSpc) {
|
||||
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
|
||||
/// operands to real VST instructions with D register operands.
|
||||
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
|
||||
unsigned Opc, bool hasWriteBack,
|
||||
NEONRegSpacing RegSpc, unsigned NumRegs) {
|
||||
MachineInstr &MI = *MBBI;
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
||||
@ -111,7 +111,7 @@ void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
|
||||
D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
|
||||
} else {
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing for VST4");
|
||||
assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
|
||||
D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
|
||||
D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
|
||||
D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
|
||||
@ -120,8 +120,9 @@ void ARMExpandPseudo::ExpandVST4(MachineBasicBlock::iterator &MBBI,
|
||||
|
||||
MIB.addReg(D0, getKillRegState(SrcIsKill))
|
||||
.addReg(D1, getKillRegState(SrcIsKill))
|
||||
.addReg(D2, getKillRegState(SrcIsKill))
|
||||
.addReg(D3, getKillRegState(SrcIsKill));
|
||||
.addReg(D2, getKillRegState(SrcIsKill));
|
||||
if (NumRegs > 3)
|
||||
MIB.addReg(D3, getKillRegState(SrcIsKill));
|
||||
MIB = AddDefaultPred(MIB);
|
||||
TransferImpOps(MI, MIB, MIB);
|
||||
MI.eraseFromParent();
|
||||
@ -223,35 +224,63 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
case ARM::VST3d8Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d16Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d32Pseudo:
|
||||
ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
|
||||
case ARM::VST1d64TPseudo:
|
||||
ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
|
||||
case ARM::VST3d8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3d16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3d32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST1d64TPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
|
||||
case ARM::VST3q8Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q16Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q32Pseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
|
||||
case ARM::VST3q8oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VST3q16oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
|
||||
case ARM::VST3q32oddPseudo_UPD:
|
||||
ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
|
||||
|
||||
case ARM::VST4d8Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d8, false, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d16Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d16, false, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d32Pseudo:
|
||||
ExpandVST4(MBBI, ARM::VST4d32, false, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
|
||||
case ARM::VST1d64QPseudo:
|
||||
ExpandVST4(MBBI, ARM::VST1d64Q, false, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
|
||||
case ARM::VST4d8Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d8_UPD, true, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4d16Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d16_UPD, true, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4d32Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4d32_UPD, true, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST1d64QPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
|
||||
case ARM::VST4q8Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q16Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q32Pseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
|
||||
case ARM::VST4q8oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q8_UPD, true, OddDblSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VST4q16oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q16_UPD, true, OddDblSpc); break;
|
||||
ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
|
||||
case ARM::VST4q32oddPseudo_UPD:
|
||||
ExpandVST4(MBBI, ARM::VST4q32_UPD, true, OddDblSpc); break;
|
||||
break;
|
||||
ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
|
||||
}
|
||||
|
||||
if (ModifiedOp)
|
||||
|
@ -1262,7 +1262,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
|
||||
// FIXME: This is a temporary flag to distinguish VSTs that have been
|
||||
// converted to pseudo instructions.
|
||||
bool usePseudoInstrs = (NumVecs == 4);
|
||||
bool usePseudoInstrs = (NumVecs >= 3);
|
||||
|
||||
if (is64BitVector) {
|
||||
if (NumVecs >= 2) {
|
||||
@ -2317,14 +2317,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
}
|
||||
|
||||
case Intrinsic::arm_neon_vst3: {
|
||||
unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
|
||||
ARM::VST3d32, ARM::VST1d64T };
|
||||
unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
|
||||
ARM::VST3q16_UPD,
|
||||
ARM::VST3q32_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
|
||||
ARM::VST3q16odd_UPD,
|
||||
ARM::VST3q32odd_UPD };
|
||||
unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
|
||||
ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
|
||||
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
|
||||
ARM::VST3q16Pseudo_UPD,
|
||||
ARM::VST3q32Pseudo_UPD };
|
||||
unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
|
||||
ARM::VST3q16oddPseudo_UPD,
|
||||
ARM::VST3q32oddPseudo_UPD };
|
||||
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
|
||||
}
|
||||
|
||||
|
@ -560,6 +560,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
|
||||
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
|
||||
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
|
||||
|
||||
def VST1d64TPseudo : VSTQQPseudo;
|
||||
def VST1d64TPseudo_UPD : VSTQQWBPseudo;
|
||||
|
||||
// ...with 4 registers (some of these are only for the disassembler):
|
||||
class VST1D4<bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
|
||||
@ -644,6 +647,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">;
|
||||
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
|
||||
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
|
||||
|
||||
def VST3d8Pseudo : VSTQQPseudo;
|
||||
def VST3d16Pseudo : VSTQQPseudo;
|
||||
def VST3d32Pseudo : VSTQQPseudo;
|
||||
|
||||
// ...with address register writeback:
|
||||
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
|
||||
@ -656,6 +663,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
|
||||
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
|
||||
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
|
||||
|
||||
def VST3d8Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST3d16Pseudo_UPD : VSTQQWBPseudo;
|
||||
def VST3d32Pseudo_UPD : VSTQQWBPseudo;
|
||||
|
||||
// ...with double-spaced registers (non-updating versions for disassembly only):
|
||||
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
|
||||
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
|
||||
@ -664,10 +675,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
|
||||
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
|
||||
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
|
||||
|
||||
def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
|
||||
|
||||
// ...alternate versions to be allocated odd register numbers:
|
||||
def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
|
||||
def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
|
||||
def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
|
||||
def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
|
||||
|
||||
// VST4 : Vector Store (multiple 4-element structures)
|
||||
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
|
||||
|
@ -215,10 +215,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3d8:
|
||||
case ARM::VST3d16:
|
||||
case ARM::VST3d32:
|
||||
case ARM::VST1d64T:
|
||||
case ARM::VST3LNd8:
|
||||
case ARM::VST3LNd16:
|
||||
case ARM::VST3LNd32:
|
||||
@ -226,24 +222,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
|
||||
NumRegs = 3;
|
||||
return true;
|
||||
|
||||
case ARM::VST3q8_UPD:
|
||||
case ARM::VST3q16_UPD:
|
||||
case ARM::VST3q32_UPD:
|
||||
FirstOpnd = 4;
|
||||
NumRegs = 3;
|
||||
Offset = 0;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3q8odd_UPD:
|
||||
case ARM::VST3q16odd_UPD:
|
||||
case ARM::VST3q32odd_UPD:
|
||||
FirstOpnd = 4;
|
||||
NumRegs = 3;
|
||||
Offset = 1;
|
||||
Stride = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3LNq16:
|
||||
case ARM::VST3LNq32:
|
||||
FirstOpnd = 2;
|
||||
|
Loading…
x
Reference in New Issue
Block a user