mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-29 06:30:39 +00:00
Reapply r143206, with fixes. Disallow physical register lifetimes
across calls, and only check for nested dependences on the special call-sequence-resource register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143660 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
89b6f2ea9f
commit
65fd6564b8
File diff suppressed because it is too large
Load Diff
@ -1084,7 +1084,6 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
|
||||
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
|
||||
TLI.getPointerTy());
|
||||
|
||||
// Splice the libcall in wherever FindInputOutputChains tells us to.
|
||||
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
|
||||
std::pair<SDValue, SDValue> CallInfo =
|
||||
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
|
||||
|
@ -315,8 +315,10 @@ void ScheduleDAGRRList::Schedule() {
|
||||
IssueCount = 0;
|
||||
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
|
||||
NumLiveRegs = 0;
|
||||
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
|
||||
LiveRegGens.resize(TRI->getNumRegs(), NULL);
|
||||
// Allocate slots for each physical register, plus one for a special register
|
||||
// to track the virtual resource of a calling sequence.
|
||||
LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
|
||||
LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
|
||||
|
||||
// Build the scheduling graph.
|
||||
BuildSchedGraph(NULL);
|
||||
@ -386,6 +388,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
|
||||
}
|
||||
}
|
||||
|
||||
/// IsChainDependent - Test if Outer is reachable from Inner through
|
||||
/// chain dependencies.
|
||||
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
|
||||
unsigned NestLevel,
|
||||
const TargetInstrInfo *TII) {
|
||||
SDNode *N = Outer;
|
||||
for (;;) {
|
||||
if (N == Inner)
|
||||
return true;
|
||||
// For a TokenFactor, examine each operand. There may be multiple ways
|
||||
// to get to the CALLSEQ_BEGIN, but we need to find the path with the
|
||||
// most nesting in order to ensure that we find the corresponding match.
|
||||
if (N->getOpcode() == ISD::TokenFactor) {
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
||||
if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
|
||||
if (N->isMachineOpcode()) {
|
||||
if (N->getMachineOpcode() ==
|
||||
(unsigned)TII->getCallFrameDestroyOpcode()) {
|
||||
++NestLevel;
|
||||
} else if (N->getMachineOpcode() ==
|
||||
(unsigned)TII->getCallFrameSetupOpcode()) {
|
||||
if (NestLevel == 0)
|
||||
return false;
|
||||
--NestLevel;
|
||||
}
|
||||
}
|
||||
// Otherwise, find the chain and continue climbing.
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
||||
if (N->getOperand(i).getValueType() == MVT::Other) {
|
||||
N = N->getOperand(i).getNode();
|
||||
goto found_chain_operand;
|
||||
}
|
||||
return false;
|
||||
found_chain_operand:;
|
||||
if (N->getOpcode() == ISD::EntryToken)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
|
||||
/// the corresponding (lowered) CALLSEQ_BEGIN node.
|
||||
///
|
||||
/// NestLevel and MaxNested are used in recursion to indcate the current level
|
||||
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
|
||||
/// level seen so far.
|
||||
///
|
||||
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
|
||||
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
|
||||
static SDNode *
|
||||
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
|
||||
const TargetInstrInfo *TII) {
|
||||
for (;;) {
|
||||
// For a TokenFactor, examine each operand. There may be multiple ways
|
||||
// to get to the CALLSEQ_BEGIN, but we need to find the path with the
|
||||
// most nesting in order to ensure that we find the corresponding match.
|
||||
if (N->getOpcode() == ISD::TokenFactor) {
|
||||
SDNode *Best = 0;
|
||||
unsigned BestMaxNest = MaxNest;
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
|
||||
unsigned MyNestLevel = NestLevel;
|
||||
unsigned MyMaxNest = MaxNest;
|
||||
if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
|
||||
MyNestLevel, MyMaxNest, TII))
|
||||
if (!Best || (MyMaxNest > BestMaxNest)) {
|
||||
Best = New;
|
||||
BestMaxNest = MyMaxNest;
|
||||
}
|
||||
}
|
||||
assert(Best);
|
||||
MaxNest = BestMaxNest;
|
||||
return Best;
|
||||
}
|
||||
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
|
||||
if (N->isMachineOpcode()) {
|
||||
if (N->getMachineOpcode() ==
|
||||
(unsigned)TII->getCallFrameDestroyOpcode()) {
|
||||
++NestLevel;
|
||||
MaxNest = std::max(MaxNest, NestLevel);
|
||||
} else if (N->getMachineOpcode() ==
|
||||
(unsigned)TII->getCallFrameSetupOpcode()) {
|
||||
assert(NestLevel != 0);
|
||||
--NestLevel;
|
||||
if (NestLevel == 0)
|
||||
return N;
|
||||
}
|
||||
}
|
||||
// Otherwise, find the chain and continue climbing.
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
||||
if (N->getOperand(i).getValueType() == MVT::Other) {
|
||||
N = N->getOperand(i).getNode();
|
||||
goto found_chain_operand;
|
||||
}
|
||||
return 0;
|
||||
found_chain_operand:;
|
||||
if (N->getOpcode() == ISD::EntryToken)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Call ReleasePred for each predecessor, then update register live def/gen.
|
||||
/// Always update LiveRegDefs for a register dependence even if the current SU
|
||||
/// also defines the register. This effectively create one large live range
|
||||
@ -423,6 +528,25 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we're scheduling a lowered CALLSEQ_END, find the corresponding
|
||||
// CALLSEQ_BEGIN. Inject an artificial physical register dependence between
|
||||
// these nodes, to prevent other calls from being interscheduled with them.
|
||||
unsigned CallResource = TRI->getNumRegs();
|
||||
if (!LiveRegDefs[CallResource])
|
||||
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
|
||||
if (Node->isMachineOpcode() &&
|
||||
Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
|
||||
unsigned NestLevel = 0;
|
||||
unsigned MaxNest = 0;
|
||||
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
|
||||
|
||||
SUnit *Def = &SUnits[N->getNodeId()];
|
||||
++NumLiveRegs;
|
||||
LiveRegDefs[CallResource] = Def;
|
||||
LiveRegGens[CallResource] = SU;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Check to see if any of the pending instructions are ready to issue. If
|
||||
@ -605,6 +729,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
||||
LiveRegGens[I->getReg()] = NULL;
|
||||
}
|
||||
}
|
||||
// Release the special call resource dependence, if this is the beginning
|
||||
// of a call.
|
||||
unsigned CallResource = TRI->getNumRegs();
|
||||
if (LiveRegDefs[CallResource] == SU)
|
||||
for (const SDNode *SUNode = SU->getNode(); SUNode;
|
||||
SUNode = SUNode->getGluedNode()) {
|
||||
if (SUNode->isMachineOpcode() &&
|
||||
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
|
||||
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
|
||||
--NumLiveRegs;
|
||||
LiveRegDefs[CallResource] = NULL;
|
||||
LiveRegGens[CallResource] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
resetVRegCycle(SU);
|
||||
|
||||
@ -661,6 +799,33 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reclaim the special call resource dependence, if this is the beginning
|
||||
// of a call.
|
||||
unsigned CallResource = TRI->getNumRegs();
|
||||
for (const SDNode *SUNode = SU->getNode(); SUNode;
|
||||
SUNode = SUNode->getGluedNode()) {
|
||||
if (SUNode->isMachineOpcode() &&
|
||||
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
|
||||
++NumLiveRegs;
|
||||
LiveRegDefs[CallResource] = SU;
|
||||
LiveRegGens[CallResource] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Release the special call resource dependence, if this is the end
|
||||
// of a call.
|
||||
if (LiveRegGens[CallResource] == SU)
|
||||
for (const SDNode *SUNode = SU->getNode(); SUNode;
|
||||
SUNode = SUNode->getGluedNode()) {
|
||||
if (SUNode->isMachineOpcode() &&
|
||||
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
|
||||
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
|
||||
--NumLiveRegs;
|
||||
LiveRegDefs[CallResource] = NULL;
|
||||
LiveRegGens[CallResource] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
|
||||
I != E; ++I) {
|
||||
if (I->isAssignedRegDep()) {
|
||||
@ -1083,6 +1248,20 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
|
||||
|
||||
if (!Node->isMachineOpcode())
|
||||
continue;
|
||||
// If we're in the middle of scheduling a call, don't begin scheduling
|
||||
// another call. Also, don't allow any physical registers to be live across
|
||||
// the call.
|
||||
if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
|
||||
// Check the special calling-sequence resource.
|
||||
unsigned CallResource = TRI->getNumRegs();
|
||||
if (LiveRegDefs[CallResource]) {
|
||||
SDNode *Gen = LiveRegGens[CallResource]->getNode();
|
||||
while (SDNode *Glued = Gen->getGluedNode())
|
||||
Gen = Glued;
|
||||
if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
|
||||
LRegs.push_back(CallResource);
|
||||
}
|
||||
}
|
||||
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
|
||||
if (!MCID.ImplicitDefs)
|
||||
continue;
|
||||
|
@ -5290,6 +5290,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
|
||||
// already exists there, recursively merge the results together.
|
||||
AddModifiedNodeToCSEMaps(User, &Listener);
|
||||
}
|
||||
|
||||
// If we just RAUW'd the root, take note.
|
||||
if (FromN == getRoot())
|
||||
setRoot(To);
|
||||
}
|
||||
|
||||
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
|
||||
@ -5335,6 +5339,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
|
||||
// already exists there, recursively merge the results together.
|
||||
AddModifiedNodeToCSEMaps(User, &Listener);
|
||||
}
|
||||
|
||||
// If we just RAUW'd the root, take note.
|
||||
if (From == getRoot().getNode())
|
||||
setRoot(SDValue(To, getRoot().getResNo()));
|
||||
}
|
||||
|
||||
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
|
||||
@ -5373,6 +5381,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
|
||||
// already exists there, recursively merge the results together.
|
||||
AddModifiedNodeToCSEMaps(User, &Listener);
|
||||
}
|
||||
|
||||
// If we just RAUW'd the root, take note.
|
||||
if (From == getRoot().getNode())
|
||||
setRoot(SDValue(To[getRoot().getResNo()]));
|
||||
}
|
||||
|
||||
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
|
||||
@ -5431,6 +5443,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
|
||||
// already exists there, recursively merge the results together.
|
||||
AddModifiedNodeToCSEMaps(User, &Listener);
|
||||
}
|
||||
|
||||
// If we just RAUW'd the root, take note.
|
||||
if (From == getRoot())
|
||||
setRoot(To);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -1353,12 +1353,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
|
||||
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
|
||||
MVT::i32);
|
||||
// TODO: Disable AlwaysInline when it becomes possible
|
||||
// to emit a nested call sequence.
|
||||
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
|
||||
Flags.getByValAlign(),
|
||||
/*isVolatile=*/false,
|
||||
/*AlwaysInline=*/true,
|
||||
/*AlwaysInline=*/false,
|
||||
MachinePointerInfo(0),
|
||||
MachinePointerInfo(0)));
|
||||
|
||||
@ -4350,9 +4348,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
// If this is undef splat, generate it via "just" vdup, if possible.
|
||||
if (Lane == -1) Lane = 0;
|
||||
|
||||
// Test if V1 is a SCALAR_TO_VECTOR.
|
||||
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
|
||||
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
|
||||
}
|
||||
// Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
|
||||
// (and probably will turn into a SCALAR_TO_VECTOR once legalization
|
||||
// reaches it).
|
||||
if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
!isa<ConstantSDNode>(V1.getOperand(0))) {
|
||||
bool IsScalarToVector = true;
|
||||
for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
|
||||
if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
|
||||
IsScalarToVector = false;
|
||||
break;
|
||||
}
|
||||
if (IsScalarToVector)
|
||||
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
|
||||
}
|
||||
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
|
||||
DAG.getConstant(Lane, MVT::i32));
|
||||
}
|
||||
|
@ -2114,7 +2114,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
HasNoSignedComparisonUses(Node))
|
||||
// Look past the truncate if CMP is the only use of it.
|
||||
N0 = N0.getOperand(0);
|
||||
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
|
||||
if ((N0.getNode()->getOpcode() == ISD::AND ||
|
||||
(N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
|
||||
N0.getNode()->hasOneUse() &&
|
||||
N0.getValueType() != MVT::i8 &&
|
||||
X86::isZeroNode(N1)) {
|
||||
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
|
||||
|
@ -4221,6 +4221,29 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Test whether the given value is a vector value which will be legalized
|
||||
// into a load.
|
||||
static bool WillBeConstantPoolLoad(SDNode *N) {
|
||||
if (N->getOpcode() != ISD::BUILD_VECTOR)
|
||||
return false;
|
||||
|
||||
// Check for any non-constant elements.
|
||||
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
|
||||
switch (N->getOperand(i).getNode()->getOpcode()) {
|
||||
case ISD::UNDEF:
|
||||
case ISD::ConstantFP:
|
||||
case ISD::Constant:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Vectors of all-zeros and all-ones are materialized with special
|
||||
// instructions rather than being loaded.
|
||||
return !ISD::isBuildVectorAllZeros(N) &&
|
||||
!ISD::isBuildVectorAllOnes(N);
|
||||
}
|
||||
|
||||
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
|
||||
/// match movlp{s|d}. The lower half elements should come from lower half of
|
||||
/// V1 (and in order), and the upper half elements should come from the upper
|
||||
@ -4236,7 +4259,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
|
||||
return false;
|
||||
// Is V2 is a vector load, don't do this transformation. We will try to use
|
||||
// load folding shufps op.
|
||||
if (ISD::isNON_EXTLoad(V2))
|
||||
if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
|
||||
return false;
|
||||
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
@ -6352,6 +6375,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
|
||||
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
|
||||
CanFoldLoad = true;
|
||||
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
|
||||
// Both of them can't be memory operations though.
|
||||
if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
|
||||
CanFoldLoad = false;
|
||||
@ -6361,10 +6386,11 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
|
||||
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
|
||||
|
||||
if (NumElems == 4)
|
||||
return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
|
||||
// If we don't care about the second element, procede to use movss.
|
||||
if (SVOp->getMaskElt(1) != -1)
|
||||
return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
|
||||
}
|
||||
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
// movl and movlp will both match v2i64, but v2i64 is never matched by
|
||||
// movl earlier because we make it strict to avoid messing with the movlp load
|
||||
// folding logic (see the code above getMOVLP call). Match it here then,
|
||||
@ -8682,8 +8708,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
// If condition flag is set by a X86ISD::CMP, then use it as the condition
|
||||
// setting operand in place of the X86ISD::SETCC.
|
||||
if (Cond.getOpcode() == X86ISD::SETCC ||
|
||||
Cond.getOpcode() == X86ISD::SETCC_CARRY) {
|
||||
unsigned CondOpcode = Cond.getOpcode();
|
||||
if (CondOpcode == X86ISD::SETCC ||
|
||||
CondOpcode == X86ISD::SETCC_CARRY) {
|
||||
CC = Cond.getOperand(0);
|
||||
|
||||
SDValue Cmp = Cond.getOperand(1);
|
||||
@ -8700,6 +8727,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
Cond = Cmp;
|
||||
addTest = false;
|
||||
}
|
||||
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
|
||||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
|
||||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
|
||||
Cond.getOperand(0).getValueType() != MVT::i8)) {
|
||||
SDValue LHS = Cond.getOperand(0);
|
||||
SDValue RHS = Cond.getOperand(1);
|
||||
unsigned X86Opcode;
|
||||
unsigned X86Cond;
|
||||
SDVTList VTs;
|
||||
switch (CondOpcode) {
|
||||
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
|
||||
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
|
||||
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
|
||||
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
|
||||
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
|
||||
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
|
||||
default: llvm_unreachable("unexpected overflowing operator");
|
||||
}
|
||||
if (CondOpcode == ISD::UMULO)
|
||||
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
|
||||
MVT::i32);
|
||||
else
|
||||
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
|
||||
|
||||
SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
|
||||
|
||||
if (CondOpcode == ISD::UMULO)
|
||||
Cond = X86Op.getValue(2);
|
||||
else
|
||||
Cond = X86Op.getValue(1);
|
||||
|
||||
CC = DAG.getConstant(X86Cond, MVT::i8);
|
||||
addTest = false;
|
||||
}
|
||||
|
||||
if (addTest) {
|
||||
@ -8781,11 +8841,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue Dest = Op.getOperand(2);
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
SDValue CC;
|
||||
bool Inverted = false;
|
||||
|
||||
if (Cond.getOpcode() == ISD::SETCC) {
|
||||
SDValue NewCond = LowerSETCC(Cond, DAG);
|
||||
if (NewCond.getNode())
|
||||
Cond = NewCond;
|
||||
// Check for setcc([su]{add,sub,mul}o == 0).
|
||||
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
|
||||
isa<ConstantSDNode>(Cond.getOperand(1)) &&
|
||||
cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
|
||||
Cond.getOperand(0).getResNo() == 1 &&
|
||||
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
|
||||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
|
||||
Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
|
||||
Cond.getOperand(0).getOpcode() == ISD::USUBO ||
|
||||
Cond.getOperand(0).getOpcode() == ISD::SMULO ||
|
||||
Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
|
||||
Inverted = true;
|
||||
Cond = Cond.getOperand(0);
|
||||
} else {
|
||||
SDValue NewCond = LowerSETCC(Cond, DAG);
|
||||
if (NewCond.getNode())
|
||||
Cond = NewCond;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
// FIXME: LowerXALUO doesn't handle these!!
|
||||
@ -8806,8 +8882,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
// If condition flag is set by a X86ISD::CMP, then use it as the condition
|
||||
// setting operand in place of the X86ISD::SETCC.
|
||||
if (Cond.getOpcode() == X86ISD::SETCC ||
|
||||
Cond.getOpcode() == X86ISD::SETCC_CARRY) {
|
||||
unsigned CondOpcode = Cond.getOpcode();
|
||||
if (CondOpcode == X86ISD::SETCC ||
|
||||
CondOpcode == X86ISD::SETCC_CARRY) {
|
||||
CC = Cond.getOperand(0);
|
||||
|
||||
SDValue Cmp = Cond.getOperand(1);
|
||||
@ -8828,6 +8905,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
CondOpcode = Cond.getOpcode();
|
||||
if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
|
||||
CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
|
||||
((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
|
||||
Cond.getOperand(0).getValueType() != MVT::i8)) {
|
||||
SDValue LHS = Cond.getOperand(0);
|
||||
SDValue RHS = Cond.getOperand(1);
|
||||
unsigned X86Opcode;
|
||||
unsigned X86Cond;
|
||||
SDVTList VTs;
|
||||
switch (CondOpcode) {
|
||||
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
|
||||
case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
|
||||
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
|
||||
case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
|
||||
case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
|
||||
case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
|
||||
default: llvm_unreachable("unexpected overflowing operator");
|
||||
}
|
||||
if (Inverted)
|
||||
X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
|
||||
if (CondOpcode == ISD::UMULO)
|
||||
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
|
||||
MVT::i32);
|
||||
else
|
||||
VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
|
||||
|
||||
SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
|
||||
|
||||
if (CondOpcode == ISD::UMULO)
|
||||
Cond = X86Op.getValue(2);
|
||||
else
|
||||
Cond = X86Op.getValue(1);
|
||||
|
||||
CC = DAG.getConstant(X86Cond, MVT::i8);
|
||||
addTest = false;
|
||||
} else {
|
||||
unsigned CondOpc;
|
||||
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
|
||||
@ -8891,6 +9005,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
|
||||
CC = DAG.getConstant(CCode, MVT::i8);
|
||||
Cond = Cond.getOperand(0).getOperand(1);
|
||||
addTest = false;
|
||||
} else if (Cond.getOpcode() == ISD::SETCC &&
|
||||
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
|
||||
// For FCMP_OEQ, we can emit
|
||||
// two branches instead of an explicit AND instruction with a
|
||||
// separate test. However, we only do this if this block doesn't
|
||||
// have a fall-through edge, because this requires an explicit
|
||||
// jmp when the condition is false.
|
||||
if (Op.getNode()->hasOneUse()) {
|
||||
SDNode *User = *Op.getNode()->use_begin();
|
||||
// Look for an unconditional branch following this conditional branch.
|
||||
// We need this because we need to reverse the successors in order
|
||||
// to implement FCMP_OEQ.
|
||||
if (User->getOpcode() == ISD::BR) {
|
||||
SDValue FalseBB = User->getOperand(1);
|
||||
SDNode *NewBR =
|
||||
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
|
||||
assert(NewBR == User);
|
||||
(void)NewBR;
|
||||
Dest = FalseBB;
|
||||
|
||||
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
|
||||
Cond.getOperand(0), Cond.getOperand(1));
|
||||
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
|
||||
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
|
||||
Chain, Dest, CC, Cmp);
|
||||
CC = DAG.getConstant(X86::COND_P, MVT::i8);
|
||||
Cond = Cmp;
|
||||
addTest = false;
|
||||
}
|
||||
}
|
||||
} else if (Cond.getOpcode() == ISD::SETCC &&
|
||||
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
|
||||
// For FCMP_UNE, we can emit
|
||||
// two branches instead of an explicit AND instruction with a
|
||||
// separate test. However, we only do this if this block doesn't
|
||||
// have a fall-through edge, because this requires an explicit
|
||||
// jmp when the condition is false.
|
||||
if (Op.getNode()->hasOneUse()) {
|
||||
SDNode *User = *Op.getNode()->use_begin();
|
||||
// Look for an unconditional branch following this conditional branch.
|
||||
// We need this because we need to reverse the successors in order
|
||||
// to implement FCMP_UNE.
|
||||
if (User->getOpcode() == ISD::BR) {
|
||||
SDValue FalseBB = User->getOperand(1);
|
||||
SDNode *NewBR =
|
||||
DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
|
||||
assert(NewBR == User);
|
||||
(void)NewBR;
|
||||
|
||||
SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
|
||||
Cond.getOperand(0), Cond.getOperand(1));
|
||||
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
|
||||
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
|
||||
Chain, Dest, CC, Cmp);
|
||||
CC = DAG.getConstant(X86::COND_NP, MVT::i8);
|
||||
Cond = Cmp;
|
||||
addTest = false;
|
||||
Dest = FalseBB;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -386,6 +386,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
|
||||
Offset = off;
|
||||
return true;
|
||||
}
|
||||
// Check for an aligned global variable.
|
||||
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
|
||||
const GlobalValue *GV = GA->getGlobal();
|
||||
if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
|
||||
AlignedBase = Base;
|
||||
Offset = off;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,9 @@
|
||||
; RUN: grep andhi %t1.s | count 30
|
||||
; RUN: grep andbi %t1.s | count 4
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
|
@ -15,6 +15,9 @@
|
||||
; RUN: grep ai %t2.s | count 9
|
||||
; RUN: grep dispatch_tab %t2.s | count 6
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
; ModuleID = 'call_indirect.bc'
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
|
||||
target triple = "spu-unknown-elf"
|
||||
|
@ -3,6 +3,10 @@
|
||||
; RUN: grep and %t1.s | count 94
|
||||
; RUN: grep xsbh %t1.s | count 2
|
||||
; RUN: grep xshw %t1.s | count 4
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
|
@ -6,6 +6,9 @@
|
||||
; RUN: grep orbi %t1.s | count 15
|
||||
; RUN: FileCheck %s < %t1.s
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
; RUN: llc < %s -march=cellspu > %t1.s
|
||||
; RUN: grep selb %t1.s | count 56
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
||||
|
@ -22,6 +22,9 @@
|
||||
; RUN: grep shufb %t2.s | count 7
|
||||
; RUN: grep stqd %t2.s | count 7
|
||||
|
||||
; CellSPU legalization is over-sensitive to Legalize's traversal order.
|
||||
; XFAIL: *
|
||||
|
||||
; ModuleID = 'struct_1.bc'
|
||||
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
|
||||
target triple = "spu"
|
||||
|
@ -1,8 +1,4 @@
|
||||
; DISABLED: llc -march=mipsel < %s | FileCheck %s
|
||||
; RUN: false
|
||||
|
||||
; byval is currently unsupported.
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=mipsel < %s | FileCheck %s
|
||||
|
||||
; CHECK: .set macro
|
||||
; CHECK-NEXT: .cprestore
|
||||
|
@ -1,8 +1,4 @@
|
||||
; DISABLED: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
|
||||
; RUN: false
|
||||
|
||||
; byval is currently unsupported.
|
||||
; XFAIL: *
|
||||
; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
|
||||
|
||||
%struct.S1 = type { [65536 x i8] }
|
||||
|
||||
|
@ -1,11 +1,7 @@
|
||||
; DISABLED: llc -mtriple=thumbv6-apple-darwin < %s
|
||||
; RUN: false
|
||||
; RUN: llc -mtriple=thumbv6-apple-darwin < %s
|
||||
; rdar://problem/9416774
|
||||
; ModuleID = 'reduced.ll'
|
||||
|
||||
; byval is currently unsupported.
|
||||
; XFAIL: *
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
|
||||
target triple = "thumbv7-apple-ios"
|
||||
|
||||
|
35
test/CodeGen/X86/legalize-libcalls.ll
Normal file
35
test/CodeGen/X86/legalize-libcalls.ll
Normal file
@ -0,0 +1,35 @@
|
||||
; RUN: llc -march=x86 < %s
|
||||
; RUN: llc -march=x86-64 < %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
|
||||
|
||||
define float @MakeSphere(float %theta.079) nounwind {
|
||||
entry:
|
||||
%add36 = fadd float %theta.079, undef
|
||||
%call = call float @cosf(float %theta.079) nounwind readnone
|
||||
%call45 = call float @sinf(float %theta.079) nounwind readnone
|
||||
%call37 = call float @sinf(float %add36) nounwind readnone
|
||||
store float %call, float* undef, align 8
|
||||
store float %call37, float* undef, align 8
|
||||
store float %call45, float* undef, align 8
|
||||
ret float %add36
|
||||
}
|
||||
|
||||
define hidden fastcc void @unroll_loop(i64 %storemerge32129) nounwind {
|
||||
entry:
|
||||
call fastcc void @copy_rtx() nounwind
|
||||
call fastcc void @copy_rtx() nounwind
|
||||
%tmp225 = alloca i8, i64 %storemerge32129, align 8 ; [#uses=0 type=i8*]
|
||||
%cmp651201 = icmp slt i64 %storemerge32129, 0 ; [#uses=1 type=i1]
|
||||
br i1 %cmp651201, label %for.body653.lr.ph, label %if.end638.for.end659_crit_edge
|
||||
|
||||
for.body653.lr.ph: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
if.end638.for.end659_crit_edge: ; preds = %entry
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare float @cosf(float) nounwind readnone
|
||||
declare float @sinf(float) nounwind readnone
|
||||
declare hidden fastcc void @copy_rtx() nounwind
|
@ -16,10 +16,8 @@ entry:
|
||||
ret void
|
||||
|
||||
; X64: t0:
|
||||
; X64: movddup (%rsi), %xmm0
|
||||
; X64: pshuflw $0, %xmm0, %xmm0
|
||||
; X64: xorl %eax, %eax
|
||||
; X64: pinsrw $0, %eax, %xmm0
|
||||
; X64: movdqa (%rsi), %xmm0
|
||||
; X64: pslldq $2, %xmm0
|
||||
; X64: movdqa %xmm0, (%rdi)
|
||||
; X64: ret
|
||||
}
|
||||
@ -31,9 +29,8 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||
ret <8 x i16> %tmp3
|
||||
|
||||
; X64: t1:
|
||||
; X64: movl (%rsi), %eax
|
||||
; X64: movdqa (%rdi), %xmm0
|
||||
; X64: pinsrw $0, %eax, %xmm0
|
||||
; X64: pinsrw $0, (%rsi), %xmm0
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
@ -168,7 +165,7 @@ define internal void @t10() nounwind {
|
||||
ret void
|
||||
; X64: t10:
|
||||
; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
|
||||
; X64: unpcklpd [[X1:%xmm[0-9]+]]
|
||||
; X64: movlhps [[X1:%xmm[0-9]+]]
|
||||
; X64: pshuflw $8, [[X1]], [[X2:%xmm[0-9]+]]
|
||||
; X64: pinsrw $2, %eax, [[X2]]
|
||||
; X64: pextrw $6, [[X0]], %eax
|
||||
@ -250,13 +247,12 @@ entry:
|
||||
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
|
||||
ret <16 x i8> %tmp9
|
||||
; X64: t16:
|
||||
; X64: pinsrw $0, %eax, [[X1:%xmm[0-9]+]]
|
||||
; X64: pextrw $8, [[X0:%xmm[0-9]+]], %eax
|
||||
; X64: pinsrw $1, %eax, [[X1]]
|
||||
; X64: pextrw $1, [[X1]], %ecx
|
||||
; X64: movd [[X1]], %edx
|
||||
; X64: pinsrw $0, %edx, %xmm
|
||||
; X64: pinsrw $1, %eax, %xmm
|
||||
; X64: movdqa %xmm1, %xmm0
|
||||
; X64: pslldq $2, %xmm0
|
||||
; X64: pextrw $1, %xmm0, %eax
|
||||
; X64: movd %xmm0, %ecx
|
||||
; X64: pinsrw $0, %ecx, %xmm0
|
||||
; X64: pextrw $8, %xmm1, %ecx
|
||||
; X64: ret
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user