[X86] Make wide loads be managed by AtomicExpand

Summary:
AtomicExpand already had logic for expanding wide loads and stores on LL/SC
architectures, and for expanding wide stores on CmpXchg architectures, but
not for wide loads on CmpXchg architectures. This patch fills this hole,
and makes use of this new feature in the X86 backend.

Only one functionnal change: we now lose the SynchScope attribute.
It is regrettable, but I have another patch that I will submit soon that will
solve this for all of AtomicExpand (it seemed better to split it apart as it
is a different concern).

Test Plan: make check-all (lots of tests for this functionality already exist)

Reviewers: jfb

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5404

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218332 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robin Morisset 2014-09-23 20:59:25 +00:00
parent 58bca6e8ec
commit 30e7514d01
3 changed files with 41 additions and 32 deletions

View File

@ -44,6 +44,8 @@ namespace {
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
bool expandAtomicLoad(LoadInst *LI);
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
bool expandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
@ -160,6 +162,15 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
}
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
if (TM->getSubtargetImpl()
->getTargetLowering()
->hasLoadLinkedStoreConditional())
return expandAtomicLoadToLL(LI);
else
return expandAtomicLoadToCmpXchg(LI);
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
auto TLI = TM->getSubtargetImpl()->getTargetLowering();
IRBuilder<> Builder(LI);
@ -175,6 +186,24 @@ bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
return true;
}
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
IRBuilder<> Builder(LI);
AtomicOrdering Order = LI->getOrdering();
Value *Addr = LI->getPointerOperand();
Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
Constant *DummyVal = Constant::getNullValue(Ty);
Value *Pair = Builder.CreateAtomicCmpXchg(
Addr, DummyVal, DummyVal, Order,
AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
LI->replaceAllUsesWith(Loaded);
LI->eraseFromParent();
return true;
}
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an

View File

@ -17477,8 +17477,11 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return needsCmpXchgNb(SI->getValueOperand()->getType());
}
bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *SI) const {
return false; // FIXME, currently these are expanded separately in this file.
// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
return needsCmpXchgNb(PTy->getElementType());
}
bool X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
@ -17855,29 +17858,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
static void ReplaceATOMIC_LOAD(SDNode *Node,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) {
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
// Convert wide load -> cmpxchg8b/cmpxchg16b
// FIXME: On 32-bit, load -> fild or movq would be more efficient
// (The only way to get a 16-byte load is cmpxchg16b)
// FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
SDValue Zero = DAG.getConstant(0, VT);
SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Swap =
DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
Node->getOperand(0), Node->getOperand(1), Zero, Zero,
cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(2));
}
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
@ -18036,12 +18016,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case ISD::ATOMIC_LOAD: {
// Delegate to generic TypeLegalization. Situations we can really handle
// should have already been dealt with by AtomicExpandPass.cpp.
break;
case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
return;
}
case ISD::BITCAST: {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");

View File

@ -4,16 +4,18 @@
; FIXME: The generated code can be substantially improved.
define void @test1(i64* %ptr, i64 %val1) {
; CHECK: test1
; CHECK: cmpxchg8b
; CHECK-LABEL: test1
; CHECK: lock
; CHECK-NEXT: cmpxchg8b
; CHECK-NEXT: jne
store atomic i64 %val1, i64* %ptr seq_cst, align 8
ret void
}
define i64 @test2(i64* %ptr) {
; CHECK: test2
; CHECK: cmpxchg8b
; CHECK-LABEL: test2
; CHECK: lock
; CHECK-NEXT: cmpxchg8b
%val = load atomic i64* %ptr seq_cst, align 8
ret i64 %val
}