Refactor AtomicExpand::expandAtomicRMWToCmpXchg into a standalone function.

Summary:
This is useful for PNaCl's `RewriteAtomics` pass. NaCl intrinsics don't exist for some of the more exotic RMW instructions, so by refactoring this function into its own, `RewriteAtomics` can share code rewriting those atomics with `AtomicExpand` while additionally saving a few cycles by generating the `cmpxchg` NaCl-specific intrinsic with the callback. Without this patch, `RewriteAtomics` would require two extra passes over functions, by first requiring use of the full `AtomicExpand` pass to just expand the leftover exotic RMWs and then running itself again to expand resulting `cmpxchg`s.

NFC

Reviewers: jfb

Subscribers: jfb, llvm-commits

Differential Revision: http://reviews.llvm.org/D11422

llvm-svn: 243880
This commit is contained in:
JF Bastien 2015-08-03 15:29:47 +00:00
parent f28b982862
commit e43a24e432
2 changed files with 139 additions and 66 deletions

View File

@ -0,0 +1,57 @@
//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
namespace llvm {
class Value;
class AtomicRMWInst;
/// Parameters (see the expansion example below):
/// (the builder, %addr, %loaded, %new_val, ordering,
/// /* OUT */ %success, /* OUT */ %new_loaded)
typedef function_ref<void(IRBuilder<> &, Value *, Value *, Value *,
AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun;
/// \brief Expand an atomic RMW instruction into a loop utilizing
/// cmpxchg. You'll want to make sure your target machine likes cmpxchg
/// instructions in the first place and that there isn't another, better,
/// transformation available (for example AArch32/AArch64 have linked loads).
///
/// This is useful in passes which can't rewrite the more exotic RMW
/// instructions directly into a platform specific intrinsics (because, say,
/// those intrinsics don't exist). If such a pass is able to expand cmpxchg
/// instructions directly however, then, with this function, it could avoid two
/// extra module passes (avoiding passes by `-atomic-expand` and itself). A
/// specific example would be PNaCl's `RewriteAtomics` pass.
///
/// Given: atomicrmw some_op iN* %addr, iN %incr ordering
///
/// The standard expansion we produce is:
/// [...]
/// %init_loaded = load atomic iN* %addr
/// br label %loop
/// loop:
/// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
/// %new = some_op iN %loaded, %incr
/// ; This is what -atomic-expand will produce using this function on i686 targets:
/// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val
/// %new_loaded = extractvalue { iN, i1 } %pair, 0
/// %success = extractvalue { iN, i1 } %pair, 1
/// ; End callback produced IR
/// br i1 %success, label %atomicrmw.end, label %loop
/// atomicrmw.end:
/// [...]
///
/// Returns true if the containing function was modified.
bool
expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory);
}

View File

@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@ -50,7 +51,6 @@ namespace {
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@ -226,6 +226,17 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
return tryExpandAtomicRMW(AI);
}
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Value *Loaded, Value *NewVal,
AtomicOrdering MemOpOrder,
Value *&Success, Value *&NewLoaded) {
Value* Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
case TargetLoweringBase::AtomicRMWExpansionKind::None:
@ -239,7 +250,7 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
return expandAtomicRMWToLLSC(AI);
}
case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
return expandAtomicRMWToCmpXchg(AI);
return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
}
llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
@ -337,70 +348,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
return true;
}
bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
AtomicOrdering MemOpOrder =
AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// The standard expansion we produce is:
// [...]
// %init_loaded = load atomic iN* %addr
// br label %loop
// loop:
// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
// %new = some_op iN %loaded, %incr
// %pair = cmpxchg iN* %addr, iN %loaded, iN %new
// %new_loaded = extractvalue { iN, i1 } %pair, 0
// %success = extractvalue { iN, i1 } %pair, 1
// br i1 %success, label %atomicrmw.end, label %loop
// atomicrmw.end:
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
// This grabs the DebugLoc from AI.
IRBuilder<> Builder(AI);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we want a load. It's easiest to just remove
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(Addr);
// Atomics require at least natural alignment.
InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
Loaded->addIncoming(InitLoaded, BB);
Value *NewVal =
performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
Value *Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
Loaded->addIncoming(NewLoaded, LoopBB);
Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
Builder.CreateCondBr(Success, ExitBB, LoopBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
AI->replaceAllUsesWith(NewLoaded);
AI->eraseFromParent();
return true;
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
@ -562,3 +509,72 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
}
return false;
}
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
assert(AI);
AtomicOrdering MemOpOrder =
AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// The standard expansion we produce is:
// [...]
// %init_loaded = load atomic iN* %addr
// br label %loop
// loop:
// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
// %new = some_op iN %loaded, %incr
// %pair = cmpxchg iN* %addr, iN %loaded, iN %new
// %new_loaded = extractvalue { iN, i1 } %pair, 0
// %success = extractvalue { iN, i1 } %pair, 1
// br i1 %success, label %atomicrmw.end, label %loop
// atomicrmw.end:
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
// This grabs the DebugLoc from AI.
IRBuilder<> Builder(AI);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we want a load. It's easiest to just remove
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(Addr);
// Atomics require at least natural alignment.
InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
Loaded->addIncoming(InitLoaded, BB);
Value *NewVal =
performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
Value *NewLoaded = nullptr;
Value *Success = nullptr;
CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
Success, NewLoaded);
assert(Success && NewLoaded);
Loaded->addIncoming(NewLoaded, LoopBB);
Builder.CreateCondBr(Success, ExitBB, LoopBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
AI->replaceAllUsesWith(NewLoaded);
AI->eraseFromParent();
return true;
}