Mirror of https://github.com/RPCS3/llvm.git
Support expanding partial-word cmpxchg to full-word cmpxchg in AtomicExpandPass.
Many CPUs only have the ability to do a 4-byte cmpxchg (or ll/sc), not a 1- or 2-byte one. For those, you need to mask and shift the 1- or 2-byte values appropriately to use the 4-byte instruction.

This change adds that support for cmpxchg-based instruction sets (only SPARC, in LLVM). The support can be extended for the LL/SC-based PPC and MIPS in the future, supplanting the ISel expansions those architectures currently use.

Tests added for the IR transform and for SPARCv9.

Differential Revision: http://reviews.llvm.org/D21029

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273025 91177308-0d34-0410-b5e6-96231b3b80d8
commit 8d30502e60 (parent d2e7196d0a)
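The mask-and-shift scheme described above rests on a simple bit-twiddling identity, sketched below in self-contained C++ (illustrative only, not LLVM code; all names are invented):

#include <cassert>
#include <cstdint>

// A narrow value can be merged into a word under a mask without disturbing
// the neighboring bytes, and recovered again by shifting back down. This is
// the identity the widened 4-byte cmpxchg relies on.
int main() {
  uint32_t Word = 0xAABBCCDD; // current contents of the aligned word
  uint8_t V = 0x7E;           // the 1-byte value being stored or compared
  unsigned ShiftAmt = 8;      // lane position derived from the low address bits
  uint32_t Mask = 0xFFu << ShiftAmt;

  uint32_t NewWord = (Word & ~Mask) | ((uint32_t)V << ShiftAmt);
  assert((uint8_t)(NewWord >> ShiftAmt) == V); // the value round-trips
  assert((NewWord & ~Mask) == (Word & ~Mask)); // neighbors are untouched
  return 0;
}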
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1136,6 +1136,15 @@ public:
     return MaxAtomicSizeInBitsSupported;
   }

+  /// Returns the size of the smallest cmpxchg or ll/sc instruction
+  /// the backend supports. Any smaller operations are widened in
+  /// AtomicExpandPass.
+  ///
+  /// Note that *unlike* operations above the maximum size, atomic ops
+  /// are still natively supported below the minimum; they just
+  /// require a more complex expansion.
+  unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
+
   /// Whether AtomicExpandPass should automatically insert fences and reduce
   /// ordering for this atomic. This should be true for most architectures with
   /// weak memory ordering. Defaults to false.
@@ -1552,6 +1561,11 @@ protected:
     MaxAtomicSizeInBitsSupported = SizeInBits;
   }

+  // Sets the minimum cmpxchg or ll/sc size supported by the backend.
+  void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
+    MinCmpXchgSizeInBits = SizeInBits;
+  }
+
 public:
   //===--------------------------------------------------------------------===//
   // Addressing mode description hooks (used by LSR etc).
@@ -1965,6 +1979,10 @@ private:
   /// Accesses larger than this will be expanded by AtomicExpandPass.
   unsigned MaxAtomicSizeInBitsSupported;

+  /// Size in bits of the minimum cmpxchg or ll/sc operation the
+  /// backend supports.
+  unsigned MinCmpXchgSizeInBits;
+
   /// If set to a physical register, this specifies the register that
   /// llvm.savestack/llvm.restorestack should save and restore.
   unsigned StackPointerRegisterToSaveRestore;
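For orientation, the getter and setter above are used as a pair: a backend declares its minimum in its TargetLowering constructor, and AtomicExpandPass compares each atomic operation's size against it (see the pass changes below). A hedged sketch of the backend side; "MyTargetLowering" is a placeholder class, and the SPARC hunk near the end of this commit is the real in-tree example:

// Hypothetical backend constructor (placeholder class name).
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  // The narrowest native cmpxchg is 32 bits wide, so i8/i16 atomics should
  // be widened by AtomicExpandPass rather than lowered by this backend.
  setMinCmpXchgSizeInBits(32);
}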
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -57,10 +57,25 @@ namespace {
     StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicOpToLLSC(
-        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+    Value *
+    insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+                      AtomicOrdering MemOpOrder,
+                      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+    void expandAtomicOpToLLSC(
+        Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
         function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
+    void expandPartwordAtomicRMW(
+        AtomicRMWInst *I,
+        TargetLoweringBase::AtomicExpansionKind ExpansionKind);
+    void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
+
     AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
+    static Value *insertRMWCmpXchgLoop(
+        IRBuilder<> &Builder, Type *ResultType, Value *Addr,
+        AtomicOrdering MemOpOrder,
+        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+        CreateCmpXchgInstFun CreateCmpXchg);
+
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -74,6 +89,10 @@ namespace {
     void expandAtomicStoreToLibcall(StoreInst *LI);
     void expandAtomicRMWToLibcall(AtomicRMWInst *I);
     void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
+
+    friend bool
+    llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+                                   CreateCmpXchgInstFun CreateCmpXchg);
   };
 }

@@ -285,9 +304,17 @@ bool AtomicExpand::runOnFunction(Function &F) {
                "invariant broken");
         MadeChange = true;
       }

-      if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
-        MadeChange |= expandAtomicCmpXchg(CASI);
+      unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+      unsigned ValueSize = getAtomicOpSize(CASI);
+      if (ValueSize < MinCASSize) {
+        assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+               "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+        expandPartwordCmpXchg(CASI);
+      } else {
+        if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
+          MadeChange |= expandAtomicCmpXchg(CASI);
+      }
     }
   }
   return MadeChange;
@@ -355,9 +382,10 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
   case TargetLoweringBase::AtomicExpansionKind::LLSC:
-    return expandAtomicOpToLLSC(
-        LI, LI->getPointerOperand(), LI->getOrdering(),
+    expandAtomicOpToLLSC(
+        LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
         [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+    return true;
   case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
   case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
@@ -498,32 +526,353 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
   switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC:
-    return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
-                                [&](IRBuilder<> &Builder, Value *Loaded) {
-                                  return performAtomicOp(AI->getOperation(),
-                                                         Builder, Loaded,
-                                                         AI->getValOperand());
-                                });
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
-    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+    unsigned ValueSize = getAtomicOpSize(AI);
+    if (ValueSize < MinCASSize) {
+      llvm_unreachable(
+          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+    } else {
+      auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+        return performAtomicOp(AI->getOperation(), Builder, Loaded,
+                               AI->getValOperand());
+      };
+      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
+                           AI->getOrdering(), PerformOp);
+    }
+    return true;
+  }
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+    unsigned ValueSize = getAtomicOpSize(AI);
+    if (ValueSize < MinCASSize) {
+      expandPartwordAtomicRMW(AI,
+                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+    } else {
+      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+    }
+    return true;
+  }
   default:
     llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
   }
 }

-bool AtomicExpand::expandAtomicOpToLLSC(
-    Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
-    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+namespace {
+
+/// Result values from createMaskInstrs helper.
+struct PartwordMaskValues {
+  Type *WordType;
+  Type *ValueType;
+  Value *AlignedAddr;
+  Value *ShiftAmt;
+  Value *Mask;
+  Value *Inv_Mask;
+};
+} // end anonymous namespace
+
+/// This is a helper function which builds instructions to provide
+/// values necessary for partword atomic operations. It takes an
+/// incoming address, Addr, and ValueType, and constructs the address,
+/// shift-amounts and masks needed to work with a larger value of size
+/// WordSize.
+///
+/// AlignedAddr: Addr rounded down to a multiple of WordSize
+///
+/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
+///           from AlignAddr for it to have the same value as if
+///           ValueType was loaded from Addr.
+///
+/// Mask: Value to mask with the value loaded from AlignAddr to
+///       include only the part that would've been loaded from Addr.
+///
+/// Inv_Mask: The inverse of Mask.
+
+static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
+                                           Type *ValueType, Value *Addr,
+                                           unsigned WordSize) {
+  PartwordMaskValues Ret;
+
+  BasicBlock *BB = I->getParent();
+  Function *F = BB->getParent();
+  Module *M = I->getModule();
+
+  LLVMContext &Ctx = F->getContext();
+  const DataLayout &DL = M->getDataLayout();
+
+  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
+
+  assert(ValueSize < WordSize);
+
+  Ret.ValueType = ValueType;
+  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+
+  Type *WordPtrType =
+      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+
+  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
+  Ret.AlignedAddr = Builder.CreateIntToPtr(
+      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+      "AlignedAddr");
+
+  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+  if (DL.isLittleEndian()) {
+    // turn bytes into bits
+    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+  } else {
+    // turn bytes into bits, and count from the other side.
+    Ret.ShiftAmt =
+        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+  }
+
+  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
+  Ret.Mask = Builder.CreateShl(
+      ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
+      "Mask");
+  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+
+  return Ret;
+}
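As a sanity check on the little-/big-endian branch above, the shift amounts it computes can be re-derived with ordinary integers. An illustrative stand-alone program (not LLVM code):

#include <cassert>

int main() {
  const unsigned WordSize = 4, ValueSize = 1;
  for (unsigned PtrLSB = 0; PtrLSB != WordSize; ++PtrLSB) {
    unsigned LEShift = PtrLSB * 8;                            // bytes to bits
    unsigned BEShift = (PtrLSB ^ (WordSize - ValueSize)) * 8; // count from the other side
    // Every byte offset maps to a distinct 8-bit lane, and the two byte
    // orders are mirror images of one another.
    assert(LEShift + BEShift == 24);
  }
  return 0;
}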
+/// Emit IR to implement a masked version of a given atomicrmw
+/// operation. (That is, only the bits under the Mask should be
+/// affected by the operation)
+static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
+                                    IRBuilder<> &Builder, Value *Loaded,
+                                    Value *Shifted_Inc, Value *Inc,
+                                    const PartwordMaskValues &PMV) {
+  switch (Op) {
+  case AtomicRMWInst::Xchg: {
+    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
+    return FinalVal;
+  }
+  case AtomicRMWInst::Or:
+  case AtomicRMWInst::Xor:
+    // Or/Xor won't affect any other bits, so can just be done
+    // directly.
+    return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+  case AtomicRMWInst::Add:
+  case AtomicRMWInst::Sub:
+  case AtomicRMWInst::And:
+  case AtomicRMWInst::Nand: {
+    // The other arithmetic ops need to be masked into place.
+    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
+    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
+    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
+    return FinalVal;
+  }
+  case AtomicRMWInst::Max:
+  case AtomicRMWInst::Min:
+  case AtomicRMWInst::UMax:
+  case AtomicRMWInst::UMin: {
+    // Finally, comparison ops will operate on the full value, so
+    // truncate down to the original size, and expand out again after
+    // doing the operation.
+    Value *Loaded_Shiftdown = Builder.CreateTrunc(
+        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
+    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
+    Value *NewVal_Shiftup = Builder.CreateShl(
+        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
+    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+    return FinalVal;
+  }
+  default:
+    llvm_unreachable("Unknown atomic op");
+  }
+}
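The case split above can be checked with ordinary integers: or/xor of a pre-shifted operand can never leak outside the mask, while add can carry into the neighboring lane unless the result is re-masked. An illustrative, self-contained check (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Loaded = 0x00FF1234; // byte lane 2 (mask 0x00FF0000) holds 0xFF
  uint32_t Mask = 0x00FF0000, Inv_Mask = ~Mask;
  uint32_t Shifted_Inc = 0x1u << 16; // an "add 1" operand, pre-shifted into place

  // Xor stays inside the lane, so no fixup is needed (the Or/Xor case).
  assert(((Loaded ^ Shifted_Inc) & Inv_Mask) == (Loaded & Inv_Mask));

  // Add carries out of the lane (0xFF + 1 overflows into bit 24), so the
  // result must be masked back into place (the Add/Sub/And/Nand case).
  uint32_t NewVal = Loaded + Shifted_Inc;
  assert((NewVal & Inv_Mask) != (Loaded & Inv_Mask));   // a neighbor was corrupted
  uint32_t FinalVal = (Loaded & Inv_Mask) | (NewVal & Mask);
  assert((FinalVal & Inv_Mask) == (Loaded & Inv_Mask)); // repaired by re-masking
  return 0;
}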
+/// Expand a sub-word atomicrmw operation into an appropriate
+/// word-sized operation.
+///
+/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
+/// way as a typical atomicrmw expansion. The only difference here is
+/// that the operation inside of the loop must operate only upon a
+/// part of the value.
+void AtomicExpand::expandPartwordAtomicRMW(
+    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
+
+  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
+
+  AtomicOrdering MemOpOrder = AI->getOrdering();
+
+  IRBuilder<> Builder(AI);
+
+  PartwordMaskValues PMV =
+      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
+                       TLI->getMinCmpXchgSizeInBits() / 8);
+
+  Value *ValOperand_Shifted =
+      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
+                        PMV.ShiftAmt, "ValOperand_Shifted");
+
+  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
+    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
+                                 ValOperand_Shifted, AI->getValOperand(), PMV);
+  };
+
+  // TODO: When we're ready to support LLSC conversions too, use
+  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
+  Value *OldResult =
+      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+                           PerformPartwordOp, createCmpXchgInstFun);
+  Value *FinalOldResult = Builder.CreateTrunc(
+      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  AI->replaceAllUsesWith(FinalOldResult);
+  AI->eraseFromParent();
+}
+
+void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
+  // The basic idea here is that we're expanding a cmpxchg of a
+  // smaller memory size up to a word-sized cmpxchg. To do this, we
+  // need to add a retry-loop for strong cmpxchg, so that
+  // modifications to other parts of the word don't cause a spurious
+  // failure.
+
+  // This generates code like the following:
+  //     [[Setup mask values PMV.*]]
+  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
+  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
+  //     %InitLoaded = load i32* %addr
+  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
+  //     br partword.cmpxchg.loop
+  // partword.cmpxchg.loop:
+  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
+  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
+  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
+  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
+  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
+  //        i32 %FullWord_NewVal success_ordering failure_ordering
+  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
+  //     %Success = extractvalue { i32, i1 } %NewCI, 1
+  //     br i1 %Success, label %partword.cmpxchg.end,
+  //        label %partword.cmpxchg.failure
+  // partword.cmpxchg.failure:
+  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
+  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
+  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
+  //        label %partword.cmpxchg.end
+  // partword.cmpxchg.end:
+  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
+  //     %FinalOldVal = trunc i32 %tmp1 to i8
+  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
+  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
+
+  Value *Addr = CI->getPointerOperand();
+  Value *Cmp = CI->getCompareOperand();
+  Value *NewVal = CI->getNewValOperand();
+
+  BasicBlock *BB = CI->getParent();
+  Function *F = BB->getParent();
+  IRBuilder<> Builder(CI);
+  LLVMContext &Ctx = Builder.getContext();
+
+  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
+
+  BasicBlock *EndBB =
+      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
+  auto FailureBB =
+      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
+  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
+
+  // The split call above "helpfully" added a branch at the end of BB
+  // (to the wrong place).
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+
+  PartwordMaskValues PMV = createMaskInstrs(
+      Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
+
+  // Shift the incoming values over, into the right location in the word.
+  Value *NewVal_Shifted =
+      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
+  Value *Cmp_Shifted =
+      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
+
+  // Load the entire current word, and mask into place the expected and new
+  // values
+  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
+  InitLoaded->setVolatile(CI->isVolatile());
+  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
+  Builder.CreateBr(LoopBB);
+
+  // partword.cmpxchg.loop:
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
+  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
+
+  // Mask/Or the expected and new values into place in the loaded word.
+  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
+  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
+  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
+      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
+      CI->getFailureOrdering(), CI->getSynchScope());
+  NewCI->setVolatile(CI->isVolatile());
+  // When we're building a strong cmpxchg, we need a loop, so you
+  // might think we could use a weak cmpxchg inside. But, using strong
+  // allows the below comparison for ShouldContinue, and we're
+  // expecting the underlying cmpxchg to be a machine instruction,
+  // which is strong anyways.
+  NewCI->setWeak(CI->isWeak());
+
+  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
+  Value *Success = Builder.CreateExtractValue(NewCI, 1);
+
+  if (CI->isWeak())
+    Builder.CreateBr(EndBB);
+  else
+    Builder.CreateCondBr(Success, EndBB, FailureBB);
+
+  // partword.cmpxchg.failure:
+  Builder.SetInsertPoint(FailureBB);
+  // Upon failure, verify that the masked-out part of the loaded value
+  // has been modified. If it didn't, abort the cmpxchg, since the
+  // masked-in part must've.
+  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
+  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
+  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
+
+  // Add the second value to the phi from above
+  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
+
+  // partword.cmpxchg.end:
+  Builder.SetInsertPoint(CI);
+
+  Value *FinalOldVal = Builder.CreateTrunc(
+      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+  Value *Res = UndefValue::get(CI->getType());
+  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
+  Res = Builder.CreateInsertValue(Res, Success, 1);
+
+  CI->replaceAllUsesWith(Res);
+  CI->eraseFromParent();
+}
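The retry loop's termination logic can be modeled in miniature: a word-sized CAS failure is either real (the addressed bytes differed, so ShouldContinue is false) or spurious (only neighboring bytes changed, so the loop retries with the refreshed neighbors). A single-threaded, illustrative model with invented names, not the emitted IR:

#include <cassert>
#include <cstdint>

struct Result { uint32_t OldWord; bool Success; };

// "Memory" stands in for *AlignedAddr; the CAS is modeled sequentially.
Result partwordCmpXchg(uint32_t &Memory, uint32_t Mask, uint32_t Cmp_Shifted,
                       uint32_t NewVal_Shifted) {
  uint32_t Inv_Mask = ~Mask;
  uint32_t Loaded_MaskOut = Memory & Inv_Mask;
  while (true) {
    uint32_t FullWord_Cmp = Loaded_MaskOut | Cmp_Shifted;
    uint32_t FullWord_New = Loaded_MaskOut | NewVal_Shifted;
    uint32_t OldVal = Memory;        // word-sized strong cmpxchg, modeled
    if (OldVal == FullWord_Cmp) {
      Memory = FullWord_New;
      return {OldVal, true};
    }
    uint32_t OldVal_MaskOut = OldVal & Inv_Mask;
    if (OldVal_MaskOut == Loaded_MaskOut)
      return {OldVal, false};        // the masked-in part really differed
    Loaded_MaskOut = OldVal_MaskOut; // spurious failure: retry
  }
}

int main() {
  uint32_t Mem = 0xAABB00DD; // byte lane 1 (mask 0x0000FF00) holds 0x00
  Result R = partwordCmpXchg(Mem, 0x0000FF00, 0x00000000, 0x00004200);
  assert(R.Success && Mem == 0xAABB42DD);  // expected 0x00, stored 0x42
  R = partwordCmpXchg(Mem, 0x0000FF00, 0x00000700, 0x00009900);
  assert(!R.Success && Mem == 0xAABB42DD); // lane holds 0x42, not 0x07
  return 0;
}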
+void AtomicExpand::expandAtomicOpToLLSC(
+    Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
+    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+  IRBuilder<> Builder(I);
+  Value *Loaded =
+      insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
+
+  I->replaceAllUsesWith(Loaded);
+  I->eraseFromParent();
+}
+
+Value *AtomicExpand::insertRMWLLSCLoop(
+    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+    AtomicOrdering MemOpOrder,
+    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
+  LLVMContext &Ctx = Builder.getContext();
+  BasicBlock *BB = Builder.GetInsertBlock();
   Function *F = BB->getParent();

   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
   // The standard expansion we produce is:
   //     [...]
   //     fence?
   // atomicrmw.start:
   //     %loaded = @load.linked(%addr)
   //     %new = some_op iN %loaded, %incr
@@ -531,17 +880,13 @@ bool AtomicExpand::expandAtomicOpToLLSC(
   //     %try_again = icmp i32 ne %stored, 0
   //     br i1 %try_again, label %loop, label %atomicrmw.end
   // atomicrmw.end:
   //     fence?
   //     [...]
-  BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
+  BasicBlock *ExitBB =
+      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

-  // This grabs the DebugLoc from I.
-  IRBuilder<> Builder(I);
-
   // The split call above "helpfully" added a branch at the end of BB (to the
-  // wrong place), but we might want a fence too. It's easiest to just remove
-  // the branch entirely.
+  // wrong place).
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
   Builder.CreateBr(LoopBB);
@@ -559,11 +904,7 @@ bool AtomicExpand::expandAtomicOpToLLSC(
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-
-  I->replaceAllUsesWith(Loaded);
-  I->eraseFromParent();
-
-  return true;
+  return Loaded;
 }

 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
@@ -867,17 +1208,14 @@ bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
   return false;
 }

-bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
-                                    CreateCmpXchgInstFun CreateCmpXchg) {
-  assert(AI);
-
-  AtomicOrdering MemOpOrder = AI->getOrdering() == AtomicOrdering::Unordered
-                                  ? AtomicOrdering::Monotonic
-                                  : AI->getOrdering();
-  Value *Addr = AI->getPointerOperand();
-  BasicBlock *BB = AI->getParent();
+Value *AtomicExpand::insertRMWCmpXchgLoop(
+    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
+    AtomicOrdering MemOpOrder,
+    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
+    CreateCmpXchgInstFun CreateCmpXchg) {
+  LLVMContext &Ctx = Builder.getContext();
+  BasicBlock *BB = Builder.GetInsertBlock();
   Function *F = BB->getParent();
-  LLVMContext &Ctx = F->getContext();

   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -894,34 +1232,34 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
   //     br i1 %success, label %atomicrmw.end, label %loop
   // atomicrmw.end:
   //     [...]
-  BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+  BasicBlock *ExitBB =
+      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

-  // This grabs the DebugLoc from AI.
-  IRBuilder<> Builder(AI);
-
   // The split call above "helpfully" added a branch at the end of BB (to the
   // wrong place), but we want a load. It's easiest to just remove
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
   // Atomics require at least natural alignment.
-  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
   Builder.CreateBr(LoopBB);

   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
   Loaded->addIncoming(InitLoaded, BB);

-  Value *NewVal =
-      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+  Value *NewVal = PerformOp(Builder, Loaded);

   Value *NewLoaded = nullptr;
   Value *Success = nullptr;

-  CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
+                MemOpOrder == AtomicOrdering::Unordered
+                    ? AtomicOrdering::Monotonic
+                    : MemOpOrder,
                 Success, NewLoaded);
   assert(Success && NewLoaded);

@@ -930,10 +1268,23 @@ bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
   Builder.CreateCondBr(Success, ExitBB, LoopBB);

   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+  return NewLoaded;
+}

-  AI->replaceAllUsesWith(NewLoaded);
+// Note: This function is exposed externally by AtomicExpandUtils.h
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+                                    CreateCmpXchgInstFun CreateCmpXchg) {
+  IRBuilder<> Builder(AI);
+  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
+      Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
+      [&](IRBuilder<> &Builder, Value *Loaded) {
+        return performAtomicOp(AI->getOperation(), Builder, Loaded,
+                               AI->getValOperand());
+      },
+      CreateCmpXchg);
+
+  AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();

   return true;
 }
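For intuition, the loop that insertRMWCmpXchgLoop emits is the classic fetch-modify-CAS retry pattern. Here is the same shape in portable C++ with std::atomic, offered as an analogy rather than as the code the pass generates:

#include <atomic>
#include <cstdint>

// Apply an arbitrary operation atomically by looping until the compare-
// exchange succeeds; returns the old value, like an atomicrmw does.
template <typename Op>
uint32_t atomicRMW(std::atomic<uint32_t> &Addr, Op PerformOp) {
  uint32_t Loaded = Addr.load();         // plays the role of %init_loaded
  while (true) {
    uint32_t NewVal = PerformOp(Loaded); // %new = some_op %loaded, %incr
    // On failure, compare_exchange_strong refreshes Loaded with the current
    // value, which plays the role of the phi feeding the next iteration.
    if (Addr.compare_exchange_strong(Loaded, NewVal))
      return Loaded;
  }
}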
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -830,6 +830,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
   // with the Target-specific changes necessary.
   MaxAtomicSizeInBitsSupported = 1024;

+  MinCmpXchgSizeInBits = 0;
+
   std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames), nullptr);

   InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
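A default of zero means "no minimum": for targets that never call the setter, the pass computes MinCASSize as 0, the size test can never be true, and behavior is unchanged. Restated illustratively (a paraphrase of the pass's guard, not a quote):

// With MinCmpXchgSizeInBits == 0 this is never true, so only targets that
// opt in (like SPARC, below) take the new partword path.
bool needsPartwordExpansion(unsigned MinCmpXchgSizeInBits,
                            unsigned ValueSizeInBytes) {
  return ValueSizeInBytes < MinCmpXchgSizeInBits / 8;
}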
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1647,6 +1647,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   else
     setMaxAtomicSizeInBitsSupported(0);

+  setMinCmpXchgSizeInBits(32);
+
   setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal);

   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal);
--- a/test/CodeGen/SPARC/atomics.ll
+++ b/test/CodeGen/SPARC/atomics.ll
@@ -64,6 +64,90 @@ entry:
   ret i64 %2
 }

+;; TODO: the "move %icc" and related instructions are totally
+;; redundant here. There's something weird happening in optimization
+;; of the success value of cmpxchg.
+
+; CHECK-LABEL: test_cmpxchg_i8
+; CHECK: and %o1, -4, %o2
+; CHECK: mov 3, %o3
+; CHECK: andn %o3, %o1, %o1
+; CHECK: sll %o1, 3, %o1
+; CHECK: mov 255, %o3
+; CHECK: sll %o3, %o1, %o5
+; CHECK: xor %o5, -1, %o3
+; CHECK: mov 123, %o4
+; CHECK: ld [%o2], %g2
+; CHECK: sll %o4, %o1, %o4
+; CHECK: and %o0, 255, %o0
+; CHECK: sll %o0, %o1, %o0
+; CHECK: andn %g2, %o5, %g2
+; CHECK: sethi 0, %o5
+; CHECK: [[LABEL1:\.L.*]]:
+; CHECK: or %g2, %o4, %g3
+; CHECK: or %g2, %o0, %g4
+; CHECK: cas [%o2], %g4, %g3
+; CHECK: cmp %g3, %g4
+; CHECK: mov %o5, %g4
+; CHECK: move %icc, 1, %g4
+; CHECK: cmp %g4, 0
+; CHECK: bne [[LABEL2:\.L.*]]
+; CHECK: nop
+; CHECK: and %g3, %o3, %g4
+; CHECK: cmp %g2, %g4
+; CHECK: bne [[LABEL1]]
+; CHECK: mov %g4, %g2
+; CHECK: [[LABEL2]]:
+; CHECK: retl
+; CHECK: srl %g3, %o1, %o0
+define i8 @test_cmpxchg_i8(i8 %a, i8* %ptr) {
+entry:
+  %pair = cmpxchg i8* %ptr, i8 %a, i8 123 monotonic monotonic
+  %b = extractvalue { i8, i1 } %pair, 0
+  ret i8 %b
+}
+
+; CHECK-LABEL: test_cmpxchg_i16
+
+; CHECK: and %o1, -4, %o2
+; CHECK: and %o1, 3, %o1
+; CHECK: xor %o1, 2, %o1
+; CHECK: sll %o1, 3, %o1
+; CHECK: sethi 63, %o3
+; CHECK: or %o3, 1023, %o4
+; CHECK: sll %o4, %o1, %o5
+; CHECK: xor %o5, -1, %o3
+; CHECK: and %o0, %o4, %o4
+; CHECK: ld [%o2], %g2
+; CHECK: mov 123, %o0
+; CHECK: sll %o0, %o1, %o0
+; CHECK: sll %o4, %o1, %o4
+; CHECK: andn %g2, %o5, %g2
+; CHECK: sethi 0, %o5
+; CHECK: [[LABEL1:\.L.*]]:
+; CHECK: or %g2, %o0, %g3
+; CHECK: or %g2, %o4, %g4
+; CHECK: cas [%o2], %g4, %g3
+; CHECK: cmp %g3, %g4
+; CHECK: mov %o5, %g4
+; CHECK: move %icc, 1, %g4
+; CHECK: cmp %g4, 0
+; CHECK: bne [[LABEL2:\.L.*]]
+; CHECK: nop
+; CHECK: and %g3, %o3, %g4
+; CHECK: cmp %g2, %g4
+; CHECK: bne [[LABEL1]]
+; CHECK: mov %g4, %g2
+; CHECK: [[LABEL2]]:
+; CHECK: retl
+; CHECK: srl %g3, %o1, %o0
+define i16 @test_cmpxchg_i16(i16 %a, i16* %ptr) {
+entry:
+  %pair = cmpxchg i16* %ptr, i16 %a, i16 123 monotonic monotonic
+  %b = extractvalue { i16, i1 } %pair, 0
+  ret i16 %b
+}
+
 ; CHECK-LABEL: test_cmpxchg_i32
 ; CHECK: mov 123, [[R:%[gilo][0-7]]]
 ; CHECK: cas [%o1], %o0, [[R]]
@@ -86,6 +170,26 @@ entry:
   ret i64 %b
 }

+; CHECK-LABEL: test_swap_i8
+; CHECK: mov 42, [[R:%[gilo][0-7]]]
+; CHECK: cas
+
+define i8 @test_swap_i8(i8 %a, i8* %ptr) {
+entry:
+  %b = atomicrmw xchg i8* %ptr, i8 42 monotonic
+  ret i8 %b
+}
+
+; CHECK-LABEL: test_swap_i16
+; CHECK: mov 42, [[R:%[gilo][0-7]]]
+; CHECK: cas
+
+define i16 @test_swap_i16(i16 %a, i16* %ptr) {
+entry:
+  %b = atomicrmw xchg i16* %ptr, i16 42 monotonic
+  ret i16 %b
+}
+
 ; CHECK-LABEL: test_swap_i32
 ; CHECK: mov 42, [[R:%[gilo][0-7]]]
 ; CHECK: swap [%o1], [[R]]
@@ -105,12 +209,36 @@ entry:
   ret i64 %b
 }

-; CHECK-LABEL: test_load_add_32
+; CHECK-LABEL: test_load_sub_i8
+; CHECK: membar
+; CHECK: .L{{.*}}:
+; CHECK: sub
+; CHECK: cas [{{%[gilo][0-7]}}]
+; CHECK: membar
+define zeroext i8 @test_load_sub_i8(i8* %p, i8 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i8* %p, i8 %v seq_cst
+  ret i8 %0
+}
+
+; CHECK-LABEL: test_load_sub_i16
+; CHECK: membar
+; CHECK: .L{{.*}}:
+; CHECK: sub
+; CHECK: cas [{{%[gilo][0-7]}}]
+; CHECK: membar
+define zeroext i16 @test_load_sub_i16(i16* %p, i16 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i16* %p, i16 %v seq_cst
+  ret i16 %0
+}
+
+; CHECK-LABEL: test_load_add_i32
 ; CHECK: membar
 ; CHECK: add [[V:%[gilo][0-7]]], %o1, [[U:%[gilo][0-7]]]
 ; CHECK: cas [%o0], [[V]], [[U]]
 ; CHECK: membar
-define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+define zeroext i32 @test_load_add_i32(i32* %p, i32 zeroext %v) {
 entry:
   %0 = atomicrmw add i32* %p, i32 %v seq_cst
   ret i32 %0
new file: test/Transforms/AtomicExpand/SPARC/partword.ll (166 lines)
@@ -0,0 +1,166 @@
; RUN: opt -S %s -atomic-expand | FileCheck %s

;; Verify the cmpxchg and atomicrmw expansions where sub-word-size
;; instructions are not available.

;;; NOTE: this test is mostly target-independent -- any target which
;;; doesn't support cmpxchg of sub-word sizes would do.
target datalayout = "E-m:e-i64:64-n32:64-S128"
target triple = "sparcv9-unknown-unknown"

; CHECK-LABEL: @test_cmpxchg_i8(
; CHECK: fence seq_cst
; CHECK: %0 = ptrtoint i8* %arg to i64
; CHECK: %1 = and i64 %0, -4
; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
; CHECK: %PtrLSB = and i64 %0, 3
; CHECK: %2 = xor i64 %PtrLSB, 3
; CHECK: %3 = shl i64 %2, 3
; CHECK: %ShiftAmt = trunc i64 %3 to i32
; CHECK: %Mask = shl i32 255, %ShiftAmt
; CHECK: %Inv_Mask = xor i32 %Mask, -1
; CHECK: %4 = zext i8 %new to i32
; CHECK: %5 = shl i32 %4, %ShiftAmt
; CHECK: %6 = zext i8 %old to i32
; CHECK: %7 = shl i32 %6, %ShiftAmt
; CHECK: %8 = load i32, i32* %AlignedAddr
; CHECK: %9 = and i32 %8, %Inv_Mask
; CHECK: br label %partword.cmpxchg.loop
; CHECK:partword.cmpxchg.loop:
; CHECK: %10 = phi i32 [ %9, %entry ], [ %16, %partword.cmpxchg.failure ]
; CHECK: %11 = or i32 %10, %5
; CHECK: %12 = or i32 %10, %7
; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %12, i32 %11 monotonic monotonic
; CHECK: %14 = extractvalue { i32, i1 } %13, 0
; CHECK: %15 = extractvalue { i32, i1 } %13, 1
; CHECK: br i1 %15, label %partword.cmpxchg.end, label %partword.cmpxchg.failure
; CHECK:partword.cmpxchg.failure:
; CHECK: %16 = and i32 %14, %Inv_Mask
; CHECK: %17 = icmp ne i32 %10, %16
; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
; CHECK:partword.cmpxchg.end:
; CHECK: %18 = lshr i32 %14, %ShiftAmt
; CHECK: %19 = trunc i32 %18 to i8
; CHECK: %20 = insertvalue { i8, i1 } undef, i8 %19, 0
; CHECK: %21 = insertvalue { i8, i1 } %20, i1 %15, 1
; CHECK: fence seq_cst
; CHECK: %ret = extractvalue { i8, i1 } %21, 0
; CHECK: ret i8 %ret
define i8 @test_cmpxchg_i8(i8* %arg, i8 %old, i8 %new) {
entry:
  %ret_succ = cmpxchg i8* %arg, i8 %old, i8 %new seq_cst monotonic
  %ret = extractvalue { i8, i1 } %ret_succ, 0
  ret i8 %ret
}

; CHECK-LABEL: @test_cmpxchg_i16(
; CHECK: fence seq_cst
; CHECK: %0 = ptrtoint i16* %arg to i64
; CHECK: %1 = and i64 %0, -4
; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
; CHECK: %PtrLSB = and i64 %0, 3
; CHECK: %2 = xor i64 %PtrLSB, 2
; CHECK: %3 = shl i64 %2, 3
; CHECK: %ShiftAmt = trunc i64 %3 to i32
; CHECK: %Mask = shl i32 65535, %ShiftAmt
; CHECK: %Inv_Mask = xor i32 %Mask, -1
; CHECK: %4 = zext i16 %new to i32
; CHECK: %5 = shl i32 %4, %ShiftAmt
; CHECK: %6 = zext i16 %old to i32
; CHECK: %7 = shl i32 %6, %ShiftAmt
; CHECK: %8 = load i32, i32* %AlignedAddr
; CHECK: %9 = and i32 %8, %Inv_Mask
; CHECK: br label %partword.cmpxchg.loop
; CHECK:partword.cmpxchg.loop:
; CHECK: %10 = phi i32 [ %9, %entry ], [ %16, %partword.cmpxchg.failure ]
; CHECK: %11 = or i32 %10, %5
; CHECK: %12 = or i32 %10, %7
; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %12, i32 %11 monotonic monotonic
; CHECK: %14 = extractvalue { i32, i1 } %13, 0
; CHECK: %15 = extractvalue { i32, i1 } %13, 1
; CHECK: br i1 %15, label %partword.cmpxchg.end, label %partword.cmpxchg.failure
; CHECK:partword.cmpxchg.failure:
; CHECK: %16 = and i32 %14, %Inv_Mask
; CHECK: %17 = icmp ne i32 %10, %16
; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
; CHECK:partword.cmpxchg.end:
; CHECK: %18 = lshr i32 %14, %ShiftAmt
; CHECK: %19 = trunc i32 %18 to i16
; CHECK: %20 = insertvalue { i16, i1 } undef, i16 %19, 0
; CHECK: %21 = insertvalue { i16, i1 } %20, i1 %15, 1
; CHECK: fence seq_cst
; CHECK: %ret = extractvalue { i16, i1 } %21, 0
; CHECK: ret i16 %ret
define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
entry:
  %ret_succ = cmpxchg i16* %arg, i16 %old, i16 %new seq_cst monotonic
  %ret = extractvalue { i16, i1 } %ret_succ, 0
  ret i16 %ret
}


; CHECK-LABEL: @test_add_i16(
; CHECK: fence seq_cst
; CHECK: %0 = ptrtoint i16* %arg to i64
; CHECK: %1 = and i64 %0, -4
; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
; CHECK: %PtrLSB = and i64 %0, 3
; CHECK: %2 = xor i64 %PtrLSB, 2
; CHECK: %3 = shl i64 %2, 3
; CHECK: %ShiftAmt = trunc i64 %3 to i32
; CHECK: %Mask = shl i32 65535, %ShiftAmt
; CHECK: %Inv_Mask = xor i32 %Mask, -1
; CHECK: %4 = zext i16 %val to i32
; CHECK: %ValOperand_Shifted = shl i32 %4, %ShiftAmt
; CHECK: %5 = load i32, i32* %AlignedAddr, align 4
; CHECK: br label %atomicrmw.start
; CHECK:atomicrmw.start:
; CHECK: %loaded = phi i32 [ %5, %entry ], [ %newloaded, %atomicrmw.start ]
; CHECK: %new = add i32 %loaded, %ValOperand_Shifted
; CHECK: %6 = and i32 %new, %Mask
; CHECK: %7 = and i32 %loaded, %Inv_Mask
; CHECK: %8 = or i32 %7, %6
; CHECK: %9 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %8 monotonic monotonic
; CHECK: %success = extractvalue { i32, i1 } %9, 1
; CHECK: %newloaded = extractvalue { i32, i1 } %9, 0
; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
; CHECK:atomicrmw.end:
; CHECK: %10 = lshr i32 %newloaded, %ShiftAmt
; CHECK: %11 = trunc i32 %10 to i16
; CHECK: fence seq_cst
; CHECK: ret i16 %11
define i16 @test_add_i16(i16* %arg, i16 %val) {
entry:
  %ret = atomicrmw add i16* %arg, i16 %val seq_cst
  ret i16 %ret
}

; CHECK-LABEL: @test_xor_i16(
; (I'm going to just assert on the bits that differ from add, above.)
; CHECK:atomicrmw.start:
; CHECK: %new = xor i32 %loaded, %ValOperand_Shifted
; CHECK: %6 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %new monotonic monotonic
; CHECK:atomicrmw.end:
define i16 @test_xor_i16(i16* %arg, i16 %val) {
entry:
  %ret = atomicrmw xor i16* %arg, i16 %val seq_cst
  ret i16 %ret
}

; CHECK-LABEL: @test_min_i16(
; CHECK:atomicrmw.start:
; CHECK: %6 = lshr i32 %loaded, %ShiftAmt
; CHECK: %7 = trunc i32 %6 to i16
; CHECK: %8 = icmp sle i16 %7, %val
; CHECK: %new = select i1 %8, i16 %7, i16 %val
; CHECK: %9 = zext i16 %new to i32
; CHECK: %10 = shl i32 %9, %ShiftAmt
; CHECK: %11 = and i32 %loaded, %Inv_Mask
; CHECK: %12 = or i32 %11, %10
; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %12 monotonic monotonic
; CHECK:atomicrmw.end:
define i16 @test_min_i16(i16* %arg, i16 %val) {
entry:
  %ret = atomicrmw min i16* %arg, i16 %val seq_cst
  ret i16 %ret
}