From 95528943e9abf1308d285276f0aba1f0414e6a9e Mon Sep 17 00:00:00 2001 From: Andrew Lenharth Date: Thu, 21 Feb 2008 06:45:13 +0000 Subject: [PATCH] Atomic op support. If any gcc test uses __sync builtins, it might start failing on archs that haven't implemented them yet llvm-svn: 47430 --- llvm/docs/LangRef.html | 175 +++++++++++++++++- llvm/include/llvm/CodeGen/SelectionDAG.h | 10 + llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 49 ++++- llvm/include/llvm/Intrinsics.td | 13 +- llvm/include/llvm/Target/TargetLowering.h | 2 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 46 +++++ .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 41 ++++ .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 34 +++- llvm/lib/Target/Alpha/AlphaISelLowering.cpp | 93 ++++++++++ llvm/lib/Target/Alpha/AlphaISelLowering.h | 3 + llvm/lib/Target/Alpha/AlphaInstrInfo.td | 34 +++- llvm/lib/Target/TargetSelectionDAG.td | 91 +++++++++ llvm/lib/Target/X86/X86InstrInfo.td | 10 +- 13 files changed, 589 insertions(+), 12 deletions(-) diff --git a/llvm/docs/LangRef.html b/llvm/docs/LangRef.html index af88237adc81..d09d34860e25 100644 --- a/llvm/docs/LangRef.html +++ b/llvm/docs/LangRef.html @@ -206,7 +206,10 @@
  • Atomic intrinsics
      -
    1. llvm.memory_barrier
    2. +
    3. llvm.memory_barrier
    4. +
    5. llvm.atomic.lcs
    6. +
    7. llvm.atomic.las
    8. +
    9. llvm.atomic.swap
  • General intrinsics @@ -5339,6 +5342,176 @@ i1 <device> ) + + +
    +
    Syntax:
    +

    + This is an overloaded intrinsic. You can use llvm.atomic.lcs on any + integer bit width. Not all targets support all bit widths however.

    + +
    +declare i8 @llvm.atomic.lcs.i8( i8* <ptr>, i8 <cmp>, i8 <val> )
    +declare i16 @llvm.atomic.lcs.i16( i16* <ptr>, i16 <cmp>, i16 <val> )
    +declare i32 @llvm.atomic.lcs.i32( i32* <ptr>, i32 <cmp>, i32 <val> )
    +declare i64 @llvm.atomic.lcs.i64( i64* <ptr>, i64 <cmp>, i64 <val> )
    +
    +
    +
    Overview:
    +

    + This loads a value in memory and compares it to a given value. If they are + equal, it stores a new value into the memory. +

    +
    Arguments:
    +

    + The llvm.atomic.lcs intrinsic takes three arguments. The result as + well as both cmp and val must be integer values with the + same bit width. The ptr argument must be a pointer to a value of + this integer type. While any bit width integer may be used, targets may only + lower representations they support in hardware. + +

    +
    Semantics:
    +

    + This entire intrinsic must be executed atomically. It first loads the value + in memory pointed to by ptr and compares it with the value + cmp. If they are equal, val is stored into the memory. The + loaded value is yielded in all cases. This provides the equivalent of an + atomic compare-and-swap operation within the SSA framework. +

    +
    Examples:
    + +
    +%ptr      = malloc i32
    +            store i32 4, i32* %ptr
    +
    +%val1     = add i32 4, 4
    +%result1  = call i32 @llvm.atomic.lcs.i32( i32* %ptr, i32 4, i32 %val1 )
    +                                          ; yields {i32}:result1 = 4
    +%stored1  = icmp eq i32 %result1, 4       ; yields {i1}:stored1 = true
    +%memval1  = load i32* %ptr                ; yields {i32}:memval1 = 8
    +
    +%val2     = add i32 1, 1
    +%result2  = call i32 @llvm.atomic.lcs.i32( i32* %ptr, i32 5, i32 %val2 )
    +                                          ; yields {i32}:result2 = 8
    +%stored2  = icmp eq i32 %result2, 5       ; yields {i1}:stored2 = false
    +
    +%memval2  = load i32* %ptr                ; yields {i32}:memval2 = 8
    +
    +
    + + + +
    +
    Syntax:
    + +

    + This is an overloaded intrinsic. You can use llvm.atomic.swap on any + integer bit width. Not all targets support all bit widths however.

    +
    +declare i8 @llvm.atomic.swap.i8( i8* <ptr>, i8 <val> )
    +declare i16 @llvm.atomic.swap.i16( i16* <ptr>, i16 <val> )
    +declare i32 @llvm.atomic.swap.i32( i32* <ptr>, i32 <val> )
    +declare i64 @llvm.atomic.swap.i64( i64* <ptr>, i64 <val> )
    +
    +
    +
    Overview:
    +

    + This intrinsic loads the value stored in memory at ptr and yields + the value from memory. It then stores the value in val in the memory + at ptr. +

    +
    Arguments:
    + +

    + The llvm.atomic.swap intrinsic takes two arguments. Both the + val argument and the result must be integers of the same bit width. + The first argument, ptr, must be a pointer to a value of this + integer type. The targets may only lower integer representations they + support. +

    +
    Semantics:
    +

    + This intrinsic loads the value pointed to by ptr, yields it, and + stores val into the memory at ptr, all as one atomic operation. This provides the + equivalent of an atomic swap operation within the SSA framework. + +

    +
    Examples:
    +
    +%ptr      = malloc i32
    +            store i32 4, i32* %ptr
    +
    +%val1     = add i32 4, 4
    +%result1  = call i32 @llvm.atomic.swap.i32( i32* %ptr, i32 %val1 )
    +                                        ; yields {i32}:result1 = 4
    +%stored1  = icmp eq i32 %result1, 4     ; yields {i1}:stored1 = true
    +%memval1  = load i32* %ptr              ; yields {i32}:memval1 = 8
    +
    +%val2     = add i32 1, 1
    +%result2  = call i32 @llvm.atomic.swap.i32( i32* %ptr, i32 %val2 )
    +                                        ; yields {i32}:result2 = 8
    +
    +%stored2  = icmp eq i32 %result2, 8     ; yields {i1}:stored2 = true
    +%memval2  = load i32* %ptr              ; yields {i32}:memval2 = 2
    +
    +
    + + + +
    +
    Syntax:
    +

    + This is an overloaded intrinsic. You can use llvm.atomic.las on any + integer bit width. Not all targets support all bit widths however.

    +
    +declare i8 @llvm.atomic.las.i8( i8* <ptr>, i8 <delta> )
    +declare i16 @llvm.atomic.las.i16( i16* <ptr>, i16 <delta> )
    +declare i32 @llvm.atomic.las.i32( i32* <ptr>, i32 <delta> )
    +declare i64 @llvm.atomic.las.i64( i64* <ptr>, i64 <delta> )
    +
    +
    +
    Overview:
    +

    + This intrinsic adds delta to the value stored in memory at + ptr. It yields the original value at ptr. +

    +
    Arguments:
    +

    + + The intrinsic takes two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support. +

    +
    Semantics:
    +

    + This intrinsic does a series of operations atomically. It first loads the + value stored at ptr. It then adds delta and stores the result + to ptr. It yields the original value stored at ptr. +

    + +
    Examples:
    +
    +%ptr      = malloc i32
    +        store i32 4, i32* %ptr
    +%result1  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 4 )
    +                                ; yields {i32}:result1 = 4
    +%result2  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 2 )
    +                                ; yields {i32}:result2 = 8
    +%result3  = call i32 @llvm.atomic.las.i32( i32* %ptr, i32 5 )
    +                                ; yields {i32}:result3 = 10
    +%memval   = load i32* %ptr      ; yields {i32}:memval = 15
    +
    +
    +
    diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 22487dd5c37b..70c7185c613a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -356,6 +356,16 @@ public: SDOperand getVAArg(MVT::ValueType VT, SDOperand Chain, SDOperand Ptr, SDOperand SV); + /// getAtomic - Gets a node for an atomic op, produces result and chain, takes + // 3 operands + SDOperand getAtomic(unsigned Opcode, SDOperand Chain, SDOperand Ptr, + SDOperand A2, SDOperand A3, MVT::ValueType VT); + + /// getAtomic - Gets a node for an atomic op, produces result and chain, takes + // 2 operands + SDOperand getAtomic(unsigned Opcode, SDOperand Chain, SDOperand Ptr, + SDOperand A2, MVT::ValueType VT); + /// getLoad - Loads are not normal binary operators: their result type is not /// determined by their operands, and they produce a value AND a token chain. /// diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index c1a50bb7dc7e..293bb73a2390 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -441,7 +441,7 @@ namespace ISD { // is added / subtracted from the base pointer to form the address (for // indexed memory ops). LOAD, STORE, - + // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned // to a specified boundary. This node always has two return values: a new // stack pointer value and a chain. The first operand is the token chain, @@ -591,12 +591,30 @@ namespace ISD { // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load, // store-store, device) - // This corresponds to the atomic.barrier intrinsic. + // This corresponds to the memory.barrier intrinsic. // it takes an input chain, 4 operands to specify the type of barrier, an // operand specifying if the barrier applies to device and uncached memory // and produces an output chain. 
MEMBARRIER, + // Val, OUTCHAIN = ATOMIC_LCS(INCHAIN, ptr, cmp, swap) + // this corresponds to the atomic.lcs intrinsic. + // cmp is compared to *ptr, and if equal, swap is stored in *ptr. + // the return is always the original value in *ptr + ATOMIC_LCS, + + // Val, OUTCHAIN = ATOMIC_LAS(INCHAIN, ptr, amt) + // this corresponds to the atomic.las intrinsic. + // *ptr + amt is stored to *ptr atomically. + // the return is always the original value in *ptr + ATOMIC_LAS, + + // Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) + // this corresponds to the atomic.swap intrinsic. + // amt is stored to *ptr atomically. + // the return is always the original value in *ptr + ATOMIC_SWAP, + // BUILTIN_OP_END - This must be the last enum value in this list. BUILTIN_OP_END }; @@ -1170,6 +1188,33 @@ public: SDOperand getValue() const { return Op; } }; +class AtomicSDNode : public SDNode { + virtual void ANCHOR(); // Out-of-line virtual method to give class a home. + SDOperand Ops[4]; + MVT::ValueType OrigVT; +public: + AtomicSDNode(unsigned Opc, SDVTList VTL, SDOperand Chain, SDOperand X, + SDOperand Y, SDOperand Z, MVT::ValueType VT) + : SDNode(Opc, VTL) { + Ops[0] = Chain; + Ops[1] = X; + Ops[2] = Y; + Ops[3] = Z; + InitOperands(Ops, 4); + OrigVT=VT; + } + AtomicSDNode(unsigned Opc, SDVTList VTL, SDOperand Chain, SDOperand X, + SDOperand Y, MVT::ValueType VT) + : SDNode(Opc, VTL) { + Ops[0] = Chain; + Ops[1] = X; + Ops[2] = Y; + InitOperands(Ops, 3); + OrigVT=VT; + } + MVT::ValueType getVT() const { return OrigVT; } +}; + class StringSDNode : public SDNode { std::string Value; virtual void ANCHOR(); // Out-of-line virtual method to give class a home. 
diff --git a/llvm/include/llvm/Intrinsics.td b/llvm/include/llvm/Intrinsics.td index 64b3abf52bfe..c29bd407fdcd 100644 --- a/llvm/include/llvm/Intrinsics.td +++ b/llvm/include/llvm/Intrinsics.td @@ -64,7 +64,7 @@ class LLVMPointerType class LLVMMatchType : LLVMType{ int Number = num; -} +} def llvm_void_ty : LLVMType; def llvm_anyint_ty : LLVMType; @@ -267,6 +267,17 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty, def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>; +def int_atomic_lcs : Intrinsic<[llvm_anyint_ty, LLVMPointerType>, + LLVMMatchType<0>, LLVMMatchType<0>], + [IntrWriteArgMem]>, GCCBuiltin<"__sync_val_compare_and_swap">; +def int_atomic_las : Intrinsic<[llvm_anyint_ty, LLVMPointerType>, + LLVMMatchType<0>], + [IntrWriteArgMem]>, GCCBuiltin<"__sync_fetch_and_add">; +def int_atomic_swap : Intrinsic<[llvm_anyint_ty, LLVMPointerType>, + LLVMMatchType<0>], + [IntrWriteArgMem]>, GCCBuiltin<"__sync_lock_test_and_set">; + + //===-------------------------- Other Intrinsics --------------------------===// // def int_flt_rounds : Intrinsic<[llvm_i32_ty]>, diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h index 0d03cdad1dc0..4515b90b2baa 100644 --- a/llvm/include/llvm/Target/TargetLowering.h +++ b/llvm/include/llvm/Target/TargetLowering.h @@ -1252,7 +1252,7 @@ private: /// TargetDAGCombineArray - Targets can specify ISD nodes that they would /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(), /// which sets a bit in this array. - unsigned char TargetDAGCombineArray[156/(sizeof(unsigned char)*8)]; + unsigned char TargetDAGCombineArray[160/(sizeof(unsigned char)*8)]; /// PromoteToType - For operations that must be promoted to a specific type, /// this holds the destination type. 
This map should be sparse, so don't hold diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 429855db163c..91e52a6c9edc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1153,6 +1153,31 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) { break; } + case ISD::ATOMIC_LCS: + case ISD::ATOMIC_LAS: + case ISD::ATOMIC_SWAP: { + assert(((Node->getNumOperands() == 4 && Node->getOpcode() == ISD::ATOMIC_LCS) || + (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_LAS) || + (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_SWAP)) && + "Invalid MemBarrier node!"); + int num = Node->getOpcode() == ISD::ATOMIC_LCS ? 4 : 3; + MVT::ValueType VT = Node->getValueType(0); + switch (TLI.getOperationAction(ISD::ATOMIC_LCS, VT)) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Legal: { + SDOperand Ops[4]; + for (int x = 0; x < num; ++x) + Ops[x] = LegalizeOp(Node->getOperand(x)); + Result = DAG.UpdateNodeOperands(Result, &Ops[0], num); + AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0)); + AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1)); + return Result.getValue(Op.ResNo); + break; + } + } + break; + } + case ISD::Constant: { ConstantSDNode *CN = cast(Node); unsigned opAction = @@ -4228,6 +4253,27 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) { break; } + case ISD::ATOMIC_LCS: { + Tmp2 = PromoteOp(Node->getOperand(2)); + Tmp3 = PromoteOp(Node->getOperand(3)); + Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0), + Node->getOperand(1), Tmp2, Tmp3, + cast(Node)->getVT()); + // Remember that we legalized the chain. 
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1))); + break; + } + case ISD::ATOMIC_LAS: + case ISD::ATOMIC_SWAP: { + Tmp2 = PromoteOp(Node->getOperand(2)); + Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0), + Node->getOperand(1), Tmp2, + cast(Node)->getVT()); + // Remember that we legalized the chain. + AddLegalizedOperand(Op.getValue(1), LegalizeOp(Result.getValue(1))); + break; + } + case ISD::AND: case ISD::OR: case ISD::XOR: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 28f612d8cd1e..803a38cbe135 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2427,6 +2427,43 @@ SDOperand SelectionDAG::getMemset(SDOperand Chain, SDOperand Dest, return getNode(ISD::MEMSET, MVT::Other, Ops, 6); } +SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, + SDOperand Ptr, SDOperand A2, + SDOperand A3, MVT::ValueType VT) { + assert(Opcode == ISD::ATOMIC_LCS && "Invalid Atomic Op"); + SDVTList VTs = getVTList(A2.getValueType(), MVT::Other); + FoldingSetNodeID ID; + SDOperand Ops[] = {Chain, Ptr, A2, A3}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger((unsigned int)VT); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDOperand(E, 0); + SDNode* N = new AtomicSDNode(Opcode, VTs, Chain, Ptr, A2, A3, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + +SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain, + SDOperand Ptr, SDOperand A2, + MVT::ValueType VT) { + assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP) + && "Invalid Atomic Op"); + SDVTList VTs = getVTList(A2.getValueType(), MVT::Other); + FoldingSetNodeID ID; + SDOperand Ops[] = {Chain, Ptr, A2}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger((unsigned int)VT); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + 
return SDOperand(E, 0); + SDNode* N = new AtomicSDNode(Opcode, VTs, Chain, Ptr, A2, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDOperand(N, 0); +} + SDOperand SelectionDAG::getLoad(MVT::ValueType VT, SDOperand Chain, SDOperand Ptr, const Value *SV, int SVOffset, @@ -3593,6 +3630,7 @@ void CondCodeSDNode::ANCHOR() {} void VTSDNode::ANCHOR() {} void LoadSDNode::ANCHOR() {} void StoreSDNode::ANCHOR() {} +void AtomicSDNode::ANCHOR() {} HandleSDNode::~HandleSDNode() { SDVTList VTs = { 0, 0 }; @@ -3821,6 +3859,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { } case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_LCS: return "AtomicLCS"; + case ISD::ATOMIC_LAS: return "AtomicLAS"; + case ISD::ATOMIC_SWAP: return "AtomicSWAP"; case ISD::PCMARKER: return "PCMarker"; case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; case ISD::SRCVALUE: return "SrcValue"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index d4e33b0d1036..def4f9a01820 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -221,7 +221,7 @@ static bool isSelector(Instruction *I) { /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a -/// switch instruction, which may expand to multiple basic blocks. +/// switch or atomic instruction, which may expand to multiple basic blocks. 
static bool isUsedOutsideOfDefiningBlock(Instruction *I) { if (isa(I)) return true; BasicBlock *BB = I->getParent(); @@ -3059,6 +3059,38 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, MVT::Other, &Ops[0], 6)); return 0; } + case Intrinsic::atomic_lcs: { + SDOperand Root = getRoot(); + SDOperand O3 = getValue(I.getOperand(3)); + SDOperand L = DAG.getAtomic(ISD::ATOMIC_LCS, Root, + getValue(I.getOperand(1)), + getValue(I.getOperand(2)), + O3, O3.getValueType()); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_las: { + SDOperand Root = getRoot(); + SDOperand O2 = getValue(I.getOperand(2)); + SDOperand L = DAG.getAtomic(ISD::ATOMIC_LAS, Root, + getValue(I.getOperand(1)), + O2, O2.getValueType()); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + case Intrinsic::atomic_swap: { + SDOperand Root = getRoot(); + SDOperand O2 = getValue(I.getOperand(2)); + SDOperand L = DAG.getAtomic(ISD::ATOMIC_SWAP, Root, + getValue(I.getOperand(1)), + O2, O2.getValueType()); + setValue(&I, L); + DAG.setRoot(L.getValue(1)); + return 0; + } + } } diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp index 028f8851038b..907415d73c3d 100644 --- a/llvm/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/llvm/lib/Target/Alpha/AlphaISelLowering.cpp @@ -629,3 +629,96 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, return std::vector(); } +//===----------------------------------------------------------------------===// +// Other Lowering Code +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + assert((MI->getOpcode() == Alpha::CAS32 || + MI->getOpcode() == Alpha::CAS64 || + MI->getOpcode() == 
Alpha::LAS32 || + MI->getOpcode() == Alpha::LAS64 || + MI->getOpcode() == Alpha::SWAP32 || + MI->getOpcode() == Alpha::SWAP64) && + "Unexpected instr type to insert"); + + bool is32 = MI->getOpcode() == Alpha::CAS32 || + MI->getOpcode() == Alpha::LAS32 || + MI->getOpcode() == Alpha::SWAP32; + + //Load locked store conditional for atomic ops take on the same form + //start: + //ll + //do stuff (maybe branch to exit) + //sc + //test sc and maybe branck to start + //exit: + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + ilist::iterator It = BB; + ++It; + + MachineBasicBlock *thisMBB = BB; + MachineBasicBlock *llscMBB = new MachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); + + for(MachineBasicBlock::succ_iterator i = thisMBB->succ_begin(), + e = thisMBB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + while(!thisMBB->succ_empty()) + thisMBB->removeSuccessor(thisMBB->succ_begin()); + + MachineFunction *F = BB->getParent(); + F->getBasicBlockList().insert(It, llscMBB); + F->getBasicBlockList().insert(It, sinkMBB); + + BuildMI(thisMBB, TII->get(Alpha::BR)).addMBB(llscMBB); + + unsigned reg_res = MI->getOperand(0).getReg(), + reg_ptr = MI->getOperand(1).getReg(), + reg_v2 = MI->getOperand(2).getReg(), + reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); + + BuildMI(llscMBB, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), + reg_res).addImm(0).addReg(reg_ptr); + switch (MI->getOpcode()) { + case Alpha::CAS32: + case Alpha::CAS64: { + unsigned reg_cmp + = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); + BuildMI(llscMBB, TII->get(Alpha::CMPEQ), reg_cmp) + .addReg(reg_v2).addReg(reg_res); + BuildMI(llscMBB, TII->get(Alpha::BEQ)) + .addImm(0).addReg(reg_cmp).addMBB(sinkMBB); + BuildMI(llscMBB, TII->get(Alpha::BISr), reg_store) + .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg()); + break; + } + case Alpha::LAS32: + case Alpha::LAS64: { + BuildMI(llscMBB, TII->get(is32 ? 
Alpha::ADDLr : Alpha::ADDQr), reg_store) + .addReg(reg_res).addReg(reg_v2); + break; + } + case Alpha::SWAP32: + case Alpha::SWAP64: { + BuildMI(llscMBB, TII->get(Alpha::BISr), reg_store) + .addReg(reg_v2).addReg(reg_v2); + break; + } + } + BuildMI(llscMBB, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store) + .addReg(reg_store).addImm(0).addReg(reg_ptr); + BuildMI(llscMBB, TII->get(Alpha::BEQ)) + .addImm(0).addReg(reg_store).addMBB(llscMBB); + BuildMI(llscMBB, TII->get(Alpha::BR)).addMBB(sinkMBB); + + thisMBB->addSuccessor(llscMBB); + llscMBB->addSuccessor(llscMBB); + llscMBB->addSuccessor(sinkMBB); + delete MI; // The pseudo instruction is gone now. + + return sinkMBB; +} diff --git a/llvm/lib/Target/Alpha/AlphaISelLowering.h b/llvm/lib/Target/Alpha/AlphaISelLowering.h index a118d99462f7..41a4b54c5f9a 100644 --- a/llvm/lib/Target/Alpha/AlphaISelLowering.h +++ b/llvm/lib/Target/Alpha/AlphaISelLowering.h @@ -88,6 +88,9 @@ namespace llvm { MVT::ValueType VT) const; bool hasITOF() { return useITOF; } + + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB); }; } diff --git a/llvm/lib/Target/Alpha/AlphaInstrInfo.td b/llvm/lib/Target/Alpha/AlphaInstrInfo.td index 6274a3ef7223..19a846054be3 100644 --- a/llvm/lib/Target/Alpha/AlphaInstrInfo.td +++ b/llvm/lib/Target/Alpha/AlphaInstrInfo.td @@ -167,6 +167,23 @@ def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64 "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>; +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. 
+def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_lcs_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; +def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_lcs_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; + +def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_las_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_las_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; + +def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", + [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; +} + //*********************** //Real instructions //*********************** @@ -568,6 +585,18 @@ def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal", def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB), (LDQl texternalsym:$ext, GPRC:$RB)>; +let OutOperandList = (outs GPRC:$RR), + InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB), + Constraints = "$RA = $RR", + DisableEncoding = "$RR" in { +def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>; +def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>; +} +let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; +def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; +} + def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier @@ -965,7 +994,6 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), 
//S_floating : IEEE Single //T_floating : IEEE Double - //Unused instructions //Mnemonic Format Opcode Description //CALL_PAL Pcd 00 Trap to PALcode @@ -973,12 +1001,8 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), //EXCB Mfc 18.0400 Exception barrier //FETCH Mfc 18.8000 Prefetch data //FETCH_M Mfc 18.A000 Prefetch data, modify intent -//LDL_L Mem 2A Load sign-extended longword locked -//LDQ_L Mem 2B Load quadword locked //LDQ_U Mem 0B Load unaligned quadword //MB Mfc 18.4000 Memory barrier -//STL_C Mem 2E Store longword conditional -//STQ_C Mem 2F Store quadword conditional //STQ_U Mem 0F Store unaligned quadword //TRAPB Mfc 18.0000 Trap barrier //WH64 Mfc 18.F800 Write hint  64 bytes diff --git a/llvm/lib/Target/TargetSelectionDAG.td b/llvm/lib/Target/TargetSelectionDAG.td index eeed99429694..47d9ba48b7b6 100644 --- a/llvm/lib/Target/TargetSelectionDAG.td +++ b/llvm/lib/Target/TargetSelectionDAG.td @@ -189,6 +189,12 @@ def STDMemBarrier : SDTypeProfile<0, 5, [ SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>, SDTCisInt<0> ]>; +def STDAtomic3 : SDTypeProfile<1, 3, [ + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1> +]>; +def STDAtomic2 : SDTypeProfile<1, 2, [ + SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1> +]>; class SDCallSeqStart constraints> : SDTypeProfile<0, 1, constraints>; @@ -336,6 +342,13 @@ def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier, [SDNPHasChain, SDNPSideEffect]>; +// Do not use atomic_* directly, use atomic_*_size (see below) +def atomic_lcs : SDNode<"ISD::ATOMIC_LCS", STDAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_las : SDNode<"ISD::ATOMIC_LAS", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; +def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; // Do not use ld, st directly. 
Use load, extload, sextload, zextload, store, // and truncst (see below). @@ -722,6 +735,84 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), return false; }]>; +//Atomic patterns +def atomic_lcs_8 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), + (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i8; + return false; +}]>; +def atomic_lcs_16 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), + (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i16; + return false; +}]>; +def atomic_lcs_32 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), + (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i32; + return false; +}]>; +def atomic_lcs_64 : PatFrag<(ops node:$ptr, node:$cmp, node:$swp), + (atomic_lcs node:$ptr, node:$cmp, node:$swp), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i64; + return false; +}]>; + +def atomic_las_8 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_las node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i8; + return false; +}]>; +def atomic_las_16 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_las node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i16; + return false; +}]>; +def atomic_las_32 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_las node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i32; + return false; +}]>; +def atomic_las_64 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_las node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i64; + return false; +}]>; + +def atomic_swap_8 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_swap node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i8; + return false; +}]>; +def 
atomic_swap_16 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_swap node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i16; + return false; +}]>; +def atomic_swap_32 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_swap node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i32; + return false; +}]>; +def atomic_swap_64 : PatFrag<(ops node:$ptr, node:$inc), + (atomic_swap node:$ptr, node:$inc), [{ + if (AtomicSDNode* V = dyn_cast(N)) + return V->getVT() == MVT::i64; + return false; +}]>; + + + // setcc convenience fragments. def setoeq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 0071c00262e2..98c588525086 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -2535,6 +2535,15 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), } +//===----------------------------------------------------------------------===// +// Atomic support +// +let Defs = [EAX] in +def LCMPXCHGL : I<0, Pseudo, (outs GR32:$dst), + (ins GR32:$ptr, GR32:$cmp, GR32:$swap), + "movl $cmp, %eax ; lock cmpxchgl $swap,($ptr) ; movl %eax, $dst", + [(set GR32:$dst, (atomic_lcs_32 GR32:$ptr, GR32:$cmp, GR32:$swap))]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -2683,7 +2692,6 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===//