diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 0b3173c5b1d..6fbe17005cd 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -236,6 +236,11 @@ public:
   ///
   succ_iterator removeSuccessor(succ_iterator I);
 
+  /// transferSuccessors - Transfers all the successors from fromMBB to this
+  /// machine basic block (i.e., copies all the successors of fromMBB and
+  /// removes all the successors of fromMBB).
+  void transferSuccessors(MachineBasicBlock *fromMBB);
+
   /// isSuccessor - Return true if the specified MBB is a successor of this
   /// block.
   bool isSuccessor(MachineBasicBlock *MBB) const;
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index c9872cf19f9..a02f7b96f0d 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -594,6 +594,24 @@ namespace ISD {
     // the return is always the original value in *ptr
     ATOMIC_SWAP,
 
+    // Val, OUTCHAIN = ATOMIC_LSS(INCHAIN, ptr, amt)
+    // this corresponds to the atomic.lss intrinsic.
+    // *ptr - amt is stored to *ptr atomically.
+    // the return is always the original value in *ptr
+    ATOMIC_LSS,
+
+    // Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt)
+    // this corresponds to the atomic.load.[OpName] intrinsic.
+    // op(*ptr, amt) is stored to *ptr atomically.
+    // the return is always the original value in *ptr
+    ATOMIC_LOAD_AND,
+    ATOMIC_LOAD_OR,
+    ATOMIC_LOAD_XOR,
+    ATOMIC_LOAD_MIN,
+    ATOMIC_LOAD_MAX,
+    ATOMIC_LOAD_UMIN,
+    ATOMIC_LOAD_UMAX,
+
     // BUILTIN_OP_END - This must be the last enum value in this list.
     BUILTIN_OP_END
   };
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 184e0632b99..633063eb48f 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -267,22 +267,62 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty,
 def int_memory_barrier : Intrinsic<[llvm_void_ty, llvm_i1_ty, llvm_i1_ty,
                                     llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>;
 
-def int_atomic_lcs  : Intrinsic<[llvm_anyint_ty,
-                                 LLVMPointerType<LLVMMatchType<0>>,
-                                 LLVMMatchType<0>, LLVMMatchType<0>],
+def int_atomic_lcs  : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>, LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_val_compare_and_swap">;
+def int_atomic_las  : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_add">;
+def int_atomic_swap : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
                                 [IntrWriteArgMem]>,
-                      GCCBuiltin<"__sync_val_compare_and_swap">;
-def int_atomic_las  : Intrinsic<[llvm_anyint_ty,
-                                 LLVMPointerType<LLVMMatchType<0>>,
-                                 LLVMMatchType<0>],
-                                [IntrWriteArgMem]>,
-                      GCCBuiltin<"__sync_fetch_and_add">;
-def int_atomic_swap : Intrinsic<[llvm_anyint_ty,
-                                 LLVMPointerType<LLVMMatchType<0>>,
-                                 LLVMMatchType<0>],
-                                [IntrWriteArgMem]>,
-                      GCCBuiltin<"__sync_lock_test_and_set">;
-
+                      GCCBuiltin<"__sync_lock_test_and_set">;
+def int_atomic_lss  : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_sub">;
+def int_atomic_load_and : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_and">;
+def int_atomic_load_or : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_or">;
+def int_atomic_load_xor : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_xor">;
+def int_atomic_load_min : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_min">;
+def int_atomic_load_max : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_max">;
+def int_atomic_load_umin : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_umin">;
+def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty,
+                                 LLVMPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrWriteArgMem]>,
+                      GCCBuiltin<"__sync_fetch_and_umax">;
+
 //===-------------------------- Other Intrinsics --------------------------===//
 //
 def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
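For reference, the GCCBuiltin annotations above tie each new intrinsic to a __sync builtin, and the fetch-and-op semantics (each call returns the value *ptr held before the update) can be shown with a minimal C++ sketch. The min/max variants are omitted here because the builtin names the patch lists for them are not part of the standard GCC __sync set:

    // Illustration only: the C-level builtins behind the new intrinsics.
    int fetch_ops(volatile int *p, int v) {
      int a = __sync_fetch_and_add(p, v);           // llvm.atomic.las
      int s = __sync_fetch_and_sub(p, v);           // llvm.atomic.lss
      int n = __sync_fetch_and_and(p, v);           // llvm.atomic.load.and
      int o = __sync_fetch_and_or(p, v);            // llvm.atomic.load.or
      int x = __sync_fetch_and_xor(p, v);           // llvm.atomic.load.xor
      int w = __sync_lock_test_and_set(p, v);       // llvm.atomic.swap
      int c = __sync_val_compare_and_swap(p, w, v); // llvm.atomic.lcs
      return a + s + n + o + x + w + c;             // each is the pre-op value
    }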
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 5f8c1c2ae4f..0b8dda8f977 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1340,12 +1340,15 @@ private:
   /// by the system, this holds the same type (e.g. i32 -> i32).
   MVT::ValueType TransformToType[MVT::LAST_VALUETYPE];
 
+  // Defines the capacity of the TargetLowering::OpActions table
+  static const int OpActionsCapacity = 173;
+
   /// OpActions - For each operation and each value type, keep a LegalizeAction
   /// that indicates how instruction selection should deal with the operation.
   /// Most operations are Legal (aka, supported natively by the target), but
   /// operations that are not should be described.  Note that operations on
   /// non-legal value types are not described here.
-  uint64_t OpActions[156];
+  uint64_t OpActions[OpActionsCapacity];
 
   /// LoadXActions - For each load of load extension type and each value type,
   /// keep a LegalizeAction that indicates how instruction selection should deal
@@ -1378,7 +1381,7 @@ private:
   /// TargetDAGCombineArray - Targets can specify ISD nodes that they would
   /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
   /// which sets a bit in this array.
-  unsigned char TargetDAGCombineArray[160/(sizeof(unsigned char)*8)];
+  unsigned char TargetDAGCombineArray[168/(sizeof(unsigned char)*8)];
 
   /// PromoteToType - For operations that must be promoted to a specific type,
   /// this holds the destination type.  This map should be sparse, so don't hold
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 58ca6efcfe2..01aaba5282b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -252,6 +252,19 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
   Predecessors.erase(I);
 }
 
+void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB)
+{
+  if (this == fromMBB)
+    return;
+
+  for (MachineBasicBlock::succ_iterator iter = fromMBB->succ_begin(),
+       end = fromMBB->succ_end(); iter != end; ++iter) {
+    addSuccessor(*iter);
+  }
+  while (!fromMBB->succ_empty())
+    fromMBB->removeSuccessor(fromMBB->succ_begin());
+}
+
 bool MachineBasicBlock::isSuccessor(MachineBasicBlock *MBB) const {
   std::vector<MachineBasicBlock *>::const_iterator I =
     std::find(Successors.begin(), Successors.end(), MBB);
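transferSuccessors replaces the add-then-remove loops that block-splitting code previously wrote by hand (the X86 custom inserters below are converted to it). A self-contained toy model of its semantics, not LLVM code:

    #include <vector>

    // After to.transferSuccessors(&from), `to` owns every out-edge
    // `from` had and `from` has none; a self-transfer is a no-op.
    struct Block {
      std::vector<Block*> succs;
      void transferSuccessors(Block *from) {
        if (this == from) return;
        succs.insert(succs.end(), from->succs.begin(), from->succs.end());
        from->succs.clear();
      }
    };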
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f8e44ec6956..4aa38cd1fab 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1235,32 +1235,58 @@ SDOperand SelectionDAGLegalize::LegalizeOp(SDOperand Op) {
     break;
   }
 
-  case ISD::ATOMIC_LCS:
-  case ISD::ATOMIC_LAS:
-  case ISD::ATOMIC_SWAP: {
-    assert(((Node->getNumOperands() == 4 && Node->getOpcode() == ISD::ATOMIC_LCS) ||
-            (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_LAS) ||
-            (Node->getNumOperands() == 3 && Node->getOpcode() == ISD::ATOMIC_SWAP)) &&
-           "Invalid Atomic node!");
-    int num = Node->getOpcode() == ISD::ATOMIC_LCS ? 4 : 3;
+  case ISD::ATOMIC_LCS: {
+    unsigned int num_operands = 4;
+    assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!");
     SDOperand Ops[4];
-    for (int x = 0; x < num; ++x)
+    for (unsigned int x = 0; x < num_operands; ++x)
       Ops[x] = LegalizeOp(Node->getOperand(x));
-    Result = DAG.UpdateNodeOperands(Result, &Ops[0], num);
+    Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands);
+
+    switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
+    default: assert(0 && "This action is not supported yet!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Result, DAG);
+      break;
+    case TargetLowering::Legal:
+      break;
+    }
+    AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
+    AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.ResNo);
+  }
+  case ISD::ATOMIC_LAS:
+  case ISD::ATOMIC_LSS:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP: {
+    unsigned int num_operands = 3;
+    assert(Node->getNumOperands() == num_operands && "Invalid Atomic node!");
+    SDOperand Ops[3];
+    for (unsigned int x = 0; x < num_operands; ++x)
+      Ops[x] = LegalizeOp(Node->getOperand(x));
+    Result = DAG.UpdateNodeOperands(Result, &Ops[0], num_operands);
 
     switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) {
     default: assert(0 && "This action is not supported yet!");
     case TargetLowering::Custom:
       Result = TLI.LowerOperation(Result, DAG);
       break;
+    case TargetLowering::Expand:
+      Result = SDOperand(TLI.ExpandOperationResult(Op.Val, DAG), 0);
+      break;
     case TargetLowering::Legal:
       break;
     }
     AddLegalizedOperand(SDOperand(Node, 0), Result.getValue(0));
     AddLegalizedOperand(SDOperand(Node, 1), Result.getValue(1));
     return Result.getValue(Op.ResNo);
-  }
-
+  }
   case ISD::Constant: {
     ConstantSDNode *CN = cast<ConstantSDNode>(Node);
     unsigned opAction =
@@ -4242,6 +4268,14 @@ SDOperand SelectionDAGLegalize::PromoteOp(SDOperand Op) {
     break;
   }
   case ISD::ATOMIC_LAS:
+  case ISD::ATOMIC_LSS:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
   case ISD::ATOMIC_SWAP: {
     Tmp2 = PromoteOp(Node->getOperand(2));
     Result = DAG.getAtomic(Node->getOpcode(), Node->getOperand(0),
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6b2f54393d4..26d55486174 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2855,7 +2855,11 @@ SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
 SDOperand SelectionDAG::getAtomic(unsigned Opcode, SDOperand Chain,
                                   SDOperand Ptr, SDOperand Val,
                                   MVT::ValueType VT) {
-  assert((Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_SWAP)
+  assert((   Opcode == ISD::ATOMIC_LAS || Opcode == ISD::ATOMIC_LSS
+          || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_LOAD_AND
+          || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR
+          || Opcode == ISD::ATOMIC_LOAD_MIN || Opcode == ISD::ATOMIC_LOAD_MAX
+          || Opcode == ISD::ATOMIC_LOAD_UMIN || Opcode == ISD::ATOMIC_LOAD_UMAX)
          && "Invalid Atomic Op");
   SDVTList VTs = getVTList(Val.getValueType(), MVT::Other);
   FoldingSetNodeID ID;
@@ -4269,7 +4273,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::MEMBARRIER: return "MemBarrier";
   case ISD::ATOMIC_LCS:  return "AtomicLCS";
   case ISD::ATOMIC_LAS:  return "AtomicLAS";
-  case ISD::ATOMIC_SWAP: return "AtomicSWAP";
+  case ISD::ATOMIC_LSS:  return "AtomicLSS";
+  case ISD::ATOMIC_LOAD_AND:  return "AtomicLoadAnd";
+  case ISD::ATOMIC_LOAD_OR:   return "AtomicLoadOr";
+  case ISD::ATOMIC_LOAD_XOR:  return "AtomicLoadXor";
+  case ISD::ATOMIC_LOAD_MIN:  return "AtomicLoadMin";
+  case ISD::ATOMIC_LOAD_MAX:  return "AtomicLoadMax";
+  case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+  case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+  case ISD::ATOMIC_SWAP: return "AtomicSWAP";
   case ISD::PCMARKER:      return "PCMarker";
   case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
   case ISD::SRCVALUE:   return "SrcValue";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 7fc5d617059..15e36298493 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -732,6 +732,10 @@ public:
     assert(0 && "UserOp2 should not exist at instruction selection time!");
     abort();
   }
+
+private:
+  inline const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+
 };
 } // end namespace llvm
 
@@ -2769,6 +2773,22 @@ static void addCatchInfo(CallInst &I, MachineModuleInfo *MMI,
   }
 }
 
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is a call instruction
+///                     Op is the associated NodeType for I
+const char *
+SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
+  SDOperand Root = getRoot();
+  SDOperand O2 = getValue(I.getOperand(2));
+  SDOperand L = DAG.getAtomic(Op, Root,
+                              getValue(I.getOperand(1)),
+                              O2, O2.getValueType());
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
 /// we want to emit this as a call to a named external function, return the name
 /// otherwise lower it and return null.
@@ -3205,27 +3225,26 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(L.getValue(1));
     return 0;
   }
-  case Intrinsic::atomic_las: {
-    SDOperand Root = getRoot();
-    SDOperand O2 = getValue(I.getOperand(2));
-    SDOperand L = DAG.getAtomic(ISD::ATOMIC_LAS, Root,
-                                getValue(I.getOperand(1)),
-                                O2, O2.getValueType());
-    setValue(&I, L);
-    DAG.setRoot(L.getValue(1));
-    return 0;
-  }
-  case Intrinsic::atomic_swap: {
-    SDOperand Root = getRoot();
-    SDOperand O2 = getValue(I.getOperand(2));
-    SDOperand L = DAG.getAtomic(ISD::ATOMIC_SWAP, Root,
-                                getValue(I.getOperand(1)),
-                                O2, O2.getValueType());
-    setValue(&I, L);
-    DAG.setRoot(L.getValue(1));
-    return 0;
-  }
-
+  case Intrinsic::atomic_las:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LAS);
+  case Intrinsic::atomic_lss:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LSS);
+  case Intrinsic::atomic_load_and:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+  case Intrinsic::atomic_load_or:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+  case Intrinsic::atomic_load_xor:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+  case Intrinsic::atomic_load_min:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+  case Intrinsic::atomic_load_max:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+  case Intrinsic::atomic_load_umin:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+  case Intrinsic::atomic_load_umax:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+  case Intrinsic::atomic_swap:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
   }
 }
 
@@ -4519,8 +4538,6 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
 }
 
-
-
 bool SelectionDAGISel::runOnFunction(Function &Fn) {
   // Get alias analysis for load/store combining.
   AA = &getAnalysis<AliasAnalysis>();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5ccad9ebf8c..371fbab2b81 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -165,7 +165,7 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
 
 TargetLowering::TargetLowering(TargetMachine &tm)
   : TM(tm), TD(TM.getTargetData()) {
-  assert(ISD::BUILTIN_OP_END <= 156 &&
+  assert(ISD::BUILTIN_OP_END <= OpActionsCapacity &&
          "Fixed size array in TargetLowering is not large enough!");
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
diff --git a/lib/Target/TargetSelectionDAG.td b/lib/Target/TargetSelectionDAG.td
index d70cc75da13..209cda0cebb 100644
--- a/lib/Target/TargetSelectionDAG.td
+++ b/lib/Target/TargetSelectionDAG.td
@@ -358,6 +358,22 @@ def atomic_las : SDNode<"ISD::ATOMIC_LAS" , STDAtomic2,
                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
 def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
                         [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_lss : SDNode<"ISD::ATOMIC_LSS" , STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
+def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2,
+                        [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
 
 // Do not use ld, st directly. Use load, extload, sextload, zextload, store,
 // and truncst (see below).
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e90c9303f3d..2df8af5e505 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -292,10 +292,12 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
   if (!Subtarget->hasSSE2())
     setOperationAction(ISD::MEMBARRIER    , MVT::Other, Expand);
 
+  // Expand certain atomics
   setOperationAction(ISD::ATOMIC_LCS     , MVT::i8, Custom);
   setOperationAction(ISD::ATOMIC_LCS     , MVT::i16, Custom);
   setOperationAction(ISD::ATOMIC_LCS     , MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_LCS     , MVT::i64, Custom);
+  setOperationAction(ISD::ATOMIC_LSS     , MVT::i32, Expand);
 
   // Use the default ISD::LOCATION, ISD::DECLARE expansion.
   setOperationAction(ISD::LOCATION, MVT::Other, Expand);
@@ -5511,6 +5513,15 @@ SDNode* X86TargetLowering::ExpandATOMIC_LCS(SDNode* Op, SelectionDAG &DAG) {
   return DAG.getNode(ISD::MERGE_VALUES, Tys, ResultVal, cpOutH.getValue(1)).Val;
 }
 
+SDNode* X86TargetLowering::ExpandATOMIC_LSS(SDNode* Op, SelectionDAG &DAG) {
+  MVT::ValueType T = cast<AtomicSDNode>(Op)->getVT();
+  assert(T == MVT::i32 && "Only know how to expand i32 LSS");
+  SDOperand negOp = DAG.getNode(ISD::SUB, T,
+                                DAG.getConstant(0, T), Op->getOperand(2));
+  return DAG.getAtomic(ISD::ATOMIC_LAS, Op->getOperand(0),
+                       Op->getOperand(1), negOp, T).Val;
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
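ExpandATOMIC_LSS above leans on the identity old - amt == old + (0 - amt): it negates the operand and reuses the ATOMIC_LAS (fetch-and-add) lowering, which x86 implements with lock xadd. A one-function C++ sketch of the same rewrite:

    // Sketch: fetch-and-sub expressed as fetch-and-add of the negation.
    int fetch_and_sub_via_add(volatile int *p, int amt) {
      return __sync_fetch_and_add(p, 0 - amt); // returns the pre-op value
    }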
@@ -5568,6 +5579,7 @@ SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
   case ISD::FP_TO_SINT:       return ExpandFP_TO_SINT(N, DAG);
   case ISD::READCYCLECOUNTER: return ExpandREADCYCLECOUNTER(N, DAG);
   case ISD::ATOMIC_LCS:       return ExpandATOMIC_LCS(N, DAG);
+  case ISD::ATOMIC_LSS:       return ExpandATOMIC_LSS(N, DAG);
   }
 }
 
@@ -5732,6 +5744,187 @@ X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps,
 // X86 Scheduler Hooks
 //===----------------------------------------------------------------------===//
 
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
+                                                       MachineBasicBlock *MBB,
+                                                       unsigned regOpc,
+                                                       unsigned immOpc) {
+  // For the atomic bitwise operator, we generate
+  //   thisMBB:
+  //   newMBB:
+  //     ld  EAX = [bitinstr.addr]
+  //     mov t1  = EAX
+  //     op  t2  = t1, [bitinstr.val]
+  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
+  //     jne newMBB
+  //   fallthrough -->nextMBB
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+  ++MBBIter;
+
+  // First build the CFG
+  MachineFunction *F = MBB->getParent();
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+  F->getBasicBlockList().insert(MBBIter, newMBB);
+  F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+  // Move all successors of thisMBB to nextMBB
+  nextMBB->transferSuccessors(thisMBB);
+
+  // Update thisMBB to fall through to newMBB
+  thisMBB->addSuccessor(newMBB);
+
+  // newMBB jumps to itself and falls through to nextMBB
+  newMBB->addSuccessor(nextMBB);
+  newMBB->addSuccessor(newMBB);
+
+  // Insert instructions into newMBB based on incoming instruction
+  assert(bInstr->getNumOperands() < 8 && "unexpected number of operands");
+  MachineOperand& destOper = bInstr->getOperand(0);
+  MachineOperand* argOpers[6];
+  int numArgs = bInstr->getNumOperands() - 1;
+  for (int i=0; i < numArgs; ++i)
+    argOpers[i] = &bInstr->getOperand(i+1);
+
+  // x86 address has 4 operands: base, index, scale, and displacement
+  int lastAddrIndx = 3; // [0,3]
+  int valArgIndx = 4;
+
+  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX);
+  for (int i=0; i <= lastAddrIndx; ++i)
+    (*MIB).addOperand(*argOpers[i]);
+
+  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1);
+  MIB.addReg(X86::EAX);
+
+  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+         && "invalid operand");
+  if (argOpers[valArgIndx]->isReg())
+    MIB = BuildMI(newMBB, TII->get(regOpc), t2);
+  else
+    MIB = BuildMI(newMBB, TII->get(immOpc), t2);
+  MIB.addReg(t1);
+  (*MIB).addOperand(*argOpers[valArgIndx]);
+
+  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+  for (int i=0; i <= lastAddrIndx; ++i)
+    (*MIB).addOperand(*argOpers[i]);
+  MIB.addReg(t2);
+
+  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+  MIB.addReg(X86::EAX);
+
+  // insert branch
+  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+  delete bInstr;   // The pseudo instruction is gone now.
+  return nextMBB;
+}
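Expressed in C++, the newMBB loop built above computes the following (shown for AND; OR and XOR differ only in the operator). __sync_bool_compare_and_swap stands in for the lock cmpxchg / jne pair:

    // Sketch of the loop's effect, not of the inserter itself.
    int atomic_fetch_and_and(volatile int *p, int val) {
      int old, desired;
      do {
        old = *p;            // ld EAX = [addr]; mov t1 = EAX
        desired = old & val; // op t2 = t1, val
      } while (!__sync_bool_compare_and_swap(p, old, desired)); // lcmpxchg; jne
      return old;            // mov dest = EAX (the original value)
    }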
+
+// private utility function
+MachineBasicBlock *
+X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
+                                                      MachineBasicBlock *MBB,
+                                                      unsigned cmovOpc) {
+  // For the atomic min/max operator, we generate
+  //   thisMBB:
+  //   newMBB:
+  //     ld   EAX = [min/max.addr]
+  //     mov  t1  = EAX
+  //     mov  t2  = [min/max.val]
+  //     cmp  t1, t2
+  //     cmov[cond] t2 = t1
+  //     lcs  dest = [min/max.addr], t2  [EAX is implicit]
+  //     jne  newMBB
+  //   fallthrough -->nextMBB
+  //
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  ilist<MachineBasicBlock>::iterator MBBIter = MBB;
+  ++MBBIter;
+
+  // First build the CFG
+  MachineFunction *F = MBB->getParent();
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *newMBB = new MachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *nextMBB = new MachineBasicBlock(LLVM_BB);
+  F->getBasicBlockList().insert(MBBIter, newMBB);
+  F->getBasicBlockList().insert(MBBIter, nextMBB);
+
+  // Move all successors of thisMBB to nextMBB
+  nextMBB->transferSuccessors(thisMBB);
+
+  // Update thisMBB to fall through to newMBB
+  thisMBB->addSuccessor(newMBB);
+
+  // newMBB jumps to itself and falls through to nextMBB
+  newMBB->addSuccessor(nextMBB);
+  newMBB->addSuccessor(newMBB);
+
+  // Insert instructions into newMBB based on incoming instruction
+  assert(mInstr->getNumOperands() < 8 && "unexpected number of operands");
+  MachineOperand& destOper = mInstr->getOperand(0);
+  MachineOperand* argOpers[6];
+  int numArgs = mInstr->getNumOperands() - 1;
+  for (int i=0; i < numArgs; ++i)
+    argOpers[i] = &mInstr->getOperand(i+1);
+
+  // x86 address has 4 operands: base, index, scale, and displacement
+  int lastAddrIndx = 3; // [0,3]
+  int valArgIndx = 4;
+
+  MachineInstrBuilder MIB = BuildMI(newMBB, TII->get(X86::MOV32rm), X86::EAX);
+  for (int i=0; i <= lastAddrIndx; ++i)
+    (*MIB).addOperand(*argOpers[i]);
+
+  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t1);
+  MIB.addReg(X86::EAX);
+
+  // We only support register and immediate values
+  assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm())
+         && "invalid operand");
+
+  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  if (argOpers[valArgIndx]->isReg())
+    MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), t2);
+  else
+    MIB = BuildMI(newMBB, TII->get(X86::MOV32ri), t2);
+  (*MIB).addOperand(*argOpers[valArgIndx]);
+
+  MIB = BuildMI(newMBB, TII->get(X86::CMP32rr));
+  MIB.addReg(t1);
+  MIB.addReg(t2);
+
+  // Generate the cmov
+  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
+  MIB = BuildMI(newMBB, TII->get(cmovOpc), t3);
+  MIB.addReg(t2);
+  MIB.addReg(t1);
+
+  // Compare and exchange if no one has modified the memory location
+  MIB = BuildMI(newMBB, TII->get(X86::LCMPXCHG32));
+  for (int i=0; i <= lastAddrIndx; ++i)
+    (*MIB).addOperand(*argOpers[i]);
+  MIB.addReg(t3);
+
+  MIB = BuildMI(newMBB, TII->get(X86::MOV32rr), destOper.getReg());
+  MIB.addReg(X86::EAX);
+
+  // insert branch
+  BuildMI(newMBB, TII->get(X86::JNE)).addMBB(newMBB);
+
+  delete mInstr;   // The pseudo instruction is gone now.
+  return nextMBB;
+}
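The min/max inserter uses the same retry loop, with the cmov choosing which value to store. A sketch for signed min; max, umin, and umax differ only in the comparison, matching the CMOVL/CMOVG/CMOVB/CMOVA opcodes passed in below:

    // Sketch of the loop's effect for ATOMMIN32.
    int atomic_fetch_and_min(volatile int *p, int val) {
      int old, desired;
      do {
        old = *p;                          // ld EAX; mov t1 = EAX; mov t2 = val
        desired = (old < val) ? old : val; // cmp t1, t2; cmovl
      } while (!__sync_bool_compare_and_swap(p, old, desired)); // lcmpxchg; jne
      return old;
    }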
+
+
 MachineBasicBlock *
 X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) {
@@ -5766,15 +5959,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   F->getBasicBlockList().insert(It, copy0MBB);
   F->getBasicBlockList().insert(It, sinkMBB);
-  // Update machine-CFG edges by first adding all successors of the current
+  // Update machine-CFG edges by transferring all successors of the current
   // block to the new block which will contain the Phi node for the select.
-  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
-      e = BB->succ_end(); i != e; ++i)
-    sinkMBB->addSuccessor(*i);
-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while(!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
+  sinkMBB->transferSuccessors(BB);
+
+  // Add the true and fallthrough blocks as its successors.
   BB->addSuccessor(copy0MBB);
   BB->addSuccessor(sinkMBB);
 
@@ -5874,6 +6063,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     delete MI;   // The pseudo instruction is gone now.
     return BB;
   }
+  case X86::ATOMAND32:
+    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
+                                               X86::AND32ri);
+  case X86::ATOMOR32:
+    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
+                                               X86::OR32ri);
+  case X86::ATOMXOR32:
+    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
+                                               X86::XOR32ri);
+  case X86::ATOMMIN32:
+    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
+  case X86::ATOMMAX32:
+    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
+  case X86::ATOMUMIN32:
+    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
+  case X86::ATOMUMAX32:
+    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
   }
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 55ad70e6426..287903913ed 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -346,6 +346,7 @@ namespace llvm {
     virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
                                                            MachineBasicBlock *MBB);
 
+
     /// getTargetNodeName - This method returns the name of a target specific
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
@@ -524,7 +525,8 @@ namespace llvm {
     SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
     SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
     SDNode *ExpandATOMIC_LCS(SDNode *N, SelectionDAG &DAG);
-
+    SDNode *ExpandATOMIC_LSS(SDNode *N, SelectionDAG &DAG);
+
     SDOperand EmitTargetCodeForMemset(SelectionDAG &DAG,
                                       SDOperand Chain,
                                       SDOperand Dst, SDOperand Src,
@@ -537,6 +539,23 @@ namespace llvm {
                                       bool AlwaysInline,
                                       const Value *DstSV, uint64_t DstSVOff,
                                       const Value *SrcSV, uint64_t SrcSVOff);
+
+    /// Utility function to emit atomic bitwise operations (and, or, xor).
+    /// It takes the bitwise instruction to expand, the associated machine basic
+    /// block, and the associated X86 opcodes for reg/reg and reg/imm.
+    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
+                                                   MachineInstr *BInstr,
+                                                   MachineBasicBlock *BB,
+                                                   unsigned regOpc,
+                                                   unsigned immOpc);
+
+    /// Utility function to emit atomic min and max.  It takes the min/max
+    /// instruction to expand, the associated basic block, and the associated
+    /// cmov opcode for moving the min or max value.
+    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *MInstr,
+                                                          MachineBasicBlock *BB,
+                                                          unsigned cmovOpc);
+
   };
 }
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c539eca940c..af615403612 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2598,6 +2598,63 @@ def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
                 TB, LOCK;
 }
 
+// Atomic and, or, xor, min, and max pseudo instructions
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMAND32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMAND32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMOR32 : I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMOR32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMXOR32 : I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMXOR32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMMIN32: I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMMIN32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMMAX32: I<0xC1, MRMSrcMem, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMMAX32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMUMIN32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMUMIN32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
+let Constraints = "$val = $dst", Defs = [EFLAGS],
+    usesCustomDAGSchedInserter = 1 in {
+def ATOMUMAX32: I<0xC1, MRMSrcMem,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+               "#ATOMUMAX32 PSEUDO!",
+               [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>,
+                TB, LOCK;
+}
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 2dd7b45974d..c0040779a9a 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -247,7 +247,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
   , MaxInlineSizeThreshold(128)
   , Is64Bit(is64Bit)
   , TargetType(isELF) { // Default to ELF unless otherwise specified.
-  
+
   // Determine default and user specified characteristics
   if (!FS.empty()) {
     // If feature string is not empty, parse features string.
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
new file mode 100644
index 00000000000..4687da11171
--- /dev/null
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -0,0 +1,93 @@
+; RUN: llvm-as < %s | llc -march=x86 -o %t1 -f
+; RUN: grep "lock xaddl" %t1 | count 4
+; RUN: grep "lock cmpxchgl" %t1 | count 13
+; RUN: grep "xchgl" %t1 | count 14
+; RUN: grep "cmova" %t1 | count 2
+; RUN: grep "cmovb" %t1 | count 2
+; RUN: grep "cmovg" %t1 | count 2
+; RUN: grep "cmovl" %t1 | count 2
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+define void @main(i32 %argc, i8** %argv) {
+entry:
+  %argc.addr = alloca i32    ; <i32*> [#uses=1]
+  %argv.addr = alloca i8**   ; <i8***> [#uses=1]
+  %val1 = alloca i32         ; <i32*> [#uses=2]
+  %val2 = alloca i32         ; <i32*> [#uses=15]
+  %andt = alloca i32         ; <i32*> [#uses=2]
+  %ort = alloca i32          ; <i32*> [#uses=2]
+  %xort = alloca i32         ; <i32*> [#uses=2]
+  %old = alloca i32          ; <i32*> [#uses=18]
+  %temp = alloca i32         ; <i32*> [#uses=2]
+  store i32 %argc, i32* %argc.addr
+  store i8** %argv, i8*** %argv.addr
+  store i32 0, i32* %val1
+  store i32 31, i32* %val2
+  store i32 3855, i32* %andt
+  store i32 3855, i32* %ort
+  store i32 3855, i32* %xort
+  store i32 4, i32* %temp
+  %tmp = load i32* %temp    ; <i32> [#uses=1]
+  call i32 @llvm.atomic.las.i32( i32* %val1, i32 %tmp )    ; <i32>:0 [#uses=1]
+  store i32 %0, i32* %old
+  call i32 @llvm.atomic.lss.i32( i32* %val2, i32 30 )    ; <i32>:1 [#uses=1]
+  store i32 %1, i32* %old
+  call i32 @llvm.atomic.las.i32( i32* %val2, i32 1 )    ; <i32>:2 [#uses=1]
+  store i32 %2, i32* %old
+  call i32 @llvm.atomic.lss.i32( i32* %val2, i32 1 )    ; <i32>:3 [#uses=1]
+  store i32 %3, i32* %old
+  call i32 @llvm.atomic.load.and.i32( i32* %andt, i32 4080 )    ; <i32>:4 [#uses=1]
+  store i32 %4, i32* %old
+  call i32 @llvm.atomic.load.or.i32( i32* %ort, i32 4080 )    ; <i32>:5 [#uses=1]
+  store i32 %5, i32* %old
+  call i32 @llvm.atomic.load.xor.i32( i32* %xort, i32 4080 )    ; <i32>:6 [#uses=1]
+  store i32 %6, i32* %old
+  call i32 @llvm.atomic.load.min.i32( i32* %val2, i32 16 )    ; <i32>:7 [#uses=1]
+  store i32 %7, i32* %old
+  %neg = sub i32 0, 1    ; <i32> [#uses=1]
+  call i32 @llvm.atomic.load.min.i32( i32* %val2, i32 %neg )    ; <i32>:8 [#uses=1]
+  store i32 %8, i32* %old
+  call i32 @llvm.atomic.load.max.i32( i32* %val2, i32 1 )    ; <i32>:9 [#uses=1]
+  store i32 %9, i32* %old
+  call i32 @llvm.atomic.load.max.i32( i32* %val2, i32 0 )    ; <i32>:10 [#uses=1]
+  store i32 %10, i32* %old
+  call i32 @llvm.atomic.load.umax.i32( i32* %val2, i32 65535 )    ; <i32>:11 [#uses=1]
+  store i32 %11, i32* %old
+  call i32 @llvm.atomic.load.umax.i32( i32* %val2, i32 10 )    ; <i32>:12 [#uses=1]
+  store i32 %12, i32* %old
+  call i32 @llvm.atomic.load.umin.i32( i32* %val2, i32 1 )    ; <i32>:13 [#uses=1]
+  store i32 %13, i32* %old
+  call i32 @llvm.atomic.load.umin.i32( i32* %val2, i32 10 )    ; <i32>:14 [#uses=1]
+  store i32 %14, i32* %old
+  call i32 @llvm.atomic.swap.i32( i32* %val2, i32 1976 )    ; <i32>:15 [#uses=1]
+  store i32 %15, i32* %old
+  %neg1 = sub i32 0, 10    ; <i32> [#uses=1]
+  call i32 @llvm.atomic.lcs.i32( i32* %val2, i32 %neg1, i32 1 )    ; <i32>:16 [#uses=1]
+  store i32 %16, i32* %old
+  call i32 @llvm.atomic.lcs.i32( i32* %val2, i32 1976, i32 1 )    ; <i32>:17 [#uses=1]
+  store i32 %17, i32* %old
+  ret void
+}
+
+declare i32 @llvm.atomic.las.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.lss.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.and.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.or.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.xor.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.min.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.max.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.umax.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.load.umin.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.swap.i32(i32*, i32) nounwind
+
+declare i32 @llvm.atomic.lcs.i32(i32*, i32, i32) nounwind