From b96976e30de64e7ee32851c909e0ab909016b4ce Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 17 Dec 2015 22:19:27 +0000 Subject: [PATCH] [InstCombine] Extend peephole DSE to handle unordered atomics This extends the same line of reasoning used in EarlyCSE w/http://reviews.llvm.org/D15352 to the DSE implementation in InstCombine. Key points: * We only remove unordered or simple stores. * The loads producing values consumed by dead stores don't influence whether the store is dead. Differential Revision: http://reviews.llvm.org/D15354 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255932 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombineLoadStoreAlloca.cpp | 17 ++- test/Transforms/InstCombine/store.ll | 113 ++++++++++++++++++ 2 files changed, 124 insertions(+), 6 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 12566922c83..aca014f04cd 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1038,9 +1038,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { return &SI; } - // Don't hack volatile/atomic stores. - // FIXME: Some bits are legal for atomic stores; needs refactoring. - if (!SI.isSimple()) return nullptr; + // Don't hack volatile/ordered stores. + // FIXME: Some bits are legal for ordered atomic stores; needs refactoring. + if (!SI.isUnordered()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. @@ -1072,7 +1072,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; @@ -1086,9 +1086,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - LI->isSimple()) + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) { + assert(SI.isUnordered() && "can't eliminate ordering operation"); return EraseInstFromFunction(SI); + } // Otherwise, this is a load from some other location. Stores before it // may not be dead. @@ -1114,6 +1115,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return EraseInstFromFunction(SI); + // The code below needs to be audited and adjusted for unordered atomics + if (!SI.isSimple()) + return nullptr; + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll index 5dfbd714090..b8730413f1b 100644 --- a/test/Transforms/InstCombine/store.ll +++ b/test/Transforms/InstCombine/store.ll @@ -113,6 +113,119 @@ for.end: ; preds = %for.cond ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0 } +define void @dse1(i32* %p) { +; CHECK-LABEL: dse1 +; CHECK-NEXT: store +; CHECK-NEXT: ret + store i32 0, i32* %p + store i32 0, i32* %p + ret void +} + +; Slightly subtle: if we're mixing atomic and non-atomic access to the +; same location, then the contents of the location are undefined if there's +; an actual race. As such, we're free to pick either store under the +; assumption that we're not racing with any other thread. +define void @dse2(i32* %p) { +; CHECK-LABEL: dse2 +; CHECK-NEXT: store i32 0, i32* %p +; CHECK-NEXT: ret + store atomic i32 0, i32* %p unordered, align 4 + store i32 0, i32* %p + ret void +} + +define void @dse3(i32* %p) { +; CHECK-LABEL: dse3 +; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4 +; CHECK-NEXT: ret + store i32 0, i32* %p + store atomic i32 0, i32* %p unordered, align 4 + ret void +} + +define void @dse4(i32* %p) { +; CHECK-LABEL: dse4 +; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4 +; CHECK-NEXT: ret + store atomic i32 0, i32* %p unordered, align 4 + store atomic i32 0, i32* %p unordered, align 4 + ret void +} + +; Implementation limit - could remove unordered store here, but +; currently don't. +define void @dse5(i32* %p) { +; CHECK-LABEL: dse5 +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret + store atomic i32 0, i32* %p unordered, align 4 + store atomic i32 0, i32* %p seq_cst, align 4 + ret void +} + +define void @write_back1(i32* %p) { +; CHECK-LABEL: write_back1 +; CHECK-NEXT: ret + %v = load i32, i32* %p + store i32 %v, i32* %p + ret void +} + +define void @write_back2(i32* %p) { +; CHECK-LABEL: write_back2 +; CHECK-NEXT: ret + %v = load atomic i32, i32* %p unordered, align 4 + store i32 %v, i32* %p + ret void +} + +define void @write_back3(i32* %p) { +; CHECK-LABEL: write_back3 +; CHECK-NEXT: ret + %v = load i32, i32* %p + store atomic i32 %v, i32* %p unordered, align 4 + ret void +} + +define void @write_back4(i32* %p) { +; CHECK-LABEL: write_back4 +; CHECK-NEXT: ret + %v = load atomic i32, i32* %p unordered, align 4 + store atomic i32 %v, i32* %p unordered, align 4 + ret void +} + +; Can't remove store due to ordering side effect +define void @write_back5(i32* %p) { +; CHECK-LABEL: write_back5 +; CHECK-NEXT: load +; CHECK-NEXT: store +; CHECK-NEXT: ret + %v = load atomic i32, i32* %p unordered, align 4 + store atomic i32 %v, i32* %p seq_cst, align 4 + ret void +} + +define void @write_back6(i32* %p) { +; CHECK-LABEL: write_back6 +; CHECK-NEXT: load +; CHECK-NEXT: ret + %v = load atomic i32, i32* %p seq_cst, align 4 + store atomic i32 %v, i32* %p unordered, align 4 + ret void +} + +define void @write_back7(i32* %p) { +; CHECK-LABEL: write_back7 +; CHECK-NEXT: load +; CHECK-NEXT: ret + %v = load atomic volatile i32, i32* %p seq_cst, align 4 + store atomic i32 %v, i32* %p unordered, align 4 + ret void +} + !0 = !{!4, !4, i64 0} !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"}