From f2cb55e4056868fcea90f644dad2e9bbc821ddf9 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 4 Jun 2012 22:44:21 +0000 Subject: [PATCH] When gvn decides to replace an instruction with another, we have to patch the replacement to make it at least as generic as the instruction being replaced. This includes: * dropping nsw/nuw flags * getting the least restrictive tbaa and fpmath metadata * merging ranges Fixes PR12979. llvm-svn: 157958 --- lib/Transforms/Scalar/GVN.cpp | 202 ++++++++++++++++++++++++++++++++- test/Transforms/GVN/fpmath.ll | 45 ++++++++ test/Transforms/GVN/pr12979.ll | 79 +++++++++++++ test/Transforms/GVN/range.ll | 101 +++++++++++++++++ test/Transforms/GVN/tbaa.ll | 81 +++++++++++++ 5 files changed, 506 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/GVN/fpmath.ll create mode 100644 test/Transforms/GVN/pr12979.ll create mode 100644 test/Transforms/GVN/range.ll create mode 100644 test/Transforms/GVN/tbaa.ll diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 09aff1b3b3c..c247ea9360c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" @@ -41,6 +42,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/PatternMatch.h" @@ -1734,6 +1736,202 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } +static MDNode *getMostGenericTBAA(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + if (A == B) + return A; + + SmallVector PathA; + MDNode *T = A; + while (T) { + PathA.push_back(T); + T = T->getNumOperands() >= 2 ? 
cast_or_null(T->getOperand(1)) : 0; + } + + SmallVector PathB; + T = B; + while (T) { + PathB.push_back(T); + T = T->getNumOperands() >= 2 ? cast_or_null(T->getOperand(1)) : 0; + } + + int IA = PathA.size() - 1; + int IB = PathB.size() - 1; + + MDNode *Ret = 0; + while (IA >= 0 && IB >=0) { + if (PathA[IA] == PathB[IB]) + Ret = PathA[IA]; + else + break; + --IA; + --IB; + } + return Ret; +} + +static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B) { + if (!A || !B) + return NULL; + + APFloat AVal = cast(A->getOperand(0))->getValueAPF(); + APFloat BVal = cast(B->getOperand(0))->getValueAPF(); + if (AVal.compare(BVal) == APFloat::cmpLessThan) + return A; + return B; +} + +static bool isContiguous(const ConstantRange &A, const ConstantRange &B) { + return A.getUpper() == B.getLower() || A.getLower() == B.getUpper(); +} + +static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) { + return !A.intersectWith(B).isEmptySet() || isContiguous(A, B); +} + +static bool tryMergeRange(SmallVector &EndPoints, ConstantInt *Low, + ConstantInt *High) { + ConstantRange NewRange(Low->getValue(), High->getValue()); + unsigned Size = EndPoints.size(); + APInt LB = cast(EndPoints[Size - 2])->getValue(); + APInt LE = cast(EndPoints[Size - 1])->getValue(); + ConstantRange LastRange(LB, LE); + if (canBeMerged(NewRange, LastRange)) { + ConstantRange Union = LastRange.unionWith(NewRange); + Type *Ty = High->getType(); + EndPoints[Size - 2] = ConstantInt::get(Ty, Union.getLower()); + EndPoints[Size - 1] = ConstantInt::get(Ty, Union.getUpper()); + return true; + } + return false; +} + +static void addRange(SmallVector &EndPoints, ConstantInt *Low, + ConstantInt *High) { + if (!EndPoints.empty()) + if (tryMergeRange(EndPoints, Low, High)) + return; + + EndPoints.push_back(Low); + EndPoints.push_back(High); +} + +static MDNode *getMostGenericRange(MDNode *A, MDNode *B) { + // Given two ranges, we want to compute the union of the ranges. 
This + // is slightly complicated by having to combine the intervals and merge + // the ones that overlap. + + if (!A || !B) + return NULL; + + if (A == B) + return A; + + // First, walk both lists in order of the lower boundary of each interval. + // At each step, try to merge the new interval to the last one we added. + SmallVector EndPoints; + int AI = 0; + int BI = 0; + int AN = A->getNumOperands() / 2; + int BN = B->getNumOperands() / 2; + while (AI < AN && BI < BN) { + ConstantInt *ALow = cast(A->getOperand(2 * AI)); + ConstantInt *BLow = cast(B->getOperand(2 * BI)); + + if (ALow->getValue().slt(BLow->getValue())) { + addRange(EndPoints, ALow, cast(A->getOperand(2 * AI + 1))); + ++AI; + } else { + addRange(EndPoints, BLow, cast(B->getOperand(2 * BI + 1))); + ++BI; + } + } + while (AI < AN) { + addRange(EndPoints, cast(A->getOperand(2 * AI)), + cast(A->getOperand(2 * AI + 1))); + ++AI; + } + while (BI < BN) { + addRange(EndPoints, cast(B->getOperand(2 * BI)), + cast(B->getOperand(2 * BI + 1))); + ++BI; + } + + // If we have more than 2 ranges (4 endpoints) we have to try to merge + // the last and first ones. + unsigned Size = EndPoints.size(); + if (Size > 4) { + ConstantInt *FB = cast(EndPoints[0]); + ConstantInt *FE = cast(EndPoints[1]); + if (tryMergeRange(EndPoints, FB, FE)) { + for (unsigned i = 0; i < Size - 2; ++i) { + EndPoints[i] = EndPoints[i + 2]; + } + EndPoints.resize(Size - 2); + } + } + + // If in the end we have a single range, it is possible that it is now the + // full range. Just drop the metadata in that case. + if (EndPoints.size() == 2) { + ConstantRange Range(cast(EndPoints[0])->getValue(), + cast(EndPoints[1])->getValue()); + if (Range.isFullSet()) + return NULL; + } + + return MDNode::get(A->getContext(), EndPoints); +} + +static void patchReplacementInstruction(Value *Repl, Instruction *I) { + // Patch the replacement so that it is not more restrictive than the value + // being replaced.
+ BinaryOperator *Op = dyn_cast(I); + BinaryOperator *ReplOp = dyn_cast(Repl); + if (Op && ReplOp && isa(Op) && + isa(ReplOp)) { + if (ReplOp->hasNoSignedWrap() && !Op->hasNoSignedWrap()) + ReplOp->setHasNoSignedWrap(false); + if (ReplOp->hasNoUnsignedWrap() && !Op->hasNoUnsignedWrap()) + ReplOp->setHasNoUnsignedWrap(false); + } + if (Instruction *ReplInst = dyn_cast(Repl)) { + SmallVector, 4> Metadata; + ReplInst->getAllMetadataOtherThanDebugLoc(Metadata); + for (int i = 0, n = Metadata.size(); i < n; ++i) { + unsigned Kind = Metadata[i].first; + MDNode *IMD = I->getMetadata(Kind); + MDNode *ReplMD = Metadata[i].second; + switch(Kind) { + default: + ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata + break; + case LLVMContext::MD_dbg: + llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); + case LLVMContext::MD_tbaa: + ReplInst->setMetadata(Kind, getMostGenericTBAA(IMD, ReplMD)); + break; + case LLVMContext::MD_range: + ReplInst->setMetadata(Kind, getMostGenericRange(IMD, ReplMD)); + break; + case LLVMContext::MD_prof: + llvm_unreachable("MD_prof in a non terminator instruction"); + break; + case LLVMContext::MD_fpmath: + ReplInst->setMetadata(Kind, getMostGenericFPMath(IMD, ReplMD)); + break; + } + } + } +} + +static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { + patchReplacementInstruction(Repl, I); + I->replaceAllUsesWith(Repl); +} + /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L) { @@ -1892,7 +2090,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - L->replaceAllUsesWith(AvailableVal); + patchAndReplaceAllUsesWith(AvailableVal, L); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2224,7 +2422,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! 
- I->replaceAllUsesWith(repl); + patchAndReplaceAllUsesWith(repl, I); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll new file mode 100644 index 00000000000..8ab285448fb --- /dev/null +++ b/test/Transforms/GVN/fpmath.ll @@ -0,0 +1,45 @@ +; RUN: opt %s -gvn -S -o - | FileCheck %s + +define double @test1(double %x, double %y) { +; CHECK: @test1(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y +; CHECK-NOT: fpmath +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test2(double %x, double %y) { +; CHECK: @test2(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !0 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test3(double %x, double %y) { +; CHECK: @test3(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !1 + %add2 = fadd double %x, %y, !fpmath !0 + %foo = fadd double %add1, %add2 + ret double %foo +} + +define double @test4(double %x, double %y) { +; CHECK: @test4(double %x, double %y) +; CHECK: %add1 = fadd double %x, %y, !fpmath !1 +; CHECK: %foo = fadd double %add1, %add1 + %add1 = fadd double %x, %y, !fpmath !0 + %add2 = fadd double %x, %y, !fpmath !1 + %foo = fadd double %add1, %add2 + ret double %foo +} + +!0 = metadata !{ float 5.0 } +!1 = metadata !{ float 2.5 } diff --git a/test/Transforms/GVN/pr12979.ll b/test/Transforms/GVN/pr12979.ll new file mode 100644 index 00000000000..669da9127d0 --- /dev/null +++ b/test/Transforms/GVN/pr12979.ll @@ -0,0 +1,79 @@ +; RUN: opt %s -gvn -S -o - | FileCheck %s + +define i32 
@test1(i32 %x, i32 %y) { +; CHECK: @test1(i32 %x, i32 %y) +; CHECK: %add1 = add i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nsw i32 %x, %y + %add2 = add i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test2(i32 %x, i32 %y) { +; CHECK: @test2(i32 %x, i32 %y) +; CHECK: %add1 = add i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nuw i32 %x, %y + %add2 = add i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test3(i32 %x, i32 %y) { +; CHECK: @test3(i32 %x, i32 %y) +; CHECK: %add1 = add i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nuw nsw i32 %x, %y + %add2 = add i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test4(i32 %x, i32 %y) { +; CHECK: @test4(i32 %x, i32 %y) +; CHECK: %add1 = add nsw i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nsw i32 %x, %y + %add2 = add nsw i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test5(i32 %x, i32 %y) { +; CHECK: @test5(i32 %x, i32 %y) +; CHECK: %add1 = add i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nuw i32 %x, %y + %add2 = add nsw i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test6(i32 %x, i32 %y) { +; CHECK: @test6(i32 %x, i32 %y) +; CHECK: %add1 = add nsw i32 %x, %y +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add nuw nsw i32 %x, %y + %add2 = add nsw i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} + +define i32 @test7(i32 %x, i32 %y) { +; CHECK: @test7(i32 %x, i32 %y) +; CHECK: %add1 = add i32 %x, %y +; CHECK-NOT: what_is_this +; CHECK: %foo = add i32 %add1, %add1 + + %add1 = add i32 %x, %y, !what_is_this !{} + %add2 = add i32 %x, %y + %foo = add i32 %add1, %add2 + ret i32 %foo +} diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll new file mode 100644 index 00000000000..3759c415dab --- /dev/null +++ b/test/Transforms/GVN/range.ll @@ -0,0 +1,101 @@ +; RUN: opt %s 
-basicaa -gvn -S -o - | FileCheck %s + +define i32 @test1(i32* %p) { +; CHECK: @test1(i32* %p) +; CHECK: %a = load i32* %p, !range !0 +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !0 + %b = load i32* %p, !range !0 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test2(i32* %p) { +; CHECK: @test2(i32* %p) +; CHECK: %a = load i32* %p +; CHECK-NOT: range +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !0 + %b = load i32* %p + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test3(i32* %p) { +; CHECK: @test3(i32* %p) +; CHECK: %a = load i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]] +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !0 + %b = load i32* %p, !range !1 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test4(i32* %p) { +; CHECK: @test4(i32* %p) +; CHECK: %a = load i32* %p, !range ![[MERGED_RANGE:[0-9]+]] +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !0 + %b = load i32* %p, !range !2 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test5(i32* %p) { +; CHECK: @test5(i32* %p) +; CHECK: %a = load i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]] +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !3 + %b = load i32* %p, !range !4 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test6(i32* %p) { +; CHECK: @test6(i32* %p) +; CHECK: %a = load i32* %p, !range ![[MERGED_TEST6:[0-9]+]] +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !5 + %b = load i32* %p, !range !6 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test7(i32* %p) { +; CHECK: @test7(i32* %p) +; CHECK: %a = load i32* %p, !range ![[MERGED_TEST7:[0-9]+]] +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !7 + %b = load i32* %p, !range !8 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test8(i32* %p) { +; CHECK: @test8(i32* %p) +; CHECK: %a = load i32* %p +; CHECK-NOT: range +; CHECK: %c = add i32 %a, %a + %a = load i32* %p, !range !9 + %b = load i32* %p, !range !10 + %c = add i32 %a, %b + ret i32 %c +} + +; CHECK: 
![[DISJOINT_RANGE]] = metadata !{i32 0, i32 2, i32 3, i32 5} +; CHECK: ![[MERGED_RANGE]] = metadata !{i32 0, i32 5} +; CHECK: ![[MERGED_SIGNED_RANGE]] = metadata !{i32 -3, i32 -2, i32 1, i32 2} +; CHECK: ![[MERGED_TEST6]] = metadata !{i32 10, i32 1} +; CHECK: ![[MERGED_TEST7]] = metadata !{i32 3, i32 4, i32 5, i32 2} + +!0 = metadata !{i32 0, i32 2} +!1 = metadata !{i32 3, i32 5} +!2 = metadata !{i32 2, i32 5} +!3 = metadata !{i32 -3, i32 -2} +!4 = metadata !{i32 1, i32 2} +!5 = metadata !{i32 10, i32 1} +!6 = metadata !{i32 12, i32 13} +!7 = metadata !{i32 1, i32 2, i32 3, i32 4} +!8 = metadata !{i32 5, i32 1} +!9 = metadata !{i32 1, i32 5} +!10 = metadata !{i32 5, i32 1} diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll new file mode 100644 index 00000000000..90661c62507 --- /dev/null +++ b/test/Transforms/GVN/tbaa.ll @@ -0,0 +1,81 @@ +; RUN: opt %s -basicaa -gvn -S -o - | FileCheck %s + +define i32 @test1(i8* %p, i8* %q) { +; CHECK: @test1(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p) +; CHECK-NOT: tbaa +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !0 + %b = call i32 @foo(i8* %p) + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test2(i8* %p, i8* %q) { +; CHECK: @test2(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa !0 +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !0 + %b = call i32 @foo(i8* %p), !tbaa !0 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test3(i8* %p, i8* %q) { +; CHECK: @test3(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa !3 +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !3 + %b = call i32 @foo(i8* %p), !tbaa !3 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test4(i8* %p, i8* %q) { +; CHECK: @test4(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !1 + %b = call i32 @foo(i8* %p), !tbaa !0 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test5(i8* %p, i8* %q) { 
+; CHECK: @test5(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !0 + %b = call i32 @foo(i8* %p), !tbaa !1 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test6(i8* %p, i8* %q) { +; CHECK: @test6(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p), !tbaa !1 +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !0 + %b = call i32 @foo(i8* %p), !tbaa !3 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @test7(i8* %p, i8* %q) { +; CHECK: @test7(i8* %p, i8* %q) +; CHECK: call i32 @foo(i8* %p) +; CHECK-NOT: tbaa +; CHECK: %c = add i32 %a, %a + %a = call i32 @foo(i8* %p), !tbaa !4 + %b = call i32 @foo(i8* %p), !tbaa !3 + %c = add i32 %a, %b + ret i32 %c +} + +declare i32 @foo(i8*) readonly + +!0 = metadata !{metadata !"C", metadata !1} +!1 = metadata !{metadata !"A", metadata !2} +!2 = metadata !{metadata !"tbaa root", null} +!3 = metadata !{metadata !"B", metadata !1} +!4 = metadata !{metadata !"another root", null}